summaryrefslogtreecommitdiffstats
path: root/xlators/debug/error-gen/src
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/debug/error-gen/src')
0 files changed, 0 insertions, 0 deletions
d' style='width: 0.1%;'/> -rw-r--r--.testignore4
-rw-r--r--CONTRIBUTING25
-rw-r--r--CONTRIBUTING.md114
-rw-r--r--MAINTAINERS195
-rw-r--r--Makefile.am10
-rw-r--r--README.md3
-rw-r--r--api/examples/glfsxmp.c299
-rw-r--r--api/src/Makefile.am2
-rw-r--r--api/src/README.Symbol_Versions2
-rw-r--r--api/src/gfapi-messages.h97
-rw-r--r--api/src/gfapi.aliases47
-rw-r--r--api/src/gfapi.map32
-rw-r--r--api/src/glfs-fops.c1311
-rw-r--r--api/src/glfs-handleops.c275
-rw-r--r--api/src/glfs-handles.h16
-rw-r--r--api/src/glfs-internal.h110
-rw-r--r--api/src/glfs-master.c41
-rw-r--r--api/src/glfs-mem-types.h2
-rw-r--r--api/src/glfs-mgmt.c261
-rw-r--r--api/src/glfs-resolve.c178
-rw-r--r--api/src/glfs.c321
-rw-r--r--api/src/glfs.h298
-rwxr-xr-xautogen.sh2
-rwxr-xr-xbuild-aux/pkg-version20
-rw-r--r--cli/src/Makefile.am15
-rw-r--r--cli/src/cli-cmd-global.c71
-rw-r--r--cli/src/cli-cmd-misc.c21
-rw-r--r--cli/src/cli-cmd-parser.c690
-rw-r--r--cli/src/cli-cmd-peer.c28
-rw-r--r--cli/src/cli-cmd-snapshot.c14
-rw-r--r--cli/src/cli-cmd-system.c77
-rw-r--r--cli/src/cli-cmd-volume.c865
-rw-r--r--cli/src/cli-cmd.c46
-rw-r--r--cli/src/cli-cmd.h5
-rw-r--r--cli/src/cli-mem-types.h2
-rw-r--r--cli/src/cli-quotad-client.c5
-rw-r--r--cli/src/cli-quotad-client.h4
-rw-r--r--cli/src/cli-rl.c15
-rw-r--r--cli/src/cli-rpc-ops.c3567
-rw-r--r--cli/src/cli-xml-output.c571
-rw-r--r--cli/src/cli.c196
-rw-r--r--cli/src/cli.h66
-rw-r--r--cli/src/input.c3
-rw-r--r--configure.ac385
-rw-r--r--contrib/fuse-lib/misc.c2
-rw-r--r--contrib/fuse-lib/mount-gluster-compat.h6
-rw-r--r--contrib/fuse-lib/mount.c7
-rw-r--r--contrib/macfuse/mount_darwin.c6
-rw-r--r--contrib/sunrpc/xdr_sizeof.c204
-rw-r--r--contrib/timer-wheel/timer-wheel.c9
-rw-r--r--contrib/timer-wheel/timer-wheel.h4
-rw-r--r--contrib/umountd/umountd.c10
-rw-r--r--contrib/userspace-rcu/static-wfcqueue.h685
-rw-r--r--contrib/userspace-rcu/static-wfstack.h455
-rw-r--r--contrib/userspace-rcu/wfcqueue.h216
-rw-r--r--contrib/userspace-rcu/wfstack.h178
-rw-r--r--doc/Makefile.am4
-rw-r--r--doc/README.md2
-rw-r--r--doc/debugging/analyzing-regression-cores.md43
-rw-r--r--doc/debugging/gfid-to-path.md45
-rw-r--r--doc/debugging/split-brain.md75
-rw-r--r--doc/debugging/statedump.md77
-rw-r--r--doc/developer-guide/Language-Bindings.md1
-rw-r--r--doc/developer-guide/README.md12
-rw-r--r--doc/developer-guide/Using-Gluster-Test-Framework.md1
-rw-r--r--doc/developer-guide/afr-locks-evolution.md6
-rw-r--r--doc/developer-guide/afr-self-heal-daemon.md2
-rw-r--r--doc/developer-guide/bd-xlator.md469
-rw-r--r--doc/developer-guide/brickmux-thread-reduction.md64
-rw-r--r--doc/developer-guide/coding-standard.md28
-rw-r--r--doc/developer-guide/commit-guidelines.md2
-rw-r--r--doc/developer-guide/datastructure-inode.md61
-rw-r--r--doc/developer-guide/datastructure-iobuf.md36
-rw-r--r--doc/developer-guide/datastructure-mem-pool.md8
-rw-r--r--doc/developer-guide/dirops-transactions-in-dht.md3
-rw-r--r--doc/developer-guide/fuse-interrupt.md97
-rw-r--r--doc/developer-guide/identifying-resource-leaks.md24
-rw-r--r--doc/developer-guide/logging-guidelines.md2
-rw-r--r--doc/developer-guide/network_compression.md20
-rw-r--r--doc/developer-guide/options-to-contribute.md2
-rw-r--r--doc/developer-guide/syncop.md2
-rw-r--r--doc/developer-guide/thread-naming.md6
-rw-r--r--doc/developer-guide/translator-development.md4
-rw-r--r--doc/developer-guide/xlator-classification.md2
-rw-r--r--doc/features/ctime.md32
-rw-r--r--doc/gluster.839
-rw-r--r--doc/glusterd.84
-rw-r--r--doc/glusterfs.813
-rw-r--r--doc/glusterfsd.89
-rw-r--r--doc/mount.glusterfs.818
-rw-r--r--events/src/Makefile.am4
-rw-r--r--events/src/eventsapiconf.py.in2
-rw-r--r--events/src/gf_event.py6
-rw-r--r--events/src/glustereventsd.py42
-rw-r--r--events/src/peer_eventsapi.py55
-rw-r--r--events/src/utils.py22
-rw-r--r--extras/Makefile.am19
-rw-r--r--extras/cliutils/README.md2
-rw-r--r--extras/cliutils/cliutils.py9
-rwxr-xr-xextras/control-cpu-load.sh2
-rwxr-xr-xextras/control-mem.sh2
-rwxr-xr-xextras/create_new_xlator/generate_xlator.py2
-rw-r--r--extras/create_new_xlator/new-xlator.c.tmpl (renamed from extras/create_new_xlator/new-xlator-tmpl.c)84
-rw-r--r--extras/distributed-testing/distributed-test-env1
-rwxr-xr-xextras/distributed-testing/distributed-test-runner.py12
-rw-r--r--extras/ec-heal-script/README.md69
-rwxr-xr-xextras/ec-heal-script/correct_pending_heals.sh415
-rwxr-xr-xextras/ec-heal-script/gfid_needing_heal_parallel.sh278
-rw-r--r--extras/ganesha/Makefile.am2
-rw-r--r--extras/ganesha/config/Makefile.am4
-rw-r--r--extras/ganesha/config/ganesha-ha.conf.sample19
-rw-r--r--extras/ganesha/ocf/Makefile.am11
-rw-r--r--extras/ganesha/ocf/ganesha_grace221
-rw-r--r--extras/ganesha/ocf/ganesha_mon234
-rw-r--r--extras/ganesha/ocf/ganesha_nfsd167
-rw-r--r--extras/ganesha/scripts/Makefile.am6
-rwxr-xr-xextras/ganesha/scripts/create-export-ganesha.sh92
-rwxr-xr-xextras/ganesha/scripts/dbus-send.sh70
-rw-r--r--extras/ganesha/scripts/ganesha-ha.sh1199
-rwxr-xr-xextras/ganesha/scripts/generate-epoch.py48
-rw-r--r--extras/geo-rep/Makefile.am2
-rw-r--r--extras/geo-rep/gsync-sync-gfid.c4
-rw-r--r--extras/geo-rep/schedule_georep.py.in6
-rw-r--r--extras/glusterd.vol.in5
-rwxr-xr-xextras/glusterfs-georep-upgrade.py77
-rw-r--r--extras/glusterfs-logrotate21
-rw-r--r--extras/group-distributed-virt10
-rw-r--r--extras/group-gluster-block8
-rw-r--r--extras/group-samba11
-rw-r--r--extras/group-virt.example8
-rwxr-xr-xextras/hook-scripts/S56glusterd-geo-rep-create-post.sh2
-rw-r--r--extras/hook-scripts/add-brick/post/Makefile.am4
-rwxr-xr-xextras/hook-scripts/add-brick/post/S10selinux-label-brick.sh100
-rwxr-xr-xextras/hook-scripts/create/post/S10selinux-label-brick.sh13
-rwxr-xr-xextras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh66
-rwxr-xr-xextras/hook-scripts/set/post/S30samba-set.sh15
-rwxr-xr-xextras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh27
-rw-r--r--extras/hook-scripts/start/post/Makefile.am2
-rwxr-xr-xextras/hook-scripts/start/post/S30samba-start.sh20
-rwxr-xr-xextras/hook-scripts/start/post/S31ganesha-start.sh122
-rwxr-xr-xextras/identify-hangs.sh53
-rwxr-xr-xextras/mount-shared-storage.sh2
-rwxr-xr-xextras/ocf/volume.in22
-rw-r--r--extras/python/Makefile.am7
-rw-r--r--extras/python/__init__.py (renamed from xlators/features/glupy/src/__init__.py.in)0
-rwxr-xr-xextras/quota/quota_fsck.py20
-rwxr-xr-xextras/snap_scheduler/gcron.py7
-rwxr-xr-xextras/snap_scheduler/snap_scheduler.py4
-rwxr-xr-xextras/statedumpparse.rb208
-rw-r--r--extras/systemd/Makefile.am10
-rw-r--r--extras/systemd/gluster-ta-volume.service.in13
-rw-r--r--extras/systemd/glusterd.service.in14
-rw-r--r--extras/systemd/glustereventsd.service.in4
-rw-r--r--extras/thin-arbiter/gluster-ta-volume.service13
-rwxr-xr-xextras/thin-arbiter/setup-thin-arbiter.sh198
-rw-r--r--extras/thin-arbiter/thin-arbiter.vol5
-rw-r--r--extras/who-wrote-glusterfs/gitdm.domain-map1
-rw-r--r--geo-replication/gsyncd.conf.in22
-rw-r--r--geo-replication/setup.py6
-rw-r--r--geo-replication/src/gsyncd.c14
-rwxr-xr-xgeo-replication/src/gverify.sh33
-rw-r--r--geo-replication/src/peer_georep-sshkey.py.in4
-rwxr-xr-xgeo-replication/src/peer_gsec_create.in4
-rw-r--r--geo-replication/src/peer_mountbroker.py.in21
-rw-r--r--geo-replication/src/procdiggy.c24
-rwxr-xr-xgeo-replication/src/set_geo_rep_pem_keys.sh1
-rw-r--r--geo-replication/syncdaemon/Makefile.am2
-rw-r--r--geo-replication/syncdaemon/README.md1
-rw-r--r--geo-replication/syncdaemon/argsupgrade.py13
-rw-r--r--geo-replication/syncdaemon/changelogagent.py78
-rw-r--r--geo-replication/syncdaemon/gsyncd.py37
-rw-r--r--geo-replication/syncdaemon/gsyncdconfig.py100
-rw-r--r--geo-replication/syncdaemon/gsyncdstatus.py4
-rw-r--r--geo-replication/syncdaemon/libcxattr.py16
-rw-r--r--geo-replication/syncdaemon/libgfchangelog.py253
-rw-r--r--geo-replication/syncdaemon/master.py109
-rw-r--r--geo-replication/syncdaemon/monitor.py99
-rw-r--r--geo-replication/syncdaemon/py2py3.py141
-rw-r--r--geo-replication/syncdaemon/repce.py5
-rw-r--r--geo-replication/syncdaemon/resource.py191
-rw-r--r--geo-replication/syncdaemon/subcmds.py26
-rw-r--r--geo-replication/syncdaemon/syncdutils.py160
-rwxr-xr-xgeo-replication/tests/unit/test_gsyncdstatus.py10
-rw-r--r--glusterfs-api.pc.in2
-rw-r--r--glusterfs.spec.in762
-rw-r--r--glusterfsd/src/Makefile.am5
-rw-r--r--glusterfsd/src/gf_attach.c61
-rw-r--r--glusterfsd/src/glusterfsd-mem-types.h2
-rw-r--r--glusterfsd/src/glusterfsd-messages.h81
-rw-r--r--glusterfsd/src/glusterfsd-mgmt.c832
-rw-r--r--glusterfsd/src/glusterfsd.c754
-rw-r--r--glusterfsd/src/glusterfsd.h14
-rw-r--r--heal/src/Makefile.am6
-rw-r--r--heal/src/glfs-heal.c38
-rw-r--r--libgfchangelog.pc.in2
-rw-r--r--libgfdb.pc.in12
-rw-r--r--libglusterd/Makefile.am (renamed from xlators/experimental/dht2/dht2-client/Makefile.am)0
-rw-r--r--libglusterd/src/Makefile.am31
-rw-r--r--libglusterd/src/gd-common-utils.c78
-rw-r--r--libglusterd/src/gd-common-utils.h28
-rw-r--r--libglusterd/src/libglusterd.sym2
-rw-r--r--libglusterfs/src/Makefile.am63
-rw-r--r--libglusterfs/src/async.c720
-rw-r--r--libglusterfs/src/call-stub.c225
-rw-r--r--libglusterfs/src/circ-buff.c4
-rw-r--r--libglusterfs/src/client_t.c124
-rw-r--r--libglusterfs/src/cluster-syncop.c10
-rw-r--r--libglusterfs/src/common-utils.c721
-rw-r--r--libglusterfs/src/compat-errno.c2
-rw-r--r--libglusterfs/src/compat.c20
-rw-r--r--libglusterfs/src/compound-fop-utils.c138
-rw-r--r--libglusterfs/src/compound-fop-utils.h36
-rw-r--r--libglusterfs/src/ctx.c12
-rw-r--r--libglusterfs/src/daemon.c3
-rw-r--r--libglusterfs/src/default-args.c48
-rw-r--r--libglusterfs/src/defaults-tmpl.c27
-rw-r--r--libglusterfs/src/dict.c533
-rw-r--r--libglusterfs/src/event-epoll.c214
-rw-r--r--libglusterfs/src/event-history.c4
-rw-r--r--libglusterfs/src/event-poll.c73
-rw-r--r--libglusterfs/src/event.c57
-rw-r--r--libglusterfs/src/events.c91
-rw-r--r--libglusterfs/src/fd-lk.c6
-rw-r--r--libglusterfs/src/fd.c174
-rwxr-xr-xlibglusterfs/src/generator.py13
-rw-r--r--libglusterfs/src/gf-dirent.c7
-rw-r--r--libglusterfs/src/gfdb/Makefile.am37
-rw-r--r--libglusterfs/src/gfdb/gfdb_data_store.c802
-rw-r--r--libglusterfs/src/gfdb/gfdb_data_store.h331
-rw-r--r--libglusterfs/src/gfdb/gfdb_data_store_helper.c588
-rw-r--r--libglusterfs/src/gfdb/gfdb_data_store_helper.h95
-rw-r--r--libglusterfs/src/gfdb/gfdb_data_store_types.h532
-rw-r--r--libglusterfs/src/gfdb/gfdb_mem-types.h17
-rw-r--r--libglusterfs/src/gfdb/gfdb_sqlite3.c1542
-rw-r--r--libglusterfs/src/gfdb/gfdb_sqlite3.h328
-rw-r--r--libglusterfs/src/gfdb/gfdb_sqlite3_helper.c1260
-rw-r--r--libglusterfs/src/gfdb/gfdb_sqlite3_helper.h51
-rw-r--r--libglusterfs/src/gidcache.c9
-rw-r--r--libglusterfs/src/globals.c308
-rw-r--r--libglusterfs/src/glusterfs/async.h209
-rw-r--r--libglusterfs/src/glusterfs/atomic.h (renamed from libglusterfs/src/atomic.h)2
-rw-r--r--libglusterfs/src/glusterfs/byte-order.h (renamed from libglusterfs/src/byte-order.h)0
-rw-r--r--libglusterfs/src/glusterfs/call-stub.h (renamed from libglusterfs/src/call-stub.h)32
-rw-r--r--libglusterfs/src/glusterfs/checksum.h (renamed from libglusterfs/src/checksum.h)0
-rw-r--r--libglusterfs/src/glusterfs/circ-buff.h (renamed from libglusterfs/src/circ-buff.h)4
-rw-r--r--libglusterfs/src/glusterfs/client_t.h (renamed from libglusterfs/src/client_t.h)32
-rw-r--r--libglusterfs/src/glusterfs/cluster-syncop.h (renamed from libglusterfs/src/cluster-syncop.h)8
-rw-r--r--libglusterfs/src/glusterfs/common-utils.h (renamed from libglusterfs/src/common-utils.h)434
-rw-r--r--libglusterfs/src/glusterfs/compat-errno.h (renamed from libglusterfs/src/compat-errno.h)0
-rw-r--r--libglusterfs/src/glusterfs/compat-uuid.h (renamed from libglusterfs/src/compat-uuid.h)0
-rw-r--r--libglusterfs/src/glusterfs/compat.h (renamed from libglusterfs/src/compat.h)26
-rw-r--r--libglusterfs/src/glusterfs/daemon.h (renamed from libglusterfs/src/daemon.h)0
-rw-r--r--libglusterfs/src/glusterfs/default-args.h (renamed from libglusterfs/src/default-args.h)13
-rw-r--r--libglusterfs/src/glusterfs/defaults.h (renamed from libglusterfs/src/defaults.h)44
-rw-r--r--libglusterfs/src/glusterfs/dict.h (renamed from libglusterfs/src/dict.h)29
-rw-r--r--libglusterfs/src/glusterfs/event-history.h (renamed from libglusterfs/src/event-history.h)6
-rw-r--r--libglusterfs/src/glusterfs/events.h (renamed from libglusterfs/src/events.h)0
-rw-r--r--libglusterfs/src/glusterfs/fd-lk.h (renamed from libglusterfs/src/fd-lk.h)12
-rw-r--r--libglusterfs/src/glusterfs/fd.h (renamed from libglusterfs/src/fd.h)16
-rw-r--r--libglusterfs/src/glusterfs/gf-dirent.h (renamed from libglusterfs/src/gf-dirent.h)4
-rw-r--r--libglusterfs/src/glusterfs/gf-event.h (renamed from libglusterfs/src/gf-event.h)35
-rw-r--r--libglusterfs/src/glusterfs/gidcache.h (renamed from libglusterfs/src/gidcache.h)4
-rw-r--r--libglusterfs/src/glusterfs/glfs-message-id.h (renamed from libglusterfs/src/glfs-message-id.h)1
-rw-r--r--libglusterfs/src/glusterfs/globals.h (renamed from libglusterfs/src/globals.h)26
-rw-r--r--libglusterfs/src/glusterfs/glusterfs-acl.h (renamed from libglusterfs/src/glusterfs-acl.h)4
-rw-r--r--libglusterfs/src/glusterfs/glusterfs-fops.h241
-rw-r--r--libglusterfs/src/glusterfs/glusterfs.h (renamed from libglusterfs/src/glusterfs.h)95
-rw-r--r--libglusterfs/src/glusterfs/graph-utils.h (renamed from libglusterfs/src/graph-utils.h)4
-rw-r--r--libglusterfs/src/glusterfs/hashfn.h (renamed from libglusterfs/src/hashfn.h)2
-rw-r--r--libglusterfs/src/glusterfs/iatt.h (renamed from libglusterfs/src/iatt.h)41
-rw-r--r--libglusterfs/src/glusterfs/inode.h (renamed from libglusterfs/src/inode.h)33
-rw-r--r--libglusterfs/src/glusterfs/iobuf.h (renamed from libglusterfs/src/iobuf.h)21
-rw-r--r--libglusterfs/src/glusterfs/latency.h (renamed from libglusterfs/src/latency.h)22
-rw-r--r--libglusterfs/src/glusterfs/libglusterfs-messages.h245
-rw-r--r--libglusterfs/src/glusterfs/list.h (renamed from libglusterfs/src/list.h)0
-rw-r--r--libglusterfs/src/glusterfs/lkowner.h (renamed from libglusterfs/src/lkowner.h)2
-rw-r--r--libglusterfs/src/glusterfs/locking.h (renamed from libglusterfs/src/locking.h)0
-rw-r--r--libglusterfs/src/glusterfs/logging.h (renamed from libglusterfs/src/logging.h)27
-rw-r--r--libglusterfs/src/glusterfs/lvm-defaults.h (renamed from libglusterfs/src/lvm-defaults.h)0
-rw-r--r--libglusterfs/src/glusterfs/mem-pool.h (renamed from libglusterfs/src/mem-pool.h)82
-rw-r--r--libglusterfs/src/glusterfs/mem-types.h139
-rw-r--r--libglusterfs/src/glusterfs/monitoring.h (renamed from libglusterfs/src/monitoring.h)2
-rw-r--r--libglusterfs/src/glusterfs/options.h (renamed from libglusterfs/src/options.h)48
-rw-r--r--libglusterfs/src/glusterfs/parse-utils.h (renamed from libglusterfs/src/parse-utils.h)0
-rw-r--r--libglusterfs/src/glusterfs/quota-common-utils.h (renamed from libglusterfs/src/quota-common-utils.h)2
-rw-r--r--libglusterfs/src/glusterfs/rbthash.h (renamed from libglusterfs/src/rbthash.h)12
-rw-r--r--libglusterfs/src/glusterfs/refcount.h (renamed from libglusterfs/src/refcount.h)2
-rw-r--r--libglusterfs/src/glusterfs/revision.h (renamed from libglusterfs/src/revision.h)0
-rw-r--r--libglusterfs/src/glusterfs/rot-buffs.h (renamed from libglusterfs/src/rot-buffs.h)6
-rw-r--r--libglusterfs/src/glusterfs/run.h (renamed from libglusterfs/src/run.h)0
-rw-r--r--libglusterfs/src/glusterfs/stack.h (renamed from libglusterfs/src/stack.h)30
-rw-r--r--libglusterfs/src/glusterfs/statedump.h (renamed from libglusterfs/src/statedump.h)6
-rw-r--r--libglusterfs/src/glusterfs/store.h (renamed from libglusterfs/src/store.h)13
-rw-r--r--libglusterfs/src/glusterfs/strfd.h (renamed from libglusterfs/src/strfd.h)0
-rw-r--r--libglusterfs/src/glusterfs/syncop-utils.h (renamed from libglusterfs/src/syncop-utils.h)0
-rw-r--r--libglusterfs/src/glusterfs/syncop.h (renamed from libglusterfs/src/syncop.h)113
-rw-r--r--libglusterfs/src/glusterfs/syscall.h (renamed from libglusterfs/src/syscall.h)47
-rw-r--r--libglusterfs/src/glusterfs/template-component-messages.h (renamed from libglusterfs/src/template-component-messages.h)2
-rw-r--r--libglusterfs/src/glusterfs/throttle-tbf.h (renamed from libglusterfs/src/throttle-tbf.h)6
-rw-r--r--libglusterfs/src/glusterfs/timer.h (renamed from libglusterfs/src/timer.h)9
-rw-r--r--libglusterfs/src/glusterfs/timespec.h (renamed from libglusterfs/src/timespec.h)2
-rw-r--r--libglusterfs/src/glusterfs/trie.h (renamed from libglusterfs/src/trie.h)0
-rw-r--r--libglusterfs/src/glusterfs/upcall-utils.h (renamed from libglusterfs/src/upcall-utils.h)6
-rw-r--r--libglusterfs/src/glusterfs/xlator.h (renamed from libglusterfs/src/xlator.h)85
-rw-r--r--libglusterfs/src/graph-print.c67
-rw-r--r--libglusterfs/src/graph.c704
-rw-r--r--libglusterfs/src/graph.l40
-rw-r--r--libglusterfs/src/graph.y18
-rw-r--r--libglusterfs/src/hashfn.c12
-rw-r--r--libglusterfs/src/inode.c729
-rw-r--r--libglusterfs/src/iobuf.c167
-rw-r--r--libglusterfs/src/latency.c98
-rw-r--r--libglusterfs/src/libglusterfs-messages.h114
-rw-r--r--libglusterfs/src/libglusterfs.sym94
-rw-r--r--libglusterfs/src/locking.c2
-rw-r--r--libglusterfs/src/logging.c692
-rw-r--r--libglusterfs/src/mem-pool.c687
-rw-r--r--libglusterfs/src/mem-types.h181
-rw-r--r--libglusterfs/src/monitoring.c16
-rw-r--r--libglusterfs/src/options.c164
-rw-r--r--libglusterfs/src/parse-utils.c10
-rw-r--r--libglusterfs/src/quota-common-utils.c12
-rw-r--r--libglusterfs/src/rbthash.c60
-rw-r--r--libglusterfs/src/refcount.c4
-rw-r--r--libglusterfs/src/rot-buffs.c6
-rw-r--r--libglusterfs/src/run.c16
-rw-r--r--libglusterfs/src/stack.c12
-rw-r--r--libglusterfs/src/statedump.c207
-rw-r--r--libglusterfs/src/store.c143
-rw-r--r--libglusterfs/src/strfd.c8
-rw-r--r--libglusterfs/src/syncop-utils.c57
-rw-r--r--libglusterfs/src/syncop.c572
-rw-r--r--libglusterfs/src/syscall.c84
-rw-r--r--libglusterfs/src/throttle-tbf.c6
-rw-r--r--libglusterfs/src/tier-ctr-interface.h44
-rw-r--r--libglusterfs/src/timer.c149
-rw-r--r--libglusterfs/src/timespec.c29
-rw-r--r--libglusterfs/src/trie.c6
-rw-r--r--libglusterfs/src/unittest/global_mock.c4
-rw-r--r--libglusterfs/src/unittest/log_mock.c4
-rw-r--r--libglusterfs/src/unittest/mem_pool_unittest.c6
-rw-r--r--libglusterfs/src/xlator.c432
-rwxr-xr-xrfc.sh237
-rw-r--r--rpc/rpc-lib/src/Makefile.am2
-rw-r--r--rpc/rpc-lib/src/auth-glusterfs.c3
-rw-r--r--rpc/rpc-lib/src/auth-null.c3
-rw-r--r--rpc/rpc-lib/src/auth-unix.c3
-rw-r--r--rpc/rpc-lib/src/autoscale-threads.c4
-rw-r--r--rpc/rpc-lib/src/libgfrpc.sym3
-rw-r--r--rpc/rpc-lib/src/mgmt-pmap.c7
-rw-r--r--rpc/rpc-lib/src/protocol-common.h4
-rw-r--r--rpc/rpc-lib/src/rpc-clnt-ping.c8
-rw-r--r--rpc/rpc-lib/src/rpc-clnt-ping.h1
-rw-r--r--rpc/rpc-lib/src/rpc-clnt.c145
-rw-r--r--rpc/rpc-lib/src/rpc-clnt.h26
-rw-r--r--rpc/rpc-lib/src/rpc-drc.c51
-rw-r--r--rpc/rpc-lib/src/rpc-drc.h6
-rw-r--r--rpc/rpc-lib/src/rpc-lib-messages.h2
-rw-r--r--rpc/rpc-lib/src/rpc-transport.c162
-rw-r--r--rpc/rpc-lib/src/rpc-transport.h53
-rw-r--r--rpc/rpc-lib/src/rpcsvc-auth.c46
-rw-r--r--rpc/rpc-lib/src/rpcsvc-common.h12
-rw-r--r--rpc/rpc-lib/src/rpcsvc.c285
-rw-r--r--rpc/rpc-lib/src/rpcsvc.h92
-rw-r--r--rpc/rpc-lib/src/xdr-common.h2
-rw-r--r--rpc/rpc-lib/src/xdr-rpc.c6
-rw-r--r--rpc/rpc-lib/src/xdr-rpc.h1
-rw-r--r--rpc/rpc-lib/src/xdr-rpcclnt.c6
-rw-r--r--rpc/rpc-lib/src/xdr-rpcclnt.h2
-rw-r--r--rpc/rpc-transport/Makefile.am2
-rw-r--r--rpc/rpc-transport/rdma/Makefile.am1
-rw-r--r--rpc/rpc-transport/rdma/src/Makefile.am24
-rw-r--r--rpc/rpc-transport/rdma/src/name.c703
-rw-r--r--rpc/rpc-transport/rdma/src/name.h35
-rw-r--r--rpc/rpc-transport/rdma/src/rdma.c4907
-rw-r--r--rpc/rpc-transport/rdma/src/rdma.h384
-rw-r--r--rpc/rpc-transport/rdma/src/rpc-trans-rdma-messages.h66
-rw-r--r--rpc/rpc-transport/socket/src/name.c110
-rw-r--r--rpc/rpc-transport/socket/src/name.h2
-rw-r--r--rpc/rpc-transport/socket/src/socket-mem-types.h2
-rw-r--r--rpc/rpc-transport/socket/src/socket.c1002
-rw-r--r--rpc/rpc-transport/socket/src/socket.h34
-rw-r--r--rpc/xdr/gen/Makefile.am49
-rw-r--r--rpc/xdr/src/.gitignore2
-rw-r--r--rpc/xdr/src/Makefile.am70
-rw-r--r--rpc/xdr/src/acl3-xdr.x2
-rw-r--r--rpc/xdr/src/changelog-xdr.x2
-rw-r--r--rpc/xdr/src/cli1-xdr.x3
-rw-r--r--rpc/xdr/src/glusterd1-xdr.x18
-rw-r--r--rpc/xdr/src/glusterfs-fops.x248
-rw-r--r--rpc/xdr/src/glusterfs3-xdr.x123
-rw-r--r--rpc/xdr/src/glusterfs3.h67
-rw-r--r--rpc/xdr/src/glusterfs4-xdr.x152
-rw-r--r--rpc/xdr/src/libgfxdr.sym4
-rw-r--r--rpc/xdr/src/mount3udp.x2
-rw-r--r--rpc/xdr/src/nlm4-xdr.x2
-rw-r--r--rpc/xdr/src/nsm-xdr.x2
-rw-r--r--rpc/xdr/src/portmap-xdr.x2
-rw-r--r--rpc/xdr/src/rpc-common-xdr.x3
-rw-r--r--rpc/xdr/src/xdr-generic.h2
-rw-r--r--rpc/xdr/src/xdr-nfs3.c2
-rwxr-xr-xrun-tests.sh99
-rw-r--r--tests/00-geo-rep/00-georep-verify-non-root-setup.t294
-rw-r--r--tests/00-geo-rep/00-georep-verify-setup.t16
-rw-r--r--tests/00-geo-rep/01-georep-glusterd-tests.t213
-rw-r--r--tests/00-geo-rep/bug-1600145.t109
-rw-r--r--tests/00-geo-rep/bug-1708603.t63
-rw-r--r--tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t22
-rw-r--r--tests/00-geo-rep/georep-basic-dr-rsync.t44
-rw-r--r--tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t18
-rw-r--r--tests/00-geo-rep/georep-basic-dr-tarssh.t14
-rw-r--r--tests/00-geo-rep/georep-basic-rsync-ec.t224
-rw-r--r--tests/00-geo-rep/georep-basic-tarssh-ec.t223
-rw-r--r--tests/00-geo-rep/georep-config-upgrade.t132
-rw-r--r--tests/00-geo-rep/georep-stderr-hang.t128
-rw-r--r--tests/00-geo-rep/georep-upgrade.t79
-rw-r--r--tests/00-geo-rep/gsyncd.conf.old47
-rw-r--r--tests/000-flaky/basic_afr_split-brain-favorite-child-policy.t (renamed from tests/basic/afr/split-brain-favorite-child-policy.t)11
-rw-r--r--tests/000-flaky/basic_changelog_changelog-snapshot.t60
-rw-r--r--tests/000-flaky/basic_distribute_rebal-all-nodes-migrate.t (renamed from tests/basic/distribute/rebal-all-nodes-migrate.t)8
-rw-r--r--tests/000-flaky/basic_ec_ec-quorum-count-partial-failure.t50
-rw-r--r--[-rwxr-xr-x]tests/000-flaky/basic_mount-nfs-auth.t (renamed from tests/basic/mount-nfs-auth.t)2
-rw-r--r--tests/000-flaky/bugs_core_multiplex-limit-issue-151.t (renamed from tests/bugs/core/multiplex-limit-issue-151.t)6
-rw-r--r--[-rwxr-xr-x]tests/000-flaky/bugs_distribute_bug-1117851.t (renamed from tests/bugs/distribute/bug-1117851.t)6
-rw-r--r--tests/000-flaky/bugs_distribute_bug-1122443.t (renamed from tests/bugs/distribute/bug-1122443.t)17
-rw-r--r--tests/000-flaky/bugs_glusterd_bug-857330/common.rc (renamed from tests/bugs/glusterd/bug-857330/common.rc)2
-rwxr-xr-xtests/000-flaky/bugs_glusterd_bug-857330/normal.t (renamed from tests/bugs/glusterd/bug-857330/normal.t)4
-rwxr-xr-xtests/000-flaky/bugs_glusterd_bug-857330/xml.t (renamed from tests/bugs/glusterd/bug-857330/xml.t)4
-rw-r--r--tests/000-flaky/bugs_glusterd_quorum-value-check.t37
-rw-r--r--tests/000-flaky/bugs_nfs_bug-1116503.t (renamed from tests/bugs/nfs/bug-1116503.t)7
-rw-r--r--tests/000-flaky/features_lock-migration_lkmigration-set-option.t (renamed from tests/features/lock-migration/lkmigration-set-option.t)4
-rw-r--r--tests/afr.rc16
-rw-r--r--tests/basic/afr/afr-anon-inode-no-quorum.t63
-rw-r--r--tests/basic/afr/afr-anon-inode.t114
-rw-r--r--tests/basic/afr/afr-seek.t55
-rw-r--r--tests/basic/afr/arbiter-mount.t3
-rw-r--r--tests/basic/afr/durability-off.t2
-rw-r--r--tests/basic/afr/entry-self-heal-anon-dir-off.t459
-rw-r--r--tests/basic/afr/entry-self-heal.t3
-rw-r--r--tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t9
-rw-r--r--tests/basic/afr/gfid-self-heal.t16
-rw-r--r--tests/basic/afr/granular-esh/cli.t2
-rw-r--r--tests/basic/afr/halo.t61
-rw-r--r--tests/basic/afr/rename-data-loss.t72
-rw-r--r--tests/basic/afr/root-squash-self-heal.t3
-rw-r--r--tests/basic/afr/split-brain-favorite-child-policy-client-side-healing.t124
-rw-r--r--tests/basic/afr/split-brain-heal-info.t2
-rw-r--r--tests/basic/afr/split-brain-healing-ctime.t252
-rw-r--r--tests/basic/afr/split-brain-healing.t37
-rw-r--r--tests/basic/afr/split-brain-resolution.t15
-rw-r--r--tests/basic/afr/ta-check-locks.t68
-rw-r--r--tests/basic/afr/ta-read.t16
-rw-r--r--tests/basic/afr/ta-shd.t6
-rw-r--r--tests/basic/afr/ta-write-on-bad-brick.t14
-rw-r--r--tests/basic/afr/ta.t22
-rw-r--r--tests/basic/afr/tarissue.t2
-rw-r--r--tests/basic/all_squash.t74
-rw-r--r--tests/basic/changelog/changelog-api.t37
-rw-r--r--tests/basic/changelog/changelog-history.t13
-rw-r--r--tests/basic/changelog/history-api.t42
-rw-r--r--tests/basic/cloudsync-sanity.t7
-rw-r--r--tests/basic/ctime/ctime-ec-heal.t70
-rw-r--r--tests/basic/ctime/ctime-ec-rebalance.t43
-rw-r--r--tests/basic/ctime/ctime-mdata-legacy-files.t83
-rw-r--r--tests/basic/ctime/ctime-readdir.c29
-rw-r--r--tests/basic/ctime/ctime-readdir.t50
-rw-r--r--tests/basic/ctime/ctime-rep-heal.t70
-rw-r--r--tests/basic/ctime/ctime-rep-rebalance.t41
-rw-r--r--tests/basic/ctime/ctime-utimesat.t28
-rw-r--r--tests/basic/distribute/brick-down.t83
-rw-r--r--tests/basic/distribute/dir-heal.t145
-rw-r--r--tests/basic/distribute/file-rename.t1021
-rw-r--r--tests/basic/distribute/spare_file_rebalance.t51
-rw-r--r--tests/basic/ec/ec-1468261.t18
-rw-r--r--tests/basic/ec/ec-badfd.c124
-rwxr-xr-xtests/basic/ec/ec-badfd.t26
-rw-r--r--tests/basic/ec/ec-cpu-extensions.t3
-rw-r--r--tests/basic/ec/ec-dirty-flags.t23
-rw-r--r--tests/basic/ec/ec-fix-openfd.t2
-rw-r--r--tests/basic/ec/ec-quorum-count.t167
-rw-r--r--tests/basic/ec/ec-read-mask.t114
-rw-r--r--tests/basic/ec/ec-reset-brick.t50
-rw-r--r--tests/basic/ec/ec-root-heal.t3
-rw-r--r--tests/basic/ec/ec-seek.t3
-rw-r--r--tests/basic/ec/ec-stripe.t2
-rw-r--r--tests/basic/ec/gfapi-ec-open-truncate.c171
-rw-r--r--tests/basic/ec/gfapi-ec-open-truncate.t48
-rwxr-xr-xtests/basic/ec/nfs.t2
-rw-r--r--tests/basic/ec/self-heal-read-write-fail.t69
-rw-r--r--tests/basic/ec/self-heal.t2
-rw-r--r--tests/basic/fencing/afr-lock-heal-advanced.c227
-rw-r--r--tests/basic/fencing/afr-lock-heal-advanced.t115
-rw-r--r--tests/basic/fencing/afr-lock-heal-basic.c182
-rw-r--r--tests/basic/fencing/afr-lock-heal-basic.t102
-rw-r--r--tests/basic/fencing/fence-basic.c229
-rwxr-xr-xtests/basic/fencing/fence-basic.t31
-rw-r--r--tests/basic/fencing/fencing-crash-conistency.t62
-rw-r--r--tests/basic/fencing/test-fence-option.t37
-rwxr-xr-xtests/basic/first-test.t10
-rw-r--r--tests/basic/fops-sanity.c1
-rw-r--r--tests/basic/fuse/active-io-graph-switch.t65
-rw-r--r--tests/basic/gfapi/bug-1507896.c49
-rw-r--r--tests/basic/gfapi/bug-1507896.t33
-rw-r--r--tests/basic/gfapi/gfapi-async-calls-test.c358
-rw-r--r--tests/basic/gfapi/gfapi-copy-file-range.t82
-rw-r--r--tests/basic/gfapi/gfapi-graph-switch-open-fd.t44
-rw-r--r--tests/basic/gfapi/gfapi-keep-writing.c129
-rw-r--r--tests/basic/gfapi/gfapi-ssl-load-volfile-test.c127
-rwxr-xr-xtests/basic/gfapi/gfapi-ssl-load-volfile-test.t76
-rw-r--r--tests/basic/gfapi/gfapi-statx-basic.c184
-rwxr-xr-xtests/basic/gfapi/gfapi-statx-basic.t30
-rw-r--r--tests/basic/gfapi/glfs-copy-file-range.c180
-rw-r--r--tests/basic/gfapi/glfs_h_creat_open.c118
-rwxr-xr-xtests/basic/gfapi/glfs_h_creat_open.t27
-rw-r--r--tests/basic/gfapi/glfsxmp-coverage.c1900
-rw-r--r--tests/basic/gfapi/glfsxmp.t30
-rw-r--r--tests/basic/gfapi/protocol-client-ssl.vol.in15
-rw-r--r--tests/basic/global-threading.t104
-rw-r--r--tests/basic/glusterd-restart-shd-mux.t96
-rw-r--r--tests/basic/glusterd/arbiter-volume.t29
-rw-r--r--tests/basic/glusterd/check-cloudsync-ancestry.t48
-rw-r--r--tests/basic/glusterd/disperse-create.t4
-rw-r--r--tests/basic/glusterd/heald.t49
-rw-r--r--tests/basic/glusterd/thin-arbiter-volume-probe.t25
-rw-r--r--tests/basic/glusterd/thin-arbiter-volume.t45
-rw-r--r--tests/basic/glusterd/volfile_server_switch.t2
-rw-r--r--tests/basic/glusterd/volume-brick-count.t61
-rw-r--r--tests/basic/graph-cleanup-brick-down-shd-mux.t64
-rw-r--r--tests/basic/logchecks-messages.h2
-rw-r--r--tests/basic/logchecks.c6
-rw-r--r--tests/basic/metadisp/fsyncdir.c29
-rw-r--r--tests/basic/metadisp/ftruncate.c34
-rw-r--r--tests/basic/metadisp/fxattr.c107
-rw-r--r--tests/basic/metadisp/gfs-fsetxattr.c141
-rw-r--r--tests/basic/metadisp/metadisp.t316
-rw-r--r--tests/basic/metadisp/metadisp.vol14
-rw-r--r--tests/basic/mount-options.disabled3
-rwxr-xr-xtests/basic/mount.t6
-rw-r--r--tests/basic/multiple-volume-shd-mux.t46
-rw-r--r--tests/basic/namespace.t2
-rwxr-xr-xtests/basic/nl-cache.t30
-rw-r--r--tests/basic/nufa.t2
-rw-r--r--tests/basic/open-behind/open-behind.t183
-rw-r--r--tests/basic/open-behind/tester-fd.c99
-rw-r--r--tests/basic/open-behind/tester.c444
-rw-r--r--tests/basic/open-behind/tester.h145
-rwxr-xr-xtests/basic/playground/template-xlator-sanity.t18
-rw-r--r--tests/basic/posix/shared-statfs.t11
-rw-r--r--tests/basic/posix/zero-fill-enospace.c7
-rw-r--r--tests/basic/quick-read-with-upcall.t13
-rwxr-xr-xtests/basic/quota-anon-fd-nfs.t2
-rwxr-xr-xtests/basic/quota-nfs.t2
-rwxr-xr-xtests/basic/quota.t2
-rwxr-xr-xtests/basic/rpc-coverage.sh14
-rwxr-xr-xtests/basic/rpc-coverage.t4
-rw-r--r--tests/basic/sdfs-sanity.t6
-rw-r--r--tests/basic/seek.c (renamed from tests/basic/ec/seek.c)0
-rw-r--r--tests/basic/shd-mux-afr.t70
-rw-r--r--tests/basic/shd-mux-ec.t75
-rw-r--r--tests/basic/stats-dump.t2
-rwxr-xr-xtests/basic/trace.t55
-rw-r--r--tests/basic/uss.t36
-rw-r--r--tests/basic/volume-scale-shd-mux.t116
-rw-r--r--tests/basic/volume-snap-scheduler.t49
-rwxr-xr-xtests/basic/volume-snapshot-xml.t6
-rw-r--r--tests/basic/volume-status.t22
-rw-r--r--[-rwxr-xr-x]tests/basic/volume.t36
-rw-r--r--tests/bitrot/br-signer-threads-config-1797869.t73
-rw-r--r--tests/bitrot/br-stub.t2
-rw-r--r--tests/bitrot/bug-1244613.t2
-rw-r--r--tests/bitrot/bug-1373520.t3
-rw-r--r--tests/bitrot/bug-1700078.t87
-rw-r--r--tests/bugs/access-control/bug-958691.t2
-rw-r--r--tests/bugs/bitrot/bug-1227996.t1
-rw-r--r--tests/bugs/bitrot/bug-1245981.t4
-rwxr-xr-xtests/bugs/bug-1064147.t72
-rw-r--r--tests/bugs/bug-1138841.t25
-rwxr-xr-xtests/bugs/bug-1258069.t2
-rw-r--r--tests/bugs/bug-1371806.t1
-rw-r--r--tests/bugs/bug-1371806_acl.t6
-rw-r--r--tests/bugs/bug-1620580.t67
-rw-r--r--tests/bugs/bug-1694920.t63
-rw-r--r--tests/bugs/bug-1702299.t67
-rwxr-xr-xtests/bugs/cli/bug-1320388.t2
-rw-r--r--tests/bugs/cli/bug-1353156-get-state-cli-validations.t1
-rw-r--r--tests/bugs/cli/bug-1378842-volume-get-all.t3
-rw-r--r--tests/bugs/cli/bug-983317-volume-get.t13
-rwxr-xr-xtests/bugs/core/bug-1402841.t-mt-dir-scan-race.t9
-rw-r--r--tests/bugs/core/bug-1432542-mpx-restart-crash.t18
-rw-r--r--tests/bugs/core/bug-1650403.t113
-rw-r--r--tests/bugs/core/bug-1699025-brick-mux-detach-brick-fd-issue.t33
-rwxr-xr-xtests/bugs/core/bug-927616.t2
-rwxr-xr-xtests/bugs/ctime/issue-832.t32
-rwxr-xr-xtests/bugs/distribute/bug-1125824.t2
-rwxr-xr-xtests/bugs/distribute/bug-1161156.t2
-rwxr-xr-xtests/bugs/distribute/bug-1161311.t27
-rw-r--r--tests/bugs/distribute/bug-1190734.t2
-rw-r--r--tests/bugs/distribute/bug-1193636.t4
-rw-r--r--tests/bugs/distribute/bug-1600379.t54
-rw-r--r--tests/bugs/distribute/bug-1667804.t63
-rwxr-xr-xtests/bugs/distribute/bug-1786679.t69
-rwxr-xr-xtests/bugs/distribute/issue-1327.t33
-rwxr-xr-xtests/bugs/distribute/overlap.py2
-rw-r--r--tests/bugs/ec/bug-1187474.t2
-rw-r--r--tests/bugs/ec/bug-1236065.t2
-rw-r--r--tests/bugs/ec/bug-1699866-check-reopen-fd.t34
-rw-r--r--tests/bugs/ec/bug-1708156-honor-inodelk-contention-notify-on-partial-locks.t54
-rwxr-xr-xtests/bugs/fuse/bug-858215.t4
-rw-r--r--tests/bugs/fuse/bug-985074.t5
-rwxr-xr-xtests/bugs/fuse/many-groups-for-acl.t13
-rw-r--r--tests/bugs/gfapi/bug-1319374.c1
-rw-r--r--tests/bugs/gfapi/bug-1447266/bug-1447266.t2
-rwxr-xr-xtests/bugs/glusterd/859927/repl.t3
-rw-r--r--tests/bugs/glusterd/brick-mux-validation-in-cluster.t59
-rw-r--r--tests/bugs/glusterd/brick-mux-validation.t4
-rw-r--r--tests/bugs/glusterd/brick-mux.t2
-rw-r--r--tests/bugs/glusterd/brick-order-check-add-brick.t61
-rwxr-xr-xtests/bugs/glusterd/bug-1070734.t2
-rw-r--r--tests/bugs/glusterd/bug-1595320.t2
-rw-r--r--tests/bugs/glusterd/bug-1696046.t113
-rw-r--r--tests/bugs/glusterd/bug-1699339.t73
-rw-r--r--tests/bugs/glusterd/bug-1720566.t50
-rw-r--r--tests/bugs/glusterd/check_elastic_server.t63
-rw-r--r--tests/bugs/glusterd/mgmt-handshake-and-volume-sync-post-glusterd-restart.t18
-rw-r--r--tests/bugs/glusterd/optimized-basic-testcases.t27
-rw-r--r--tests/bugs/glusterd/quorum-validation.t6
-rw-r--r--tests/bugs/glusterd/rebalance-in-cluster.t9
-rw-r--r--tests/bugs/glusterd/rebalance-operations-in-single-node.t4
-rw-r--r--tests/bugs/glusterd/remove-brick-validation.t (renamed from tests/bugs/glusterd/enable-shared-storage-and-remove-brick-validation.t)14
-rw-r--r--tests/bugs/glusterd/removing-multiple-bricks-in-single-remove-brick-command.t1
-rw-r--r--tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t8
-rw-r--r--tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t54
-rw-r--r--tests/bugs/glusterd/validating-options-for-replicated-volume.t13
-rwxr-xr-xtests/bugs/glusterfs-server/bug-852147.t2
-rwxr-xr-xtests/bugs/glusterfs-server/bug-864222.t2
-rw-r--r--tests/bugs/glusterfs-server/bug-873549.t2
-rwxr-xr-xtests/bugs/glusterfs-server/bug-877992.t4
-rwxr-xr-xtests/bugs/glusterfs-server/bug-887145.t16
-rwxr-xr-xtests/bugs/glusterfs-server/bug-904300.t2
-rwxr-xr-xtests/bugs/glusterfs/bug-844688.t43
-rw-r--r--tests/bugs/glusterfs/bug-867253.t2
-rwxr-xr-xtests/bugs/glusterfs/bug-872923.t2
-rw-r--r--tests/bugs/glusterfs/bug-873962-spb.t1
-rwxr-xr-xtests/bugs/glusterfs/bug-902610.t2
-rwxr-xr-xtests/bugs/logging/bug-823081.t8
-rwxr-xr-xtests/bugs/md-cache/bug-1632503.t24
-rw-r--r--tests/bugs/md-cache/bug-1726205.t22
-rwxr-xr-xtests/bugs/nfs/bug-1053579.t2
-rw-r--r--tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t3
-rw-r--r--tests/bugs/nfs/bug-1157223-symlink-mounting.t2
-rw-r--r--tests/bugs/nfs/bug-1161092-nfs-acls.t2
-rwxr-xr-xtests/bugs/nfs/bug-1166862.t2
-rw-r--r--tests/bugs/nfs/bug-1210338.t2
-rwxr-xr-xtests/bugs/nfs/bug-847622.t2
-rwxr-xr-xtests/bugs/nfs/bug-877885.t2
-rwxr-xr-xtests/bugs/nfs/bug-904065.t2
-rwxr-xr-xtests/bugs/nfs/bug-915280.t2
-rwxr-xr-xtests/bugs/nfs/bug-974972.t2
-rw-r--r--tests/bugs/nfs/showmount-many-clients.t2
-rw-r--r--tests/bugs/nfs/socket-as-fifo.t2
-rw-r--r--tests/bugs/nfs/subdir-trailing-slash.t1
-rwxr-xr-xtests/bugs/nfs/zero-atime.t2
-rwxr-xr-xtests/bugs/posix/bug-1040275-brick-uid-reset-on-volume-restart.t8
-rw-r--r--tests/bugs/posix/bug-1651445.t54
-rw-r--r--tests/bugs/protocol/bug-1321578.t53
-rw-r--r--tests/bugs/protocol/bug-1390914.t36
-rw-r--r--tests/bugs/protocol/bug-1433815-auth-allow.t1
-rw-r--r--tests/bugs/quota/bug-1035576.t3
-rw-r--r--tests/bugs/quota/bug-1087198.t2
-rw-r--r--tests/bugs/quota/bug-1153964.t2
-rw-r--r--tests/bugs/quota/bug-1243798.t2
-rwxr-xr-xtests/bugs/readdir-ahead/bug-1436090.t12
-rw-r--r--tests/bugs/readdir-ahead/bug-1670253-consistent-metadata.t23
-rwxr-xr-xtests/bugs/replicate/bug-1046624.t3
-rw-r--r--tests/bugs/replicate/bug-1058797.t3
-rw-r--r--tests/bugs/replicate/bug-1101647.t2
-rw-r--r--tests/bugs/replicate/bug-1130892.t12
-rw-r--r--tests/bugs/replicate/bug-1134691-afr-lookup-metadata-heal.t5
-rw-r--r--tests/bugs/replicate/bug-1180545.t35
-rw-r--r--tests/bugs/replicate/bug-1221481-allow-fops-on-dir-split-brain.t10
-rw-r--r--tests/bugs/replicate/bug-1340623-mkdir-fails-remove-brick-started.t3
-rw-r--r--tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t18
-rw-r--r--tests/bugs/replicate/bug-1468279-source-not-blaming-sinks.t64
-rw-r--r--tests/bugs/replicate/bug-1493415-gfid-heal.t10
-rw-r--r--tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t2
-rw-r--r--tests/bugs/replicate/bug-1637249-gfid-heal.t149
-rw-r--r--tests/bugs/replicate/bug-1655050-dir-sbrain-size-policy.t55
-rwxr-xr-xtests/bugs/replicate/bug-1655052-sbrain-policy-same-size.t55
-rw-r--r--tests/bugs/replicate/bug-1655854-support-dist-to-rep3-arb-conversion.t95
-rw-r--r--tests/bugs/replicate/bug-1657783-do-not-update-read-subvol-on-rename-link.t40
-rw-r--r--tests/bugs/replicate/bug-1686568-send-truncate-on-arbiter-from-shd.t38
-rwxr-xr-xtests/bugs/replicate/bug-1696599-io-hang.t47
-rw-r--r--tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t136
-rw-r--r--tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t116
-rw-r--r--tests/bugs/replicate/bug-1728770-pass-xattrs.t52
-rw-r--r--tests/bugs/replicate/bug-1734370-entry-heal-restore-time.t102
-rw-r--r--tests/bugs/replicate/bug-1744548-heal-timeout.t47
-rw-r--r--tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t89
-rw-r--r--tests/bugs/replicate/bug-1756938-replica-3-sbrain-cli.t111
-rw-r--r--tests/bugs/replicate/bug-1761531-metadata-heal-restore-time.t74
-rw-r--r--tests/bugs/replicate/bug-1801624-entry-heal.t58
-rwxr-xr-xtests/bugs/replicate/bug-830665.t2
-rw-r--r--tests/bugs/replicate/bug-880898.t7
-rwxr-xr-xtests/bugs/replicate/bug-977797.t7
-rw-r--r--tests/bugs/replicate/issue-1254-prioritize-enospc.t80
-rw-r--r--tests/bugs/replicate/mdata-heal-no-xattrs.t59
-rw-r--r--tests/bugs/replicate/ta-inode-refresh-read.t40
-rwxr-xr-xtests/bugs/rpc/bug-1043886.t2
-rwxr-xr-xtests/bugs/rpc/bug-847624.t3
-rwxr-xr-xtests/bugs/rpc/bug-921072.t2
-rwxr-xr-xtests/bugs/rpc/bug-954057.t10
-rw-r--r--tests/bugs/shard/bug-1272986.t6
-rw-r--r--tests/bugs/shard/bug-1669077.t29
-rw-r--r--tests/bugs/shard/bug-1696136-lru-limit-equals-deletion-rate.t34
-rw-r--r--tests/bugs/shard/bug-1696136.c122
-rw-r--r--tests/bugs/shard/bug-1696136.t33
-rw-r--r--tests/bugs/shard/bug-1705884.t32
-rw-r--r--tests/bugs/shard/bug-1738419.t29
-rw-r--r--tests/bugs/shard/bug-shard-discard.c5
-rw-r--r--tests/bugs/shard/issue-1243.t43
-rw-r--r--tests/bugs/shard/issue-1281.t34
-rw-r--r--tests/bugs/shard/issue-1425.t45
-rw-r--r--tests/bugs/shard/shard-fallocate.c7
-rw-r--r--tests/bugs/shard/zero-flag.t1
-rw-r--r--tests/bugs/snapshot/bug-1109889.t4
-rwxr-xr-xtests/bugs/snapshot/bug-1111041.t10
-rw-r--r--tests/bugs/snapshot/bug-1140162-file-snapshot-features-encrypt-opts-validation.t43
-rwxr-xr-xtests/bugs/snapshot/bug-1166197.t2
-rw-r--r--tests/bugs/snapshot/bug-1167580-set-proper-uid-and-gid-during-nfs-access.t2
-rw-r--r--tests/bugs/snapshot/bug-1227646.t1
-rw-r--r--tests/bugs/snapshot/bug-1260848.t2
-rw-r--r--tests/bugs/snapshot/bug-1279327.t1
-rw-r--r--tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t1
-rw-r--r--tests/bugs/snapshot/bug-1597662.t3
-rwxr-xr-xtests/bugs/transport/bug-873367.t2
-rw-r--r--tests/bugs/write-behind/issue-884.c267
-rwxr-xr-xtests/bugs/write-behind/issue-884.t40
-rw-r--r--tests/cluster.rc41
-rw-r--r--tests/ec.rc9
-rwxr-xr-xtests/encryption/crypt.t90
-rw-r--r--tests/encryption/frag.c324
-rw-r--r--tests/env.rc.in5
-rwxr-xr-xtests/features/delay-gen.t12
-rw-r--r--tests/features/flock_interrupt.t32
-rw-r--r--tests/features/fuse-lru-limit.t43
-rw-r--r--tests/features/interrupt.t14
-rwxr-xr-xtests/features/ssl-authz.t26
-rw-r--r--tests/features/ssl-ciphers.t85
-rw-r--r--tests/features/subdir-mount.t11
-rwxr-xr-xtests/features/trash.t74
-rwxr-xr-xtests/features/worm.t39
-rw-r--r--tests/geo-rep.rc164
-rw-r--r--tests/gfid2path/gfid2path_nfs.t2
-rw-r--r--tests/glusterfind/glusterfind-basic.t84
-rw-r--r--tests/include.rc71
-rw-r--r--tests/line-coverage/afr-heal-info.t43
-rwxr-xr-xtests/line-coverage/arbiter-coverage.t32
-rw-r--r--tests/line-coverage/cli-peer-and-volume-operations.t135
-rw-r--r--tests/line-coverage/cli-volume-top-profile-coverage.t62
-rwxr-xr-xtests/line-coverage/errorgen-coverage.t42
-rw-r--r--tests/line-coverage/log-and-brick-ops-negative-case.t82
-rwxr-xr-xtests/line-coverage/meta-max-coverage.t33
-rw-r--r--tests/line-coverage/namespace-linecoverage.t39
-rwxr-xr-xtests/line-coverage/old-protocol.t37
-rwxr-xr-xtests/line-coverage/quiesce-coverage.t44
-rw-r--r--tests/line-coverage/shard-coverage.t33
-rw-r--r--tests/line-coverage/some-features-in-libglusterfs.t67
-rw-r--r--tests/line-coverage/volfile-with-all-graph-syntax.t73
-rw-r--r--tests/ssl.rc2
-rw-r--r--tests/thin-arbiter.rc45
-rw-r--r--tests/tier.rc159
-rw-r--r--tests/utils/changelog/changelog.h6
-rw-r--r--tests/utils/changelog/test-changelog-api.c98
-rw-r--r--tests/utils/changelog/test-history-api.c111
-rw-r--r--tests/utils/changelogparser.py5
-rwxr-xr-xtests/utils/create-files.py9
-rw-r--r--tests/utils/get-mdata-xattr.c152
-rwxr-xr-xtests/utils/gfid-access.py62
-rw-r--r--tests/utils/libcxattr.py22
-rw-r--r--tests/utils/py2py3.py186
-rw-r--r--tests/volume.rc171
-rw-r--r--tools/gfind_missing_files/gcrawler.c8
-rw-r--r--tools/gfind_missing_files/gfind_missing_files.sh2
-rwxr-xr-xtools/glusterfind/S57glusterfind-delete-post.py2
-rw-r--r--tools/glusterfind/glusterfind.in1
-rw-r--r--tools/glusterfind/src/Makefile.am2
-rw-r--r--tools/glusterfind/src/brickfind.py21
-rw-r--r--tools/glusterfind/src/changelog.py60
-rw-r--r--tools/glusterfind/src/conf.py6
-rw-r--r--tools/glusterfind/src/gfind_py2py3.py88
-rw-r--r--tools/glusterfind/src/libgfchangelog.py23
-rw-r--r--tools/glusterfind/src/main.py129
-rw-r--r--tools/glusterfind/src/nodeagent.py2
-rw-r--r--tools/glusterfind/src/utils.py13
-rw-r--r--tools/setgfid2path/src/main.c6
-rw-r--r--xlators/Makefile.am4
-rw-r--r--xlators/cluster/afr/src/afr-common.c2019
-rw-r--r--xlators/cluster/afr/src/afr-dir-read.c52
-rw-r--r--xlators/cluster/afr/src/afr-dir-write.c193
-rw-r--r--xlators/cluster/afr/src/afr-inode-read.c196
-rw-r--r--xlators/cluster/afr/src/afr-inode-read.h3
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.c174
-rw-r--r--xlators/cluster/afr/src/afr-lk-common.c819
-rw-r--r--xlators/cluster/afr/src/afr-mem-types.h4
-rw-r--r--xlators/cluster/afr/src/afr-messages.h162
-rw-r--r--xlators/cluster/afr/src/afr-open.c64
-rw-r--r--xlators/cluster/afr/src/afr-read-txn.c71
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c472
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-data.c76
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c272
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-metadata.c80
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-name.c104
-rw-r--r--xlators/cluster/afr/src/afr-self-heal.h49
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.c473
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.h23
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c592
-rw-r--r--xlators/cluster/afr/src/afr-transaction.h2
-rw-r--r--xlators/cluster/afr/src/afr.c222
-rw-r--r--xlators/cluster/afr/src/afr.h414
-rw-r--r--xlators/cluster/dht/src/Makefile.am14
-rw-r--r--xlators/cluster/dht/src/dht-common.c2647
-rw-r--r--xlators/cluster/dht/src/dht-common.h424
-rw-r--r--xlators/cluster/dht/src/dht-diskusage.c18
-rw-r--r--xlators/cluster/dht/src/dht-hashfn.c47
-rw-r--r--xlators/cluster/dht/src/dht-helper.c333
-rw-r--r--xlators/cluster/dht/src/dht-inode-read.c143
-rw-r--r--xlators/cluster/dht/src/dht-inode-write.c94
-rw-r--r--xlators/cluster/dht/src/dht-layout.c97
-rw-r--r--xlators/cluster/dht/src/dht-linkfile.c55
-rw-r--r--xlators/cluster/dht/src/dht-lock.c190
-rw-r--r--xlators/cluster/dht/src/dht-lock.h1
-rw-r--r--xlators/cluster/dht/src/dht-mem-types.h5
-rw-r--r--xlators/cluster/dht/src/dht-messages.h322
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c1144
-rw-r--r--xlators/cluster/dht/src/dht-rename.c52
-rw-r--r--xlators/cluster/dht/src/dht-selfheal.c668
-rw-r--r--xlators/cluster/dht/src/dht-shared.c151
-rw-r--r--xlators/cluster/dht/src/dht.c1
-rw-r--r--xlators/cluster/dht/src/dht.sym9
-rw-r--r--xlators/cluster/dht/src/nufa.c22
-rw-r--r--xlators/cluster/dht/src/nufa.sym8
-rw-r--r--xlators/cluster/dht/src/switch.c30
-rw-r--r--xlators/cluster/dht/src/switch.sym8
-rw-r--r--xlators/cluster/dht/src/tier-common.c1199
-rw-r--r--xlators/cluster/dht/src/tier-common.h55
-rw-r--r--xlators/cluster/dht/src/tier.c3090
-rw-r--r--xlators/cluster/dht/src/tier.h110
-rw-r--r--xlators/cluster/dht/src/tier.sym9
-rw-r--r--xlators/cluster/dht/src/unittest/dht_layout_mock.c6
-rw-r--r--xlators/cluster/dht/src/unittest/dht_layout_unittest.c4
-rw-r--r--xlators/cluster/ec/src/ec-code.c2
-rw-r--r--xlators/cluster/ec/src/ec-code.h4
-rw-r--r--xlators/cluster/ec/src/ec-combine.c44
-rw-r--r--xlators/cluster/ec/src/ec-common.c271
-rw-r--r--xlators/cluster/ec/src/ec-common.h53
-rw-r--r--xlators/cluster/ec/src/ec-data.c12
-rw-r--r--xlators/cluster/ec/src/ec-data.h2
-rw-r--r--xlators/cluster/ec/src/ec-dir-read.c39
-rw-r--r--xlators/cluster/ec/src/ec-dir-write.c112
-rw-r--r--xlators/cluster/ec/src/ec-fops.h146
-rw-r--r--xlators/cluster/ec/src/ec-galois.c3
-rw-r--r--xlators/cluster/ec/src/ec-generic.c110
-rw-r--r--xlators/cluster/ec/src/ec-heal.c375
-rw-r--r--xlators/cluster/ec/src/ec-heald.c165
-rw-r--r--xlators/cluster/ec/src/ec-heald.h9
-rw-r--r--xlators/cluster/ec/src/ec-helpers.c12
-rw-r--r--xlators/cluster/ec/src/ec-inode-read.c107
-rw-r--r--xlators/cluster/ec/src/ec-inode-write.c199
-rw-r--r--xlators/cluster/ec/src/ec-locks.c110
-rw-r--r--xlators/cluster/ec/src/ec-mem-types.h2
-rw-r--r--xlators/cluster/ec/src/ec-messages.h5
-rw-r--r--xlators/cluster/ec/src/ec-method.h2
-rw-r--r--xlators/cluster/ec/src/ec-types.h40
-rw-r--r--xlators/cluster/ec/src/ec.c288
-rw-r--r--xlators/cluster/ec/src/ec.h1
-rw-r--r--xlators/cluster/stripe/src/Makefile.am22
-rw-r--r--xlators/cluster/stripe/src/stripe-helpers.c658
-rw-r--r--xlators/cluster/stripe/src/stripe-mem-types.h29
-rw-r--r--xlators/cluster/stripe/src/stripe.c5612
-rw-r--r--xlators/cluster/stripe/src/stripe.h291
-rw-r--r--xlators/debug/delay-gen/src/delay-gen-mem-types.h2
-rw-r--r--xlators/debug/delay-gen/src/delay-gen-messages.h2
-rw-r--r--xlators/debug/delay-gen/src/delay-gen.c19
-rw-r--r--xlators/debug/delay-gen/src/delay-gen.h6
-rw-r--r--xlators/debug/error-gen/src/error-gen-mem-types.h2
-rw-r--r--xlators/debug/error-gen/src/error-gen.c86
-rw-r--r--xlators/debug/error-gen/src/error-gen.h1
-rw-r--r--xlators/debug/io-stats/src/io-stats-mem-types.h2
-rw-r--r--xlators/debug/io-stats/src/io-stats.c418
-rw-r--r--xlators/debug/sink/src/sink.c16
-rw-r--r--xlators/debug/trace/src/trace-mem-types.h2
-rw-r--r--xlators/debug/trace/src/trace.c34
-rw-r--r--xlators/debug/trace/src/trace.h16
-rw-r--r--xlators/encryption/Makefile.am3
-rw-r--r--xlators/encryption/crypt/Makefile.am3
-rw-r--r--xlators/encryption/crypt/src/Makefile.am26
-rw-r--r--xlators/encryption/crypt/src/atom.c861
-rw-r--r--xlators/encryption/crypt/src/crypt-common.h133
-rw-r--r--xlators/encryption/crypt/src/crypt-mem-types.h41
-rw-r--r--xlators/encryption/crypt/src/crypt.c3906
-rw-r--r--xlators/encryption/crypt/src/crypt.h931
-rw-r--r--xlators/encryption/crypt/src/data.c715
-rw-r--r--xlators/encryption/crypt/src/keys.c284
-rw-r--r--xlators/encryption/crypt/src/metadata.c575
-rw-r--r--xlators/encryption/crypt/src/metadata.h79
-rw-r--r--xlators/encryption/rot-13/Makefile.am3
-rw-r--r--xlators/experimental/Makefile.am3
-rw-r--r--xlators/experimental/README.md107
-rw-r--r--xlators/experimental/dht2/Makefile.am3
-rw-r--r--xlators/experimental/dht2/README.md47
-rw-r--r--xlators/experimental/dht2/TODO.md3
-rw-r--r--xlators/experimental/dht2/dht2-client/src/Makefile.am21
-rw-r--r--xlators/experimental/dht2/dht2-client/src/dht2-client-main.c58
-rw-r--r--xlators/experimental/dht2/dht2-common/src/dht2-common-map.c19
-rw-r--r--xlators/experimental/dht2/dht2-server/src/Makefile.am23
-rw-r--r--xlators/experimental/dht2/dht2-server/src/dht2-server-main.c58
-rw-r--r--xlators/experimental/fdl/src/Makefile.am48
-rw-r--r--xlators/experimental/fdl/src/dump-tmpl.c.in177
-rw-r--r--xlators/experimental/fdl/src/fdl-tmpl.c.in513
-rw-r--r--xlators/experimental/fdl/src/fdl.h30
-rwxr-xr-xxlators/experimental/fdl/src/gen_dumper.py117
-rwxr-xr-xxlators/experimental/fdl/src/gen_fdl.py354
-rwxr-xr-xxlators/experimental/fdl/src/gen_recon.py218
-rw-r--r--xlators/experimental/fdl/src/logdump.c51
-rw-r--r--xlators/experimental/fdl/src/recon-tmpl.c.in297
-rw-r--r--xlators/experimental/fdl/src/recon.c89
-rw-r--r--xlators/experimental/jbr-client/Makefile.am3
-rw-r--r--xlators/experimental/jbr-client/src/Makefile.am34
-rw-r--r--xlators/experimental/jbr-client/src/fop-template.c.in102
-rwxr-xr-xxlators/experimental/jbr-client/src/gen-fops.py58
-rw-r--r--xlators/experimental/jbr-client/src/jbr-messages.h30
-rw-r--r--xlators/experimental/jbr-client/src/jbrc.c311
-rw-r--r--xlators/experimental/jbr-client/src/jbrc.h27
-rw-r--r--xlators/experimental/jbr-server/Makefile.am3
-rw-r--r--xlators/experimental/jbr-server/src/Makefile.am39
-rw-r--r--xlators/experimental/jbr-server/src/all-templates.c.in501
-rwxr-xr-xxlators/experimental/jbr-server/src/gen-fops.py181
-rw-r--r--xlators/experimental/jbr-server/src/jbr-internal.h118
-rw-r--r--xlators/experimental/jbr-server/src/jbr.c1676
-rw-r--r--xlators/experimental/posix2/Makefile.am3
-rw-r--r--xlators/experimental/posix2/README.md7
-rw-r--r--xlators/experimental/posix2/TODO.md3
-rw-r--r--xlators/experimental/posix2/common/Makefile.am3
-rw-r--r--xlators/experimental/posix2/common/src/Makefile.am16
-rw-r--r--xlators/experimental/posix2/common/src/posix2-common.c18
-rw-r--r--xlators/experimental/posix2/ds/Makefile.am3
-rw-r--r--xlators/experimental/posix2/ds/src/Makefile.am22
-rw-r--r--xlators/experimental/posix2/ds/src/posix2-ds-main.c56
-rw-r--r--xlators/experimental/posix2/mds/Makefile.am3
-rw-r--r--xlators/experimental/posix2/mds/src/Makefile.am22
-rw-r--r--xlators/experimental/posix2/mds/src/posix2-mds-main.c56
-rw-r--r--xlators/features/Makefile.am6
-rw-r--r--xlators/features/arbiter/src/arbiter-mem-types.h2
-rw-r--r--xlators/features/arbiter/src/arbiter.c19
-rw-r--r--xlators/features/arbiter/src/arbiter.h4
-rw-r--r--xlators/features/barrier/src/barrier-mem-types.h2
-rw-r--r--xlators/features/barrier/src/barrier.c100
-rw-r--r--xlators/features/barrier/src/barrier.h9
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h53
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c12
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h20
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub.c98
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub.h2
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-ssm.h2
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot.c320
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot.h36
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-common.h2
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub-helpers.c97
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h3
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h77
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub.c469
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub.h54
-rw-r--r--xlators/features/changelog/lib/examples/python/libgfchangelog.py4
-rw-r--r--xlators/features/changelog/lib/src/Makefile.am2
-rw-r--r--xlators/features/changelog/lib/src/changelog-lib-messages.h32
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-api.c12
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.c59
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.h7
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-journal-handler.c35
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-reborp.c34
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-rpc.h2
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog.c22
-rw-r--r--xlators/features/changelog/lib/src/gf-history-changelog.c83
-rw-r--r--xlators/features/changelog/src/changelog-barrier.c17
-rw-r--r--xlators/features/changelog/src/changelog-encoders.h4
-rw-r--r--xlators/features/changelog/src/changelog-ev-handle.c23
-rw-r--r--xlators/features/changelog/src/changelog-ev-handle.h8
-rw-r--r--xlators/features/changelog/src/changelog-helpers.c316
-rw-r--r--xlators/features/changelog/src/changelog-helpers.h65
-rw-r--r--xlators/features/changelog/src/changelog-mem-types.h2
-rw-r--r--xlators/features/changelog/src/changelog-messages.h125
-rw-r--r--xlators/features/changelog/src/changelog-misc.h4
-rw-r--r--xlators/features/changelog/src/changelog-rpc-common.c73
-rw-r--r--xlators/features/changelog/src/changelog-rpc-common.h4
-rw-r--r--xlators/features/changelog/src/changelog-rpc.c185
-rw-r--r--xlators/features/changelog/src/changelog-rpc.h2
-rw-r--r--xlators/features/changelog/src/changelog-rt.c6
-rw-r--r--xlators/features/changelog/src/changelog-rt.h4
-rw-r--r--xlators/features/changelog/src/changelog.c373
-rw-r--r--xlators/features/changetimerecorder/Makefile.am3
-rw-r--r--xlators/features/changetimerecorder/src/Makefile.am26
-rw-r--r--xlators/features/changetimerecorder/src/changetimerecorder.c2357
-rw-r--r--xlators/features/changetimerecorder/src/changetimerecorder.h21
-rw-r--r--xlators/features/changetimerecorder/src/ctr-helper.c293
-rw-r--r--xlators/features/changetimerecorder/src/ctr-helper.h854
-rw-r--r--xlators/features/changetimerecorder/src/ctr-messages.h61
-rw-r--r--xlators/features/changetimerecorder/src/ctr-xlator-ctx.c362
-rw-r--r--xlators/features/changetimerecorder/src/ctr-xlator-ctx.h68
-rw-r--r--xlators/features/changetimerecorder/src/ctr_mem_types.h22
-rw-r--r--xlators/features/cloudsync/src/Makefile.am4
-rw-r--r--xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.c8
-rw-r--r--xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.h2
-rw-r--r--xlators/features/cloudsync/src/cloudsync-common.c16
-rw-r--r--xlators/features/cloudsync/src/cloudsync-common.h43
-rwxr-xr-xxlators/features/cloudsync/src/cloudsync-fops-c.py38
-rw-r--r--xlators/features/cloudsync/src/cloudsync-mem-types.h3
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am6
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3-mem-types.h2
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c6
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.h8
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile.am (renamed from xlators/experimental/dht2/dht2-server/Makefile.am)0
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile.am12
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/archivestore.h203
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/cvlt-messages.h30
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym1
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt-mem-types.h19
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.c842
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.h84
-rw-r--r--xlators/features/cloudsync/src/cloudsync.c575
-rw-r--r--xlators/features/cloudsync/src/cloudsync.h32
-rw-r--r--xlators/features/compress/src/cdc-helper.c6
-rw-r--r--xlators/features/compress/src/cdc-mem-types.h2
-rw-r--r--xlators/features/compress/src/cdc.c18
-rw-r--r--xlators/features/compress/src/cdc.h2
-rw-r--r--xlators/features/gfid-access/src/gfid-access-mem-types.h2
-rw-r--r--xlators/features/gfid-access/src/gfid-access.c40
-rw-r--r--xlators/features/gfid-access/src/gfid-access.h10
-rw-r--r--xlators/features/glupy/Makefile.am3
-rw-r--r--xlators/features/glupy/doc/README.md44
-rw-r--r--xlators/features/glupy/doc/TESTING9
-rw-r--r--xlators/features/glupy/doc/test.vol10
-rw-r--r--xlators/features/glupy/examples/Makefile.am5
-rw-r--r--xlators/features/glupy/examples/debug-trace.py777
-rw-r--r--xlators/features/glupy/examples/helloworld.py21
-rw-r--r--xlators/features/glupy/examples/negative.py93
-rw-r--r--xlators/features/glupy/src/Makefile.am36
-rw-r--r--xlators/features/glupy/src/glupy.c2446
-rw-r--r--xlators/features/glupy/src/glupy.h56
-rw-r--r--xlators/features/glupy/src/glupy.sym101
-rw-r--r--xlators/features/glupy/src/glupy/Makefile.am5
-rw-r--r--xlators/features/glupy/src/glupy/__init__.py852
-rw-r--r--xlators/features/glupy/src/setup.py.in24
-rw-r--r--xlators/features/index/src/index-mem-types.h6
-rw-r--r--xlators/features/index/src/index-messages.h2
-rw-r--r--xlators/features/index/src/index.c39
-rw-r--r--xlators/features/index/src/index.h10
-rw-r--r--xlators/features/leases/src/leases-internal.c73
-rw-r--r--xlators/features/leases/src/leases-mem-types.h2
-rw-r--r--xlators/features/leases/src/leases-messages.h2
-rw-r--r--xlators/features/leases/src/leases.c43
-rw-r--r--xlators/features/leases/src/leases.h64
-rw-r--r--xlators/features/locks/src/clear.c58
-rw-r--r--xlators/features/locks/src/clear.h8
-rw-r--r--xlators/features/locks/src/common.c578
-rw-r--r--xlators/features/locks/src/common.h87
-rw-r--r--xlators/features/locks/src/entrylk.c106
-rw-r--r--xlators/features/locks/src/inodelk.c249
-rw-r--r--xlators/features/locks/src/locks-mem-types.h2
-rw-r--r--xlators/features/locks/src/locks.h105
-rw-r--r--xlators/features/locks/src/pl-messages.h2
-rw-r--r--xlators/features/locks/src/posix.c1421
-rw-r--r--xlators/features/locks/src/reservelk.c78
-rw-r--r--xlators/features/locks/tests/unit-test.c12
-rw-r--r--xlators/features/marker/src/marker-common.c7
-rw-r--r--xlators/features/marker/src/marker-common.h4
-rw-r--r--xlators/features/marker/src/marker-mem-types.h2
-rw-r--r--xlators/features/marker/src/marker-quota-helper.c95
-rw-r--r--xlators/features/marker/src/marker-quota-helper.h12
-rw-r--r--xlators/features/marker/src/marker-quota.c92
-rw-r--r--xlators/features/marker/src/marker-quota.h17
-rw-r--r--xlators/features/marker/src/marker.c48
-rw-r--r--xlators/features/marker/src/marker.h8
-rw-r--r--xlators/features/metadisp/Makefile.am (renamed from xlators/experimental/fdl/Makefile.am)0
-rw-r--r--xlators/features/metadisp/src/Makefile.am38
-rw-r--r--xlators/features/metadisp/src/backend.c45
-rw-r--r--xlators/features/metadisp/src/fops-tmpl.c10
-rw-r--r--xlators/features/metadisp/src/gen-fops.py160
-rw-r--r--xlators/features/metadisp/src/metadisp-create.c101
-rw-r--r--xlators/features/metadisp/src/metadisp-fops.h51
-rw-r--r--xlators/features/metadisp/src/metadisp-fsync.c54
-rw-r--r--xlators/features/metadisp/src/metadisp-lookup.c90
-rw-r--r--xlators/features/metadisp/src/metadisp-open.c70
-rw-r--r--xlators/features/metadisp/src/metadisp-readdir.c65
-rw-r--r--xlators/features/metadisp/src/metadisp-setattr.c90
-rw-r--r--xlators/features/metadisp/src/metadisp-stat.c124
-rw-r--r--xlators/features/metadisp/src/metadisp-unlink.c160
-rw-r--r--xlators/features/metadisp/src/metadisp.c46
-rw-r--r--xlators/features/metadisp/src/metadisp.h45
-rw-r--r--xlators/features/namespace/src/namespace.c20
-rw-r--r--xlators/features/namespace/src/namespace.h4
-rw-r--r--xlators/features/quiesce/src/quiesce-mem-types.h2
-rw-r--r--xlators/features/quiesce/src/quiesce-messages.h2
-rw-r--r--xlators/features/quiesce/src/quiesce.c64
-rw-r--r--xlators/features/quiesce/src/quiesce.h4
-rw-r--r--xlators/features/quota/src/Makefile.am5
-rw-r--r--xlators/features/quota/src/quota-enforcer-client.c20
-rw-r--r--xlators/features/quota/src/quota-mem-types.h2
-rw-r--r--xlators/features/quota/src/quota-messages.h2
-rw-r--r--xlators/features/quota/src/quota.c268
-rw-r--r--xlators/features/quota/src/quota.h29
-rw-r--r--xlators/features/quota/src/quotad-aggregator.c89
-rw-r--r--xlators/features/quota/src/quotad-aggregator.h10
-rw-r--r--xlators/features/quota/src/quotad-helpers.c6
-rw-r--r--xlators/features/quota/src/quotad.c38
-rw-r--r--xlators/features/quota/src/quotad.sym7
-rw-r--r--xlators/features/read-only/src/read-only-common.c2
-rw-r--r--xlators/features/read-only/src/read-only-common.h4
-rw-r--r--xlators/features/read-only/src/read-only-mem-types.h2
-rw-r--r--xlators/features/read-only/src/read-only.c14
-rw-r--r--xlators/features/read-only/src/read-only.h15
-rw-r--r--xlators/features/read-only/src/worm-helper.c24
-rw-r--r--xlators/features/read-only/src/worm.c144
-rw-r--r--xlators/features/sdfs/src/sdfs-messages.h2
-rw-r--r--xlators/features/sdfs/src/sdfs.c29
-rw-r--r--xlators/features/sdfs/src/sdfs.h6
-rw-r--r--xlators/features/selinux/src/selinux-mem-types.h2
-rw-r--r--xlators/features/selinux/src/selinux-messages.h2
-rw-r--r--xlators/features/selinux/src/selinux.c25
-rw-r--r--xlators/features/selinux/src/selinux.h2
-rw-r--r--xlators/features/shard/src/shard-mem-types.h2
-rw-r--r--xlators/features/shard/src/shard-messages.h2
-rw-r--r--xlators/features/shard/src/shard.c914
-rw-r--r--xlators/features/shard/src/shard.h26
-rw-r--r--xlators/features/snapview-client/src/snapview-client-mem-types.h2
-rw-r--r--xlators/features/snapview-client/src/snapview-client-messages.h39
-rw-r--r--xlators/features/snapview-client/src/snapview-client.c726
-rw-r--r--xlators/features/snapview-client/src/snapview-client.h15
-rw-r--r--xlators/features/snapview-server/src/snapview-server-helpers.c46
-rw-r--r--xlators/features/snapview-server/src/snapview-server-mem-types.h2
-rw-r--r--xlators/features/snapview-server/src/snapview-server-messages.h4
-rw-r--r--xlators/features/snapview-server/src/snapview-server-mgmt.c45
-rw-r--r--xlators/features/snapview-server/src/snapview-server.c49
-rw-r--r--xlators/features/snapview-server/src/snapview-server.h35
-rw-r--r--xlators/features/thin-arbiter/src/Makefile.am2
-rw-r--r--xlators/features/thin-arbiter/src/thin-arbiter-mem-types.h2
-rw-r--r--xlators/features/thin-arbiter/src/thin-arbiter-messages.h2
-rw-r--r--xlators/features/thin-arbiter/src/thin-arbiter.c23
-rw-r--r--xlators/features/thin-arbiter/src/thin-arbiter.h12
-rw-r--r--xlators/features/trash/src/trash-mem-types.h2
-rw-r--r--xlators/features/trash/src/trash.c23
-rw-r--r--xlators/features/trash/src/trash.h10
-rw-r--r--xlators/features/upcall/src/upcall-cache-invalidation.h6
-rw-r--r--xlators/features/upcall/src/upcall-internal.c207
-rw-r--r--xlators/features/upcall/src/upcall-mem-types.h2
-rw-r--r--xlators/features/upcall/src/upcall-messages.h2
-rw-r--r--xlators/features/upcall/src/upcall.c142
-rw-r--r--xlators/features/upcall/src/upcall.h34
-rw-r--r--xlators/features/utime/src/utime-autogen-fops-tmpl.c10
-rw-r--r--xlators/features/utime/src/utime-autogen-fops-tmpl.h2
-rwxr-xr-xxlators/features/utime/src/utime-gen-fops-c.py28
-rwxr-xr-xxlators/features/utime/src/utime-gen-fops-h.py2
-rw-r--r--xlators/features/utime/src/utime-helpers.c11
-rw-r--r--xlators/features/utime/src/utime-helpers.h7
-rw-r--r--xlators/features/utime/src/utime-mem-types.h2
-rw-r--r--xlators/features/utime/src/utime-messages.h5
-rw-r--r--xlators/features/utime/src/utime.c186
-rw-r--r--xlators/features/utime/src/utime.h6
-rw-r--r--xlators/lib/src/libxlator.c15
-rw-r--r--xlators/lib/src/libxlator.h14
-rw-r--r--xlators/meta/src/active-link.c4
-rw-r--r--xlators/meta/src/cmdline-file.c8
-rw-r--r--xlators/meta/src/frames-file.c11
-rw-r--r--xlators/meta/src/graph-dir.c4
-rw-r--r--xlators/meta/src/graphs-dir.c4
-rw-r--r--xlators/meta/src/history-file.c8
-rw-r--r--xlators/meta/src/logfile-link.c4
-rw-r--r--xlators/meta/src/logging-dir.c4
-rw-r--r--xlators/meta/src/loglevel-file.c6
-rw-r--r--xlators/meta/src/mallinfo-file.c6
-rw-r--r--xlators/meta/src/measure-file.c6
-rw-r--r--xlators/meta/src/meminfo-file.c8
-rw-r--r--xlators/meta/src/meta-defaults.c12
-rw-r--r--xlators/meta/src/meta-helpers.c13
-rw-r--r--xlators/meta/src/meta-hooks.h2
-rw-r--r--xlators/meta/src/meta-mem-types.h2
-rw-r--r--xlators/meta/src/meta.c20
-rw-r--r--xlators/meta/src/meta.h2
-rw-r--r--xlators/meta/src/name-file.c8
-rw-r--r--xlators/meta/src/option-file.c4
-rw-r--r--xlators/meta/src/options-dir.c4
-rw-r--r--xlators/meta/src/private-file.c8
-rw-r--r--xlators/meta/src/process_uuid-file.c8
-rw-r--r--xlators/meta/src/profile-file.c8
-rw-r--r--xlators/meta/src/root-dir.c4
-rw-r--r--xlators/meta/src/subvolume-link.c4
-rw-r--r--xlators/meta/src/subvolumes-dir.c4
-rw-r--r--xlators/meta/src/top-link.c4
-rw-r--r--xlators/meta/src/type-file.c8
-rw-r--r--xlators/meta/src/version-file.c8
-rw-r--r--xlators/meta/src/view-dir.c4
-rw-r--r--xlators/meta/src/volfile-file.c6
-rw-r--r--xlators/meta/src/xlator-dir.c4
-rw-r--r--xlators/mgmt/glusterd/src/Makefile.am29
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-bitd-svc.c6
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-bitrot.c97
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-brick-ops.c1121
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c71
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h8
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-errno.h2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-ganesha.c927
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-geo-rep.c295
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-geo-rep.h2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.c7
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c20
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.h2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handler.c1237
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handshake.c385
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.c126
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.h4
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.c118
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.h3
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-log-ops.c24
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mem-types.h4
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-messages.h158
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c247
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.c896
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.h15
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.c69
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.h6
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-nfs-svc.c9
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-nfs-svc.h2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.c1398
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.h29
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-peer-utils.c346
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-peer-utils.h9
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.c40
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.h16
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c10
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quota.c96
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quotad-svc.c8
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rcu.h2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rebalance.c634
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-replace-brick.c40
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-reset-brick.c26
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rpc-ops.c190
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-scrub-svc.c8
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-server-quorum.c20
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c153
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h42
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-shd-svc.c674
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-shd-svc.h17
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.c275
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.h13
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapd-svc.c29
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapd-svc.h2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c376
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot-utils.h10
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot.c409
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-statedump.c10
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-statedump.h2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.c1681
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.h58
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-svc-helper.c836
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-svc-helper.h43
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c284
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h46
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.c199
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.h4
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-tier.c1378
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-tierd-svc-helper.c9
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-tierd-svc-helper.h37
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-tierd-svc.c503
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-tierd-svc.h41
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c3387
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.h106
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c1966
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.h34
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-ops.c906
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c369
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.c395
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h507
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.c904
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.h116
-rw-r--r--xlators/mount/fuse/src/fuse-helpers.c111
-rw-r--r--xlators/mount/fuse/src/fuse-mem-types.h2
-rwxr-xr-xxlators/mount/fuse/utils/mount.glusterfs.in69
-rwxr-xr-xxlators/mount/fuse/utils/mount_glusterfs.in19
-rw-r--r--xlators/nfs/server/src/acl3.c46
-rw-r--r--xlators/nfs/server/src/acl3.h4
-rw-r--r--xlators/nfs/server/src/auth-cache.c15
-rw-r--r--xlators/nfs/server/src/auth-cache.h2
-rw-r--r--xlators/nfs/server/src/exports.c16
-rw-r--r--xlators/nfs/server/src/exports.h2
-rw-r--r--xlators/nfs/server/src/mount3-auth.c2
-rw-r--r--xlators/nfs/server/src/mount3.c88
-rw-r--r--xlators/nfs/server/src/mount3.h12
-rw-r--r--xlators/nfs/server/src/mount3udp_svc.c10
-rw-r--r--xlators/nfs/server/src/netgroups.c2
-rw-r--r--xlators/nfs/server/src/netgroups.h2
-rw-r--r--xlators/nfs/server/src/nfs-common.c17
-rw-r--r--xlators/nfs/server/src/nfs-common.h6
-rw-r--r--xlators/nfs/server/src/nfs-fops.c10
-rw-r--r--xlators/nfs/server/src/nfs-fops.h8
-rw-r--r--xlators/nfs/server/src/nfs-generics.c2
-rw-r--r--xlators/nfs/server/src/nfs-generics.h2
-rw-r--r--xlators/nfs/server/src/nfs-inodes.c2
-rw-r--r--xlators/nfs/server/src/nfs-inodes.h8
-rw-r--r--xlators/nfs/server/src/nfs-mem-types.h2
-rw-r--r--xlators/nfs/server/src/nfs-messages.h2
-rw-r--r--xlators/nfs/server/src/nfs.c71
-rw-r--r--xlators/nfs/server/src/nfs.h8
-rw-r--r--xlators/nfs/server/src/nfs3-fh.c16
-rw-r--r--xlators/nfs/server/src/nfs3-fh.h6
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.c12
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.h2
-rw-r--r--xlators/nfs/server/src/nfs3.c73
-rw-r--r--xlators/nfs/server/src/nfs3.h10
-rw-r--r--xlators/nfs/server/src/nfsserver.sym10
-rw-r--r--xlators/nfs/server/src/nlm4.c122
-rw-r--r--xlators/nfs/server/src/nlm4.h14
-rw-r--r--xlators/nfs/server/src/nlmcbk_svc.c7
-rw-r--r--xlators/performance/Makefile.am2
-rw-r--r--xlators/performance/decompounder/Makefile.am1
-rw-r--r--xlators/performance/decompounder/src/Makefile.am19
-rw-r--r--xlators/performance/decompounder/src/decompounder-mem-types.h17
-rw-r--r--xlators/performance/decompounder/src/decompounder-messages.h28
-rw-r--r--xlators/performance/decompounder/src/decompounder.c833
-rw-r--r--xlators/performance/decompounder/src/decompounder.h78
-rw-r--r--xlators/performance/io-cache/src/io-cache-messages.h41
-rw-r--r--xlators/performance/io-cache/src/io-cache.c292
-rw-r--r--xlators/performance/io-cache/src/io-cache.h45
-rw-r--r--xlators/performance/io-cache/src/ioc-inode.c14
-rw-r--r--xlators/performance/io-cache/src/ioc-mem-types.h2
-rw-r--r--xlators/performance/io-cache/src/page.c67
-rw-r--r--xlators/performance/io-threads/src/io-threads-messages.h16
-rw-r--r--xlators/performance/io-threads/src/io-threads.c158
-rw-r--r--xlators/performance/io-threads/src/io-threads.h18
-rw-r--r--xlators/performance/io-threads/src/iot-mem-types.h2
-rw-r--r--xlators/performance/md-cache/src/md-cache-mem-types.h2
-rw-r--r--xlators/performance/md-cache/src/md-cache-messages.h2
-rw-r--r--xlators/performance/md-cache/src/md-cache.c618
-rw-r--r--xlators/performance/nl-cache/src/nl-cache-helper.c59
-rw-r--r--xlators/performance/nl-cache/src/nl-cache-mem-types.h2
-rw-r--r--xlators/performance/nl-cache/src/nl-cache-messages.h2
-rw-r--r--xlators/performance/nl-cache/src/nl-cache.c22
-rw-r--r--xlators/performance/nl-cache/src/nl-cache.h10
-rw-r--r--xlators/performance/open-behind/src/open-behind-mem-types.h2
-rw-r--r--xlators/performance/open-behind/src/open-behind-messages.h8
-rw-r--r--xlators/performance/open-behind/src/open-behind.c1374
-rw-r--r--xlators/performance/quick-read/src/quick-read-mem-types.h2
-rw-r--r--xlators/performance/quick-read/src/quick-read-messages.h2
-rw-r--r--xlators/performance/quick-read/src/quick-read.c64
-rw-r--r--xlators/performance/quick-read/src/quick-read.h22
-rw-r--r--xlators/performance/read-ahead/src/page.c18
-rw-r--r--xlators/performance/read-ahead/src/read-ahead-mem-types.h2
-rw-r--r--xlators/performance/read-ahead/src/read-ahead-messages.h2
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.c41
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.h10
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h2
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead-messages.h2
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.c181
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.h2
-rw-r--r--xlators/performance/symlink-cache/Makefile.am3
-rw-r--r--xlators/performance/symlink-cache/src/Makefile.am16
-rw-r--r--xlators/performance/symlink-cache/src/symlink-cache-messages.h30
-rw-r--r--xlators/performance/symlink-cache/src/symlink-cache.c360
-rw-r--r--xlators/performance/write-behind/src/write-behind-mem-types.h2
-rw-r--r--xlators/performance/write-behind/src/write-behind-messages.h2
-rw-r--r--xlators/performance/write-behind/src/write-behind.c134
-rw-r--r--xlators/playground/rot-13/Makefile.am (renamed from xlators/cluster/stripe/Makefile.am)0
-rw-r--r--xlators/playground/rot-13/src/Makefile.am (renamed from xlators/encryption/rot-13/src/Makefile.am)0
-rw-r--r--xlators/playground/rot-13/src/rot-13.c (renamed from xlators/encryption/rot-13/src/rot-13.c)6
-rw-r--r--xlators/playground/rot-13/src/rot-13.h (renamed from xlators/encryption/rot-13/src/rot-13.h)0
-rw-r--r--xlators/playground/template/src/template.c2
-rw-r--r--xlators/playground/template/src/template.h14
-rw-r--r--xlators/protocol/auth/addr/src/addr.c18
-rw-r--r--xlators/protocol/client/src/client-callback.c91
-rw-r--r--xlators/protocol/client/src/client-common.c66
-rw-r--r--xlators/protocol/client/src/client-common.h10
-rw-r--r--xlators/protocol/client/src/client-handshake.c762
-rw-r--r--xlators/protocol/client/src/client-helpers.c2360
-rw-r--r--xlators/protocol/client/src/client-lk.c47
-rw-r--r--xlators/protocol/client/src/client-mem-types.h2
-rw-r--r--xlators/protocol/client/src/client-messages.h125
-rw-r--r--xlators/protocol/client/src/client-rpc-fops.c1137
-rw-r--r--xlators/protocol/client/src/client-rpc-fops_v2.c1276
-rw-r--r--xlators/protocol/client/src/client.c1356
-rw-r--r--xlators/protocol/client/src/client.h222
-rw-r--r--xlators/protocol/server/src/Makefile.am4
-rw-r--r--xlators/protocol/server/src/authenticate.h8
-rw-r--r--xlators/protocol/server/src/server-common.c53
-rw-r--r--xlators/protocol/server/src/server-common.h11
-rw-r--r--xlators/protocol/server/src/server-handshake.c133
-rw-r--r--xlators/protocol/server/src/server-helpers.c4345
-rw-r--r--xlators/protocol/server/src/server-helpers.h35
-rw-r--r--xlators/protocol/server/src/server-mem-types.h2
-rw-r--r--xlators/protocol/server/src/server-messages.h181
-rw-r--r--xlators/protocol/server/src/server-resolve.c39
-rw-r--r--xlators/protocol/server/src/server-rpc-fops.c404
-rw-r--r--xlators/protocol/server/src/server-rpc-fops_v2.c1650
-rw-r--r--xlators/protocol/server/src/server.c340
-rw-r--r--xlators/protocol/server/src/server.h88
-rw-r--r--xlators/storage/bd/Makefile.am3
-rw-r--r--xlators/storage/bd/src/Makefile.am21
-rw-r--r--xlators/storage/bd/src/bd-aio.c518
-rw-r--r--xlators/storage/bd/src/bd-aio.h40
-rw-r--r--xlators/storage/bd/src/bd-helper.c1073
-rw-r--r--xlators/storage/bd/src/bd-mem-types.h26
-rw-r--r--xlators/storage/bd/src/bd.c2426
-rw-r--r--xlators/storage/bd/src/bd.h189
-rw-r--r--xlators/storage/posix/src/Makefile.am2
-rw-r--r--xlators/storage/posix/src/posix-aio.c14
-rw-r--r--xlators/storage/posix/src/posix-aio.h3
-rw-r--r--xlators/storage/posix/src/posix-common.c341
-rw-r--r--xlators/storage/posix/src/posix-entry-ops.c445
-rw-r--r--xlators/storage/posix/src/posix-gfid-path.c116
-rw-r--r--xlators/storage/posix/src/posix-gfid-path.h13
-rw-r--r--xlators/storage/posix/src/posix-handle.c209
-rw-r--r--xlators/storage/posix/src/posix-handle.h33
-rw-r--r--xlators/storage/posix/src/posix-helpers.c902
-rw-r--r--xlators/storage/posix/src/posix-inode-fd-ops.c1040
-rw-r--r--xlators/storage/posix/src/posix-inode-handle.h20
-rw-r--r--xlators/storage/posix/src/posix-mem-types.h2
-rw-r--r--xlators/storage/posix/src/posix-messages.h6
-rw-r--r--xlators/storage/posix/src/posix-metadata.c561
-rw-r--r--xlators/storage/posix/src/posix-metadata.h25
-rw-r--r--xlators/storage/posix/src/posix.c34
-rw-r--r--xlators/storage/posix/src/posix.h181
-rw-r--r--xlators/system/posix-acl/src/posix-acl-mem-types.h2
-rw-r--r--xlators/system/posix-acl/src/posix-acl-messages.h2
-rw-r--r--xlators/system/posix-acl/src/posix-acl-xattr.h6
-rw-r--r--xlators/system/posix-acl/src/posix-acl.c222
-rw-r--r--xlators/system/posix-acl/src/posix-acl.h7
-rw-r--r--xlators/xlator.sym10
1439 files changed, 82667 insertions, 101108 deletions
diff --git a/.github/ISSUE_TEMPLATE b/.github/ISSUE_TEMPLATE
index 306b4eebc15..386ed2d8dd5 100644
--- a/.github/ISSUE_TEMPLATE
+++ b/.github/ISSUE_TEMPLATE
@@ -1,13 +1,30 @@
-This repository (glusterfs) uses github issues to track feature requests *only*.
+<!-- Please use this template while reporting an issue, providing as much information as possible. Failure to do so may result in a delayed response. Thank you! -->
-For assistance with bugs, please file a bug in our bugzilla instance [1]
+**Description of problem:**
-For other queries regarding glusterfs, please post to the users [2] or devel [3] lists as appropriate.
-You may further want to subscribe to these lists or view the archives for these lists, see [4] [5].
+**The exact command to reproduce the issue**:
+
+
+**The full output of the command that failed**:
+<details>
+
+
+
+</details>
+
+**Expected results:**
+
+
+**Additional info:**
+
+
+**- The output of the `gluster volume info` command**:
+<details>
+
+
+
+</details>
+
+**- The operating system / glusterfs version**:
-[1] glusterfs Bugzilla: https://bugzilla.redhat.com/enter_bug.cgi?product=GlusterFS
-[2] Gluster users list ID: gluster-users@gluster.org
-[3] Gluster devel list ID: gluster-devel@gluster.org
-[4] Gluster users list subscribe/view archives: https://lists.gluster.org/mailman/listinfo/gluster-users
-[5] Gluster devel list subscribe/view archives: https://lists.gluster.org/mailman/listinfo/gluster-devel
diff --git a/.github/PULL_REQUEST_TEMPLATE b/.github/PULL_REQUEST_TEMPLATE
index 0ec6eb319ee..e69de29bb2d 100644
--- a/.github/PULL_REQUEST_TEMPLATE
+++ b/.github/PULL_REQUEST_TEMPLATE
@@ -1,33 +0,0 @@
-Many thanks for your interest in improving GlusterFS!
-
-GlusterFS does not use GitHub Pull-Requests. Instead, changes are reviewed
-on the Gerrit instance of the Gluster Community at https://review.gluster.org
-
-In order to send your changes for review, follow these steps:
-
-1. login on https://review.gluster.org with your GitHub account
-2. add a public ssh-key to your profile on https://review.gluster.org/#/settings/ssh-keys
-3. add the Gerrit remote to your locally cloned git repository
-
- $ git remote add gerrit ssh://$USER@review.gluster.org/glusterfs.git
-
-4. configure the commit hooks
-
- $ git review --setup
-
-5. post your changes to Gerrit
-
- $ git review
-
-
-You may need to install the 'git-review' package if 'git review' is not
-available. Note that the hooks for the repository make sure to add a ChangeId
-label in the commit messages. Gerrit uses the ChangeId to track single patches
-and its updated versions.
-
-For more details, see the documented development workflow at
- http://gluster.readthedocs.io/en/latest/Developer-guide/Simplified-Development-Workflow/
-
-If there are any troubles or difficulties with these instructions, please
-contact us on gluster-devel@gluster.org or on Freenode IRC in the #gluster-dev
-channel.
diff --git a/.github/RELEASE_TRACKER_TEMPLATE b/.github/RELEASE_TRACKER_TEMPLATE
new file mode 100644
index 00000000000..502bbd5556c
--- /dev/null
+++ b/.github/RELEASE_TRACKER_TEMPLATE
@@ -0,0 +1,12 @@
+<!-- Please use this template while creating a tracker issue -->
+
+**Description of problem:**
+A tracker issue to track the issues that will be fixed as a part of this release
+
+
+**Major or minor release**:
+
+
+**Release version**:
+
+
diff --git a/.github/stale.yml b/.github/stale.yml
new file mode 100644
index 00000000000..460e327c6ea
--- /dev/null
+++ b/.github/stale.yml
@@ -0,0 +1,25 @@
+# Number of days of inactivity before an issue becomes stale
+daysUntilStale: 210
+# Number of days of inactivity before a stale issue is closed
+daysUntilClose: 15
+# Issues with these labels will never be considered stale
+exemptLabels:
+ - pinned
+ - security
+# Label to use when marking an issue as stale
+staleLabel: wontfix
+
+# Comment to post when marking an issue as stale. Set to `false` to disable
+markComment: >
+ Thank you for your contributions.
+
+ Noticed that this issue is not having any activity in last ~6 months! We
+ are marking this issue as stale because it has not had recent activity.
+
+ It will be closed in 2 weeks if no one responds with a comment here.
+
+
+# Comment to post when closing a stale issue. Set to `false` to disable
+closeComment: >
+ Closing this issue as there was no update since my last update on issue.
+ If this is an issue which is still valid, feel free to open it.
diff --git a/.gitignore b/.gitignore
index 4b369515d3e..fc5ba586f8e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -29,6 +29,7 @@ test-driver
*.patch
.libs
.deps
+.dirstamp
# Softlinks to test and log
log
@@ -60,6 +61,7 @@ extras/ocf/glusterd
extras/ocf/volume
extras/run-gluster.tmpfiles
extras/systemd/glusterd.service
+extras/systemd/gluster-ta-volume.service
extras/systemd/glusterfssharedstorage.service
extras/who-wrote-glusterfs/gitdm
geo-replication/.tox
@@ -85,17 +87,14 @@ glusterfsd/src/glusterfsd
glusterfsd/src/gf_attach
heal/src/glfsheal
libgfchangelog.pc
-libgfdb.pc
libglusterfs/src/graph.lex.c
libglusterfs/src/y.tab.c
libglusterfs/src/y.tab.h
libglusterfs/src/defaults.c
-libglusterfs/src/glusterfs-fops.h
libglusterfs/src/cli1-xdr.h
libglusterfs/src/protocol-common.h
libtool
# copied XDR for cyclic libglusterfs <-> rpc-header dependency
-rpc/xdr/gen/*.x
run-tests.sh
!tests/basic/fuse/Makefile
!tests/basic/gfapi/Makefile
@@ -109,14 +108,6 @@ extras/peer_add_secret_pub
tools/gfind_missing_files/gcrawler
tools/glusterfind/glusterfind
tools/glusterfind/src/tool.conf
-# Generated by fdl xlator
-xlators/experimental/fdl/src/fdl.c
-xlators/experimental/fdl/src/gf_logdump
-xlators/experimental/fdl/src/gf_recon
-xlators/experimental/fdl/src/libfdl.c
-xlators/experimental/fdl/src/librecon.c
-xlators/experimental/jbr-client/src/jbrc-cg.c
-xlators/experimental/jbr-server/src/jbr-cg.c
# Eventing
events/src/eventsapiconf.py
extras/systemd/glustereventsd.service
@@ -130,3 +121,5 @@ xlators/features/cloudsync/src/cloudsync-autogen-fops.c
xlators/features/cloudsync/src/cloudsync-autogen-fops.h
xlators/features/utime/src/utime-autogen-fops.c
xlators/features/utime/src/utime-autogen-fops.h
+tests/basic/metadisp/ftruncate
+xlators/features/metadisp/src/fops.c
diff --git a/.testignore b/.testignore
index 190573191e1..fe8f838bf2b 100644
--- a/.testignore
+++ b/.testignore
@@ -1,4 +1,6 @@
.github/ISSUE_TEMPLATE
+.github/PULL_REQUEST_TEMPLATE
+.github/stale.yml
.gitignore
.mailmap
.testignore
@@ -6,7 +8,7 @@
rfc.sh
submit-for-review.sh
AUTHORS
-CONTRIBUTING
+CONTRIBUTING.md
COPYING-GPLV2
COPYING-LGPLV3
ChangeLog
diff --git a/CONTRIBUTING b/CONTRIBUTING
deleted file mode 100644
index 7bccd88d7e5..00000000000
--- a/CONTRIBUTING
+++ /dev/null
@@ -1,25 +0,0 @@
- Developer's Certificate of Origin 1.1
-
- By making a contribution to this project, I certify that:
-
- (a) The contribution was created in whole or in part by me and I
- have the right to submit it under the open source license
- indicated in the file; or
-
- (b) The contribution is based upon previous work that, to the best
- of my knowledge, is covered under an appropriate open source
- license and I have the right under that license to submit that
- work with modifications, whether created in whole or in part
- by me, under the same open source license (unless I am
- permitted to submit under a different license), as indicated
- in the file; or
-
- (c) The contribution was provided directly to me by some other
- person who certified (a), (b) or (c) and I have not modified
- it.
-
- (d) I understand and agree that this project and the contribution
- are public and that a record of the contribution (including all
- personal information I submit with it, including my sign-off) is
- maintained indefinitely and may be redistributed consistent with
- this project or the open source license(s) involved.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 00000000000..65fc3497104
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,114 @@
+# GlusterFS project Contribution guidelines
+
+## Development Workflow
+
+We follow most of the details as per the [document here](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests). If you are not aware of the github workflow, it is recommended to go through them before continuing here.
+
+
+#### Get the Repository setup
+
+0. Fork Repository
+ - Fork [GlusterFS repository](https://github.com/gluster/glusterfs/fork).
+
+1. Clone Repository
+ - Clone the glusterfs repo freshly from github using below steps.
+
+```
+ git clone git@github.com:${username}/glusterfs.git
+ cd glusterfs/
+ git remote add upstream git@github.com:gluster/glusterfs.git
+```
+
+About two tasks are one time for the life time. You can continue to use the same repository for all the work in future.
+
+#### Development & Other flows
+
+0. Issue:
+ - Make sure there is an issue filed for the task you are working on.
+ - If it is not filed, open the issue with all the description.
+ - If it is a bug fix, add label "Type:Bug".
+ - If it is an RFC, provide all the documentation, and request for "DocApproved", and "SpecApproved" label.
+
+1. Code:
+ - Start coding
+ - Build and test locally
+ - Make sure clang-format is installed and is run on the patch.
+
+2. Keep up-to-date
+ - GlusterFS is a large project with many developers, so there would be one or the other patch everyday.
+ - It is critical for developer to be up-to-date with `devel` repo to be Conflict-Free when PR is opened.
+ - Git provides many options to keep up-to-date, below is one of them
+```
+ git fetch upstream
+ git rebase upstream/devel
+```
+ - It is recommended you keep pushing to your repo every day, so you don't loose any work.
+ - It can be done by `./rfc.sh` (or `git push origin HEAD:issueNNN`)
+
+2. Commit Message / PR description:
+ - The name of the branch on your personal fork can start with issueNNNN, followed by anything of your choice.
+ - PRs continue to have the title of format "component: \<title\>", like it is practiced now.
+ - When you open a PR, having a reference Issue for the commit is mandatory in GlusterFS.
+ - Commit message can have, either `Fixes: #NNNN` or `Updates: #NNNN` in a separate line in the commit message.
+ - Here, NNNN is the Issue ID in glusterfs repository.
+ - Each commit needs the author to have the "Signed-off-by: Name \<email\>" line.
+ - Can do this by `-s` option for `git commit`.
+ - If the PR is not ready for review, apply the label `work-in-progress`.
+ - Check the availability of "Draft PR" is present for you, if yes, use that instead.
+
+3. Tests:
+ - All the required smoke tests would be auto-triggered.
+ - Developers get a chance to retrigger the smoke tests using **"/recheck smoke"** as comment.
+ - The "regression" tests would be triggered by a comment **"/run regression"** from developers in the [@gluster-maintainers](https://github.com/orgs/gluster/teams/gluster-maintainers) group.
+ - Ask for help as comment in PR if you have any questions about the process!
+
+4. Review Process:
+ - `+2` : is equivalent to "Approve" from the people in the maintainer's group.
+ - `+1` : can be given by a maintainer/reviewer by explicitly stating that in the comment.
+ - `-1` : provide details on required changes and pick "Request Changes" while submitting your review.
+ - `-2` : done by adding the `DO-NOT-MERGE` label.
+
+ - Any further discussions can happen as comments in the PR.
+
+5. Making changes:
+ - There are 2 approaches to submit changes done after addressing review comments.
+ - Commit changes as a new commit on top of the original commits in the branch, and push the changes to same branch (issueNNNN)
+ - Commit changes into the same commit with `--amend` option, and do a push to the same branch with `--force` option.
+
+6. Merging:
+ - GlusterFS project follows 'Squash and Merge' method
+ - This is mainly to preserve the historic Gerrit method of one patch in `git log` for one URL link.
+ - This also makes every merge a complete patch, which has passed all tests.
+ - The merging of the patch is expected to be done by the maintainers.
+ - It can be done when all the tests (smoke and regression) pass.
+ - When the PR has 'Approved' flag from corresponding maintainer.
+ - If you feel there is delay, feel free to add a comment, discuss the same in Slack channel, or send email.
+
+## By contributing to this project, the contributor would need to agree to below.
+
+### Developer's Certificate of Origin 1.1
+
+By making a contribution to this project, I certify that:
+
+(a) The contribution was created in whole or in part by me and I
+ have the right to submit it under the open source license
+ indicated in the file; or
+
+(b) The contribution is based upon previous work that, to the best
+ of my knowledge, is covered under an appropriate open source
+ license and I have the right under that license to submit that
+ work with modifications, whether created in whole or in part
+ by me, under the same open source license (unless I am
+ permitted to submit under a different license), as indicated
+ in the file; or
+
+(c) The contribution was provided directly to me by some other
+ person who certified (a), (b) or (c) and I have not modified
+ it.
+
+(d) I understand and agree that this project and the contribution
+ are public and that a record of the contribution (including all
+ personal information I submit with it, including my sign-off) is
+ maintained indefinitely and may be redistributed consistent with
+ this project or the open source license(s) involved.
+
diff --git a/MAINTAINERS b/MAINTAINERS
index b1fc0eeaf41..953e8755fd9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -51,15 +51,10 @@ Descriptions of section entries:
General Project Architects
--------------------------
-M: Jeff Darcy <jeff@pl.atyp.us>
-M: Vijay Bellur <vbellur@redhat.com>
-P: Amar Tumballi <amarts@redhat.com>
-P: Pranith Karampuri <pkarampu@redhat.com>
-P: Raghavendra Gowdappa <rgowdapp@redhat.com>
-P: Shyamsundar Ranganathan <srangana@redhat.com>
-P: Niels de Vos <ndevos@redhat.com>
-P: Xavier Hernandez <xhernandez@datalab.es>
-
+M: Amar Tumballi <amarts@gmail.com>
+M: Xavier Hernandez <xhernandez@redhat.com>
+P: Pranith Karampuri <pranith.karampuri@phonepe.com>
+P: Atin Mukherjee <amukherj@redhat.com>
xlators:
--------
@@ -71,13 +66,14 @@ F: xlators/system/posix-acl/
Arbiter
M: Ravishankar N <ravishankar@redhat.com>
-P: Pranith Karampuri <pkarampu@redhat.com>
+P: Pranith Karampuri <pranith.karampuri@phonepe.com>
S: Maintained
F: xlators/features/arbiter/
Automatic File Replication (AFR)
-M: Pranith Karampuri <pkarampu@redhat.com>
-P: Ravishankar N <ravishankar@redhat.com>
+M: Pranith Karampuri <pranith.karampuri@phonepe.com>
+M: Ravishankar N <ravishankar@redhat.com>
+P: Karthik US <ksubrahm@redhat.com>
S: Maintained
F: xlators/cluster/afr/
@@ -87,10 +83,6 @@ P: Atin Mukherjee <amukherj@redhat.com>
S: Maintained
F: xlators/features/barrier
-Block Device
-S: Orphan
-F: xlators/storage/bd/
-
BitRot
M: Kotresh HR <khiremat@redhat.com>
P: Raghavendra Bhat <rabhat@redhat.com>
@@ -103,24 +95,15 @@ P: Kotresh HR <khiremat@redhat.com>
S: Maintained
F: xlators/features/changelog/
-Decompounder
-M: Krutika Dhananjay <kdhananj@redhat.com>
-P: Pranith Karampuri <pkarampu@redhat.com>
-S: Maintained
-F: xlators/features/decompounder/
-
Distributed Hashing Table (DHT)
-M: Raghavendra Gowdappa <rgowdapp@redhat.com>
-M: Nithya Balachandran <nbalacha@redhat.com>
P: Susant Palai <spalai@redhat.com>
S: Maintained
F: xlators/cluster/dht/
Erasure Coding
-M: Pranith Karampuri <pkarampu@redhat.com>
-M: Xavier Hernandez <xhernandez@datalab.es>
+M: Pranith Karampuri <pranith.karampuri@phonepe.com>
+M: Xavier Hernandez <xhernandez@redhat.com>
P: Ashish Pandey <aspandey@redhat.com>
-P: Sunil Kumar Acharya <sheggodu@redhat.com>
S: Maintained
F: xlators/cluster/ec/
@@ -130,21 +113,19 @@ S: Maintained
F: xlators/debug/error-gen/
FUSE Bridge
-M: Niels de Vos <ndevos@redhat.com>
-P: Csaba Henk <chenk@redhat.com>
+M: Csaba Henk <chenk@redhat.com>
+P: Niels de Vos <ndevos@redhat.com>
S: Maintained
F: xlators/mount/
Index
-M: Pranith Karampuri <pkarampu@redhat.com>
+M: Pranith Karampuri <pranith.karampuri@phonepe.com>
P: Ravishankar N <ravishankar@redhat.com>
S: Maintained
F: xlators/features/index/
IO Cache
-M: Raghavendra Gowdappa <rgowdapp@redhat.com>
-P: Mohammed Rafi KC <rkavunga@redhat.com>
-P: Nithya Balachandran <nbalacha@redhat.com>
+P: Mohammed Rafi KC <rafi.kavungal@iternity.com>
S: Maintained
F: xlators/performance/io-cache/
@@ -155,7 +136,7 @@ S: Maintained
F: xlators/debug/io-stats/
IO threads
-M: Pranith Karampuri <pkarampu@redhat.com>
+M: Pranith Karampuri <pranith.karampuri@phonepe.com>
P: Ravishankar N <ravishankar@redhat.com>
S: Maintained
F: xlators/performance/io-threads/
@@ -169,18 +150,17 @@ F: xlators/features/leases/
Locks
M: Krutika Dhananjay <kdhananj@redhat.com>
+P: Xavier Hernandez <xhernandez@redhat.com>
S: Maintained
F: xlators/features/locks/
Marker
-M: Raghavendra Gowdappa <rgowdapp@redhat.com>
M: Kotresh HR <khiremat@redhat.com>
-P: Sanoj Unnikrishnan <sunnikri@redhat.com>
S: Maintained
F: xlators/features/marker/
Meta
-M: Mohammed Rafi KC <rkavunga@redhat.com>
+M: Mohammed Rafi KC <rafi.kavungal@iternity.com>
S: Maintained
F: xlators/features/meta/
@@ -192,53 +172,44 @@ F: xlators/performance/md-cache/
Negative-lookup Cache
M: Poornima G <pgurusid@redhat.com>
-P: Pranith Karampuri <pkarampu@redhat.com>
+P: Pranith Karampuri <pranith.karampuri@phonepe.com>
S: Maintained
F: xlators/performance/nl-cache/
-NFS
-M: Shreyas Siravara <sshreyas@fb.com>
-M: Jeff Darcy <jeff@pl.atyp.us>
-P: Jiffin Tony Thottan <jthottan@redhat.com>
-P: Soumya Koduri <skoduri@redhat.com>
-S: Maintained
+gNFS
+M: Jiffin Tony Thottan <jthottan@redhat.com>
+P: Xie Changlong <xiechanglong@cmss.chinamobile.com>
+P: Amar Tumballi <amarts@gmail.com>
+S: Odd Fixes
F: xlators/nfs/server/
Open-behind
-M: Raghavendra Gowdappa <rgowdapp@redhat.com>
-P: Milind Changire <mchangir@redhat.com>
S: Maintained
F: xlators/performance/open-behind/
Posix:
M: Raghavendra Bhat <raghavendra@redhat.com>
+P: Kotresh HR <khiremat@redhat.com>
P: Krutika Dhananjay <kdhananj@redhat.com>
-P: Jiffin Tony Thottan <jthottan@redhat.com>
S: Maintained
F: xlators/storage/posix/
Quick-read
-M: Raghavendra Gowdappa <rgowdapp@redhat.com>
-P: Milind Changire <mchangir@redhat.com>
S: Maintained
F: xlators/performance/quick-read/
Quota
-M: Raghavendra Gowdappa <rgowdapp@redhat.com>
-P: Sanoj Unnikrishnan <sunnikri@redhat.com>
M: Shyamsundar Ranganathan <srangana@redhat.com>
+P: Hari Gowtham <hgowtham@redhat.com>
S: Maintained
F: xlators/features/quota/
Read-ahead
-M: Raghavendra Gowdappa <rgowdapp@redhat.com>
P: Csaba Henk <chenk@redhat.com>
S: Maintained
F: xlators/performance/read-ahead/
Readdir-ahead
-M: Poornima G <pgurusid@redhat.com>
-P: Krutika Dhananjay <kdhananj@redhat.com>
S: Maintained
F: xlators/performance/readdir-ahead/
@@ -262,7 +233,6 @@ S: Maintained
F: xlators/features/upcall/
Write-behind
-M: Raghavendra Gowdappa <rgowdapp@redhat.com>
P: Csaba Henk <chenk@redhat.com>
S: Maintained
F: xlators/performance/write-behind/
@@ -277,45 +247,24 @@ M: Susant Kumar Palai <spalai@redhat.com>
S: Maintained
F: xlators/features/cloudsync/
-Experimental Features:
-----------------------
-
-RIO-Distribution
-M: Shyamsundar Ranganathan <srangana@redhat.com>
-P: Kotresh HR <khiremat@redhat.com>
-P: Susant Palai <spalai@redhat.com>
-S: Maintained
-F: xlators/experimental/dht2/
-
-Journal Based Replication
-M: Jeff Darcy <jeff@pl.atyp.us>
-P: Mohammed Rafi KC <rkavunga@redhat.com>
-S: Maintained
-F: xlators/experimental/fdl/
-F: xlators/experimenta/jbr-client/
-F: xlators/experimental/jbr-server/
-
-
Other bits of code:
-------------------
Doc
M: Humble Chirammal <hchiramm@redhat.com>
M: Raghavendra Talur <rtalur@redhat.com>
-M: Prashanth Pai <ppai@redhat.com>
S: Maintained
F: doc/
Geo Replication
M: Aravinda V K <avishwan@redhat.com>
M: Kotresh HR <khiremat@redhat.com>
-P: Mohammed Rafi KC <rkavunga@redhat.com>
+M: Sunny Kumar <sunkumar@redhat.com>
S: Maintained
F: geo-replication/
Glusterfind
-M: Milind Changire <mchangir@redhat.com>
-P: Aravinda VK <avishwan@redhat.com>
+M: Aravinda VK <avishwan@redhat.com>
S: Maintained
F: tools/glusterfind/
@@ -328,12 +277,12 @@ S: Maintained
F: api/
libglusterfs
-M: Amar Tumballi <amarts@redhat.com>
+M: Amar Tumballi <amarts@gmail.com>
+M: Xavier Hernandez <xhernandez@redhat.com>
M: Jeff Darcy <jeff@pl.atyp.us>
P: Kaleb Keithley <kkeithle@redhat.com>
P: Niels de Vos <ndevos@redhat.com>
-P: Pranith Karampuri <pkarampu@redhat.com>
-P: Raghavendra Gowdappa <rgowdapp@redhat.com>
+P: Pranith Karampuri <pranith.karampuri@phonepe.com>
P: Shyamsundar Ranganathan <srangana@redhat.com>
S: Maintained
F: libglusterfs/
@@ -341,43 +290,26 @@ F: libglusterfs/
xxhash
M: Aravinda VK <avishwan@redhat.com>
M: Kotresh HR <khiremat@redhat.com>
-P: Amar Tumballi <amarts@redhat.com>
+P: Yaniv Kaul <ykaul@redhat.com>
S: Maintained
F: contrib/xxhash/
T: https://github.com/Cyan4973/xxHash.git
-Management Daemon - glusterd1
+Management Daemon - glusterd
M: Atin Mukherjee <amukherj@redhat.com>
-M: Samikshan Bairagya <samikshan@gmail.com>
+M: Mohit Agrawal <moagrawa@redhat.com>
+M: Sanju Rakonde <srakonde@redhat.com>
S: Maintained
F: cli/
F: xlators/mgmt/glusterd/
-Management Daemon - glusterd2
-M: Kaushal M <kaushal@redhat.com>
-M: Prashanth Pai <ppai@redhat.com>
-P: Aravinda VK <avishwan@redhat.com>
-S: Maintained
-T: https://github.com/gluster/glusterd2.git
-
Protocol
-M: Kaleb Keithley <kkeithle@redhat.com>
M: Niels de Vos <ndevos@redhat.com>
-M: Raghavendra Gowdappa <rgowdapp@redhat.com>
-P: Mohammed Rafi KC <rkavunga@redhat.com>
+P: Mohammed Rafi KC <rafi.kavungal@iternity.com>
S: Maintained
F: xlators/protocol/
-RDMA subsystem
-M: Raghavendra Gowdappa <rgowdapp@redhat.com>
-M: Amar Tumballi <amarts@redhat.com>
-P: Mohammed Rafi KC <rkavunga@redhat.com>
-S: Maintained
-F: rpc/rpc-transport/rdma/
-
Remote Procedure Call subsystem
-M: Raghavendra Gowdappa <rgowdapp@redhat.com>
-P: Milind Changire <mchangir@redhat.com>
P: Mohit Agrawal <moagrawa@redhat.com>
S: Maintained
F: rpc/rpc-lib/
@@ -385,17 +317,16 @@ F: rpc/xdr/
Snapshot
M: Raghavendra Bhat <raghavendra@redhat.com>
-P: Mohammed Rafi KC <rkavunga@redhat.com>
+P: Mohammed Rafi KC <rafi.kavungal@iternity.com>
P: Sunny Kumar <sunkumar@redhat.com>
S: Maintained
F: xlators/mgmt/glusterd/src/glusterd-snap*
F: extras/snap-scheduler.py
Socket subsystem
-M: Raghavendra Gowdappa <rgowdapp@redhat.com>
P: Krutika Dhananjay <kdhananj@redhat.com>
P: Milind Changire <mchangir@redhat.com>
-P: Mohammed Rafi KC <rkavunga@redhat.com>
+P: Mohammed Rafi KC <rafi.kavungal@iternity.com>
P: Mohit Agrawal <moagrawa@redhat.com>
S: Maintained
F: rpc/rpc-transport/socket/
@@ -408,7 +339,6 @@ F: tests/
Utilities
M: Aravinda VK <avishwan@redhat.com>
P: Niels de Vos <ndevos@redhat.com>
-P: Nigel Babu <nigelb@redhat.com>
P: Raghavendra Talur <rtalur@redhat.com>
P: Sachidanda Urs <surs@redhat.com>
S: Maintained
@@ -425,17 +355,18 @@ F: extras/systemd/glustereventsd*
Distribution Specific:
----------------------
Build:
-M: Kaleb Keithley <kkeithle@redhat.com>
M: Niels de Vos <ndevos@redhat.com>
+M: Hari Gowtham <hgowtham@redhat.com>
P: Anoop C S <anoopcs@redhat.com>
-P: Kaushal M <kaushal@redhat.com>
P: Raghavendra Talur <rtalur@redhat.com>
+P: Rinku Kothiya <rkothiya@redhat.com>
S: Maintained
Debian packages on download.gluster.org
M: packaging@gluster.org
M: Kaleb Keithley <kkeithle@redhat.com>
-P: Nigel Babu <nigelb@redhat.com>
+P: Sheetal Pamecha <spamecha@redhat.com>
+P: Shwetha Acharya <sacharya@redhat.com>
S: Maintained
W: http://download.gluster.org/pub/gluster/glusterfs/LATEST/Debian/Debian.README
T: https://github.com/gluster/glusterfs-debian.git
@@ -443,7 +374,8 @@ T: https://github.com/gluster/glusterfs-debian.git
OpenSuSE
M: packaging@gluster.org
M: Kaleb Keithley <kkeithle@redhat.com>
-P: Nigel Babu <nigelb@redhat.com>
+P: Sheetal Pamecha <spamecha@redhat.com>
+P: Shwetha Acharya <sacharya@redhat.com>
S: Maintained
W: https://build.opensuse.org/repositories/home:glusterfs
W: https://download.gluster.org/pub/gluster/glusterfs/LATEST/SuSE/SuSE.README
@@ -460,7 +392,8 @@ T: https://github.com/CentOS-Storage-SIG/glusterfs.git
Ubuntu PPA
M: packaging@gluster.org
M: Kaleb Keithley <kkeithle@redhat.com>
-P: Nigel Babu <nigelb@redhat.com>
+P: Sheetal Pamecha <spamecha@redhat.com>
+P: Shwetha Acharya <sacharya@redhat.com>
S: Maintained
W: https://launchpad.net/~gluster
W: http://download.gluster.org/pub/gluster/glusterfs/LATEST/Ubuntu/Ubuntu.README
@@ -470,22 +403,10 @@ Related projects
----------------
Gluster Block
M: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
-P: Pranith Karampuri <pkarampu@redhat.com>
-P: Niels de Vos <ndevos@redhat.com>
+M: Xiubo Li <xiubli@redhat.com>
S: Maintained
T: https://github.com/gluster/gluster-block.git
-Gluster Object
-P: Ram Edara <redara@redhat.com>
-P: Saravanakumar Arumugam <sarumuga@redhat.com>
-S: Maintained
-T: https://github.com/gluster/gluster-swift.git
-
-GlusterFS Hadoop HCFS plugin
-S: Orphan
-W: https://github.com/gluster/glusterfs-hadoop/wiki
-T: https://github.com/gluster/glusterfs-hadoop.git
-
GlusterFS core-utils
M: Anoop C S <anoopcs@redhat.com>
S: Maintained
@@ -499,11 +420,6 @@ S: Maintained
T: git://github.com/nfs-ganesha/nfs-ganesha.git
F: src/nfs-ganesha~/src/FSAL/FSAL_GLUSTER/
-Nagios Monitoring
-M: Sahina Bose <sabose@redhat.com>
-S: Maintained
-T: https://github.com/gluster/nagios-plugins-gluster.git
-
QEMU integration
M: Niels de Vos <ndevos@redhat.com>
M: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
@@ -530,7 +446,6 @@ Testing - Glusto-Tests
M: Jonathan Holloway <jholloway@redhat.com>
M: Vijay Bhaskar Reddy Avuthu <vavuthu@redhat.com>
M: Akarsha Rai <akrai@redhat.com>
-P: Nigel Babu <nigelb@redhat.com>
S: Maintained
T: https://github.com/gluster/glusto-tests.git
@@ -545,16 +460,13 @@ Infrastructure
--------------
Platform
-M: Nigel Babu <nigelb@redhat.com>
M: Michael Scherer <misc@redhat.com>
P: Shyamsundar Ranganathan <srangana@redhat.com>
-P: Amar Tumballi <amarts@redhat.com>
+P: Amar Tumballi <amarts@gmail.com>
Continuous Integration
-M: Nigel Babu <nigelb@redhat.com>
M: Michael Scherer <misc@redhat.com>
M: Deepshikha Khandelwal <dkhandel@redhat.com>
-P: Kaushal M <kaushal@redhat.com>
P: Niels de Vos <ndevos@redhat.com>
Special Thanks
@@ -562,6 +474,17 @@ Special Thanks
GlusterFS would not be possible without the contributions of:
+
+M: Vijay Bellur <vbellur@redhat.com>
+M: Jeff Darcy <jeff@pl.atyp.us>
+M: Shreyas Siravara <sshreyas@fb.com>
+M: Kaushal M <kaushal@redhat.com>
+M: Nigel Babu
+M: Prashanth Pai
+P: Sanoj Unnikrishnan
+P: Milind Changire <mchangir@redhat.com>
+P: Sunil Kumar Acharya <sheggodu@redhat.com>
+M: Samikshan Bairagya <samikshan@gmail.com>
M: Chris Hertel
M: M. Mohan Kumar <mohan@in.ibm.com>
M: Shishir Gowda <gowda.shishir@gmail.com>
@@ -583,3 +506,5 @@ M: Jay Vyas
M: Luis Pabon
M: Ira Cooper
M: Shwetha Panduranga
+M: Nithya Balachandran
+M: Raghavendra Gowdappa
diff --git a/Makefile.am b/Makefile.am
index e0c795f418f..98ea5c1038d 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -9,8 +9,9 @@ EXTRA_DIST = autogen.sh \
contrib/umountd \
$(shell find $(top_srcdir)/tests -type f -print)
-SUBDIRS = $(ARGP_STANDALONE_DIR) rpc/xdr/gen libglusterfs rpc api xlators \
- glusterfsd $(FUSERMOUNT_SUBDIR) doc extras cli heal \
+
+SUBDIRS = $(ARGP_STANDALONE_DIR) libglusterfs rpc libglusterd api \
+ glusterfsd xlators $(FUSERMOUNT_SUBDIR) doc extras cli heal \
@SYNCDAEMON_SUBDIR@ @UMOUNTD_SUBDIR@ tools events
pkgconfigdir = @pkgconfigdir@
@@ -18,9 +19,13 @@ pkgconfig_DATA = glusterfs-api.pc libgfchangelog.pc
CLEANFILES = glusterfs-api.pc libgfchangelog.pc contrib/umountd/Makefile
+clean-local:
+ find . -name '*.o' -o -name '*.lo' -o -name '.Po' | xargs rm -f
+
gitclean: distclean
find . -name Makefile.in -exec rm -f {} \;
find . -name mount.glusterfs -exec rm -f {} \;
+ find . -name .deps -o -name .libs | xargs rm -rf
rm -fr autom4te.cache
rm -f missing aclocal.m4 config.h.in config.guess config.sub ltmain.sh install-sh configure depcomp
@@ -48,4 +53,3 @@ gen-VERSION:
./build-aux/pkg-version --full \
> $(abs_top_builddir)/$(distdir)/VERSION; \
fi
-
diff --git a/README.md b/README.md
index 92f829e431e..9d68e033782 100644
--- a/README.md
+++ b/README.md
@@ -3,8 +3,7 @@
petabytes. It provides interfaces for object, block and file storage.
## Development
- Contributions to gluster in the form of patches and new feature additions can
- be made by following steps outlined at [Developers Guide](http://docs.gluster.org/en/latest/Developer-guide/Developers-Index/#contributing-to-the-gluster-community).
+ The development workflow is documented in [Contributors guide](CONTRIBUTING.md)
## Documentation
The Gluster documentation can be found at [Gluster Docs](http://docs.gluster.org).
diff --git a/api/examples/glfsxmp.c b/api/examples/glfsxmp.c
index 9d96eea1d87..a55616ef739 100644
--- a/api/examples/glfsxmp.c
+++ b/api/examples/glfsxmp.c
@@ -6,6 +6,8 @@
#include <string.h>
#include <time.h>
+#define TEST_STR_LEN 2048
+
int
test_dirops(glfs_t *fs)
{
@@ -32,6 +34,8 @@ int
test_xattr(glfs_t *fs)
{
char *filename = "/filename2";
+ char *linkfile = "/linkfile";
+ glfs_fd_t *fd = NULL;
char buf[512];
char *ptr;
int ret;
@@ -42,16 +46,79 @@ test_xattr(glfs_t *fs)
ret = glfs_setxattr(fs, filename, "user.testkey2", "testval", 8, 0);
fprintf(stderr, "setxattr(%s): %d (%s)\n", filename, ret, strerror(errno));
+ ret = glfs_getxattr(fs, filename, "user.testkey", buf, 512);
+ fprintf(stderr, "getxattr(%s): %d (%s)\n", filename, ret, strerror(errno));
+ if (ret < 0)
+ return -1;
+
ret = glfs_listxattr(fs, filename, buf, 512);
fprintf(stderr, "listxattr(%s): %d (%s)\n", filename, ret, strerror(errno));
if (ret < 0)
return -1;
+ ret = glfs_symlink(fs, "filename", linkfile);
+ fprintf(stderr, "symlink(%s %s): %s\n", filename, linkfile,
+ strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_readlink(fs, linkfile, buf, 512);
+ fprintf(stderr, "readlink(%s) : %d (%s)\n", filename, ret, strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_lsetxattr(fs, filename, "user.testkey3", "testval", 8, 0);
+ fprintf(stderr, "lsetxattr(%s) : %d (%s)\n", linkfile, ret,
+ strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_llistxattr(fs, linkfile, buf, 512);
+ fprintf(stderr, "llistxattr(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_lgetxattr(fs, filename, "user.testkey3", buf, 512);
+ fprintf(stderr, "lgetxattr(%s): %d (%s)\n", linkfile, ret, strerror(errno));
+ if (ret < 0)
+ return -1;
+
for (ptr = buf; ptr < buf + ret; ptr++) {
printf("key=%s\n", ptr);
ptr += strlen(ptr);
}
+ ret = glfs_removexattr(fs, filename, "user.testkey2");
+ fprintf(stderr, "removexattr(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+
+ fd = glfs_open(fs, filename, O_RDWR);
+ fprintf(stderr, "open(%s): (%p) %s\n", filename, fd, strerror(errno));
+
+ ret = glfs_fsetxattr(fd, "user.testkey2", "testval", 8, 0);
+ fprintf(stderr, "fsetxattr(%s): %d (%s)\n", filename, ret, strerror(errno));
+
+ ret = glfs_fgetxattr(fd, "user.testkey2", buf, 512);
+ fprintf(stderr, "fgetxattr(%s): %d (%s)\n", filename, ret, strerror(errno));
+
+ ret = glfs_flistxattr(fd, buf, 512);
+ fprintf(stderr, "flistxattr(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ for (ptr = buf; ptr < buf + ret; ptr++) {
+ printf("key=%s\n", ptr);
+ ptr += strlen(ptr);
+ }
+
+ ret = glfs_fremovexattr(fd, "user.testkey2");
+ fprintf(stderr, "fremovexattr(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+
+ glfs_close(fd);
+
return 0;
}
@@ -59,22 +126,28 @@ int
test_chdir(glfs_t *fs)
{
int ret = -1;
+ char *dir = "/dir";
char *topdir = "/topdir";
char *linkdir = "/linkdir";
+ char *linkdir2 = "/linkdir2";
char *subdir = "./subdir";
char *respath = NULL;
char pathbuf[4096];
ret = glfs_mkdir(fs, topdir, 0755);
- if (ret) {
- fprintf(stderr, "mkdir(%s): %s\n", topdir, strerror(errno));
+ fprintf(stderr, "mkdir(%s): %s\n", topdir, strerror(errno));
+ if (ret)
+ return -1;
+
+ ret = glfs_mkdir(fs, dir, 0755);
+ fprintf(stderr, "mkdir(%s): %s\n", dir, strerror(errno));
+ if (ret)
return -1;
- }
respath = glfs_getcwd(fs, pathbuf, 4096);
fprintf(stdout, "getcwd() = %s\n", respath);
- ret = glfs_symlink(fs, topdir, linkdir);
+ ret = glfs_symlink(fs, "topdir", linkdir);
if (ret) {
fprintf(stderr, "symlink(%s, %s): %s\n", topdir, linkdir,
strerror(errno));
@@ -234,7 +307,7 @@ test_h_unlink(void)
}
peek_stat(&sb);
- dir = glfs_h_mkdir(fs, parent, my_dir, 0644, &sb);
+ dir = glfs_h_mkdir(fs, parent, my_dir, 0755, &sb);
if (dir == NULL) {
fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
my_dir, parent, strerror(errno));
@@ -250,7 +323,7 @@ test_h_unlink(void)
goto out;
}
- subdir = glfs_h_mkdir(fs, dir, my_subdir, 0644, &sb);
+ subdir = glfs_h_mkdir(fs, dir, my_subdir, 0755, &sb);
if (subdir == NULL) {
fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
my_subdir, dir, strerror(errno));
@@ -374,7 +447,7 @@ test_h_getsetattrs(void)
}
peek_stat(&sb);
- dir = glfs_h_mkdir(fs, parent, my_dir, 0644, &sb);
+ dir = glfs_h_mkdir(fs, parent, my_dir, 0755, &sb);
if (dir == NULL) {
fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
my_dir, parent, strerror(errno));
@@ -472,7 +545,7 @@ test_h_truncate(void)
}
peek_stat(&sb);
- parent = glfs_h_mkdir(fs, root, my_dir, 0644, &sb);
+ parent = glfs_h_mkdir(fs, root, my_dir, 0755, &sb);
if (parent == NULL) {
fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
my_dir, root, strerror(errno));
@@ -611,7 +684,7 @@ test_h_links(void)
}
peek_stat(&sb);
- parent = glfs_h_mkdir(fs, root, my_dir, 0644, &sb);
+ parent = glfs_h_mkdir(fs, root, my_dir, 0755, &sb);
if (parent == NULL) {
fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
my_dir, root, strerror(errno));
@@ -629,7 +702,7 @@ test_h_links(void)
}
peek_stat(&sb);
- dirsrc = glfs_h_mkdir(fs, parent, linksrc_dir, 0644, &sb);
+ dirsrc = glfs_h_mkdir(fs, parent, linksrc_dir, 0755, &sb);
if (dirsrc == NULL) {
fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
linksrc_dir, parent, strerror(errno));
@@ -638,7 +711,7 @@ test_h_links(void)
}
peek_stat(&sb);
- dirtgt = glfs_h_mkdir(fs, parent, linktgt_dir, 0644, &sb);
+ dirtgt = glfs_h_mkdir(fs, parent, linktgt_dir, 0755, &sb);
if (dirtgt == NULL) {
fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
linktgt_dir, parent, strerror(errno));
@@ -758,7 +831,7 @@ test_h_rename(void)
}
peek_stat(&sb);
- parent = glfs_h_mkdir(fs, root, my_dir, 0644, &sb);
+ parent = glfs_h_mkdir(fs, root, my_dir, 0755, &sb);
if (parent == NULL) {
fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
my_dir, root, strerror(errno));
@@ -776,7 +849,7 @@ test_h_rename(void)
}
peek_stat(&sb);
- dirsrc = glfs_h_mkdir(fs, parent, src_dir, 0644, &sb);
+ dirsrc = glfs_h_mkdir(fs, parent, src_dir, 0755, &sb);
if (dirsrc == NULL) {
fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
src_dir, parent, strerror(errno));
@@ -785,7 +858,7 @@ test_h_rename(void)
}
peek_stat(&sb);
- dirtgt = glfs_h_mkdir(fs, parent, tgt_dir, 0644, &sb);
+ dirtgt = glfs_h_mkdir(fs, parent, tgt_dir, 0755, &sb);
if (dirtgt == NULL) {
fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
tgt_dir, parent, strerror(errno));
@@ -932,7 +1005,7 @@ test_h_performance(void)
goto out;
}
- dir = glfs_h_mkdir(fs, parent, my_dir, 0644, &sb);
+ dir = glfs_h_mkdir(fs, parent, my_dir, 0755, &sb);
if (dir == NULL) {
fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
my_dir, parent, strerror(errno));
@@ -1007,7 +1080,7 @@ test_h_performance(void)
c_ts.tv_nsec = o_ts.tv_nsec = 0;
sprintf(my_file_name, "%s1", full_dir_path);
- ret = glfs_mkdir(fs, my_file_name, 0644);
+ ret = glfs_mkdir(fs, my_file_name, 0755);
if (ret != 0) {
fprintf(stderr, "glfs_mkdir: error creating %s: from (%p),%s\n", my_dir,
parent, strerror(errno));
@@ -1097,7 +1170,7 @@ test_handleops(int argc, char *argv[])
char *full_newnod_name = "/testdir/nod1", *newnod_name = "nod1";
/* Initialize test area */
- ret = glfs_mkdir(fs, full_parent_name, 0644);
+ ret = glfs_mkdir(fs, full_parent_name, 0755);
if (ret != 0 && errno != EEXIST) {
fprintf(stderr, "%s: (%p) %s\n", full_parent_name, fd, strerror(errno));
printf("Test initialization failed on volume %s\n", argv[1]);
@@ -1398,7 +1471,7 @@ test_handleops(int argc, char *argv[])
}
peek_stat(&sb);
- leaf = glfs_h_mkdir(fs, parent, newparent_name, 0644, &sb);
+ leaf = glfs_h_mkdir(fs, parent, newparent_name, 0755, &sb);
if (leaf == NULL) {
fprintf(stderr, "glfs_h_mkdir: error on mkdir of %s: from (%p),%s\n",
newparent_name, parent, strerror(errno));
@@ -1410,7 +1483,7 @@ test_handleops(int argc, char *argv[])
glfs_h_close(leaf);
leaf = NULL;
- leaf = glfs_h_mkdir(fs, parent, newparent_name, 0644, &sb);
+ leaf = glfs_h_mkdir(fs, parent, newparent_name, 0755, &sb);
if (leaf != NULL || errno != EEXIST) {
fprintf(stderr,
"glfs_h_mkdir: existing directory, leaf = (%p), errno = %s\n",
@@ -1522,6 +1595,135 @@ out:
}
int
+test_write_apis(glfs_t *fs)
+{
+ /* Add more content here */
+ /* Some apis we can get are */
+ /*
+ 0. glfs_set_xlator_option()
+
+ Read/Write combinations:
+ . glfs_{p,}readv/{p,}writev
+ . glfs_pread/pwrite
+
+ tests/basic/gfapi/gfapi-async-calls-test.c
+ . glfs_read_async/write_async
+ . glfs_pread_async/pwrite_async
+ . glfs_readv_async/writev_async
+ . glfs_preadv_async/pwritev_async
+
+ . ftruncate/ftruncate_async
+ . fsync/fsync_async
+ . fdatasync/fdatasync_async
+
+ */
+
+ glfs_fd_t *fd = NULL;
+ char *filename = "/filename2";
+ int flags = O_RDWR;
+ char *buf = "some bytes!";
+ char writestr[TEST_STR_LEN];
+ struct iovec iov = {&writestr, TEST_STR_LEN};
+ int ret, i;
+
+ for (i = 0; i < TEST_STR_LEN; i++)
+ writestr[i] = 0x11;
+
+ fd = glfs_open(fs, filename, flags);
+ if (!fd)
+ fprintf(stderr, "open(%s): (%p) %s\n", filename, fd, strerror(errno));
+
+ ret = glfs_writev(fd, &iov, 1, flags);
+ if (ret < 0) {
+ fprintf(stderr, "writev(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ }
+
+ ret = glfs_pwrite(fd, buf, 10, 4, flags, NULL, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "pwrite(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ }
+
+ ret = glfs_pwritev(fd, &iov, 1, 4, flags);
+ if (ret < 0) {
+ fprintf(stderr, "pwritev(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ }
+
+ return 0;
+}
+
+int
+test_metadata_ops(glfs_t *fs, glfs_t *fs2)
+{
+ glfs_fd_t *fd = NULL;
+ glfs_fd_t *fd2 = NULL;
+ struct stat sb = {
+ 0,
+ };
+ struct glfs_stat gsb = {
+ 0,
+ };
+ struct statvfs sfs;
+ char readbuf[32];
+ char writebuf[32];
+
+ char *filename = "/filename2";
+ int ret;
+
+ ret = glfs_lstat(fs, filename, &sb);
+ fprintf(stderr, "lstat(%s): (%d) %s\n", filename, ret, strerror(errno));
+
+ fd = glfs_creat(fs, filename, O_RDWR, 0644);
+ fprintf(stderr, "creat(%s): (%p) %s\n", filename, fd, strerror(errno));
+
+ fd2 = glfs_open(fs2, filename, O_RDWR);
+ fprintf(stderr, "open(%s): (%p) %s\n", filename, fd, strerror(errno));
+
+ glfs_lseek(fd2, 0, SEEK_SET);
+
+ ret = glfs_read(fd2, readbuf, 32, 0);
+
+ printf("read %d, %s", ret, readbuf);
+
+ /* get stat */
+ ret = glfs_fstat(fd2, &sb);
+
+ ret = glfs_access(fs, filename, R_OK);
+
+ /* set stat */
+ /* TODO: got some errors, need to fix */
+ /* ret = glfs_fsetattr(fd2, &gsb); */
+
+ glfs_close(fd);
+ glfs_close(fd2);
+
+ filename = "/filename3";
+ ret = glfs_mknod(fs, filename, S_IFIFO, 0);
+ fprintf(stderr, "%s: (%d) %s\n", filename, ret, strerror(errno));
+
+ ret = glfs_lstat(fs, filename, &sb);
+ fprintf(stderr, "%s: (%d) %s\n", filename, ret, strerror(errno));
+
+ ret = glfs_rename(fs, filename, "/filename4");
+ fprintf(stderr, "rename(%s): (%d) %s\n", filename, ret, strerror(errno));
+
+ ret = glfs_unlink(fs, "/filename4");
+ fprintf(stderr, "unlink(%s): (%d) %s\n", "/filename4", ret,
+ strerror(errno));
+
+ filename = "/dirname2";
+ ret = glfs_mkdir(fs, filename, 0);
+ fprintf(stderr, "%s: (%d) %s\n", filename, ret, strerror(errno));
+
+ ret = glfs_lstat(fs, filename, &sb);
+ fprintf(stderr, "lstat(%s): (%d) %s\n", filename, ret, strerror(errno));
+
+ ret = glfs_rmdir(fs, filename);
+ fprintf(stderr, "rmdir(%s): (%d) %s\n", filename, ret, strerror(errno));
+}
+int
main(int argc, char *argv[])
{
glfs_t *fs2 = NULL;
@@ -1531,6 +1733,10 @@ main(int argc, char *argv[])
struct stat sb = {
0,
};
+ struct glfs_stat gsb = {
+ 0,
+ };
+ struct statvfs sfs;
char readbuf[32];
char writebuf[32];
@@ -1559,6 +1765,9 @@ main(int argc, char *argv[])
fprintf(stderr, "glfs_init: returned %d\n", ret);
+ if (ret)
+ goto out;
+
sleep(2);
fs2 = glfs_new(argv[1]);
@@ -1577,50 +1786,7 @@ main(int argc, char *argv[])
fprintf(stderr, "glfs_init: returned %d\n", ret);
- ret = glfs_lstat(fs, filename, &sb);
- fprintf(stderr, "%s: (%d) %s\n", filename, ret, strerror(errno));
-
- fd = glfs_creat(fs, filename, O_RDWR, 0644);
- fprintf(stderr, "%s: (%p) %s\n", filename, fd, strerror(errno));
-
- fd2 = glfs_open(fs2, filename, O_RDWR);
- fprintf(stderr, "%s: (%p) %s\n", filename, fd, strerror(errno));
-
- sprintf(writebuf, "hi there\n");
- ret = glfs_write(fd, writebuf, 32, 0);
-
- glfs_lseek(fd2, 0, SEEK_SET);
-
- ret = glfs_read(fd2, readbuf, 32, 0);
-
- printf("read %d, %s", ret, readbuf);
-
- glfs_close(fd);
- glfs_close(fd2);
-
- filename = "/filename3";
- ret = glfs_mknod(fs, filename, S_IFIFO, 0);
- fprintf(stderr, "%s: (%d) %s\n", filename, ret, strerror(errno));
-
- ret = glfs_lstat(fs, filename, &sb);
- fprintf(stderr, "%s: (%d) %s\n", filename, ret, strerror(errno));
-
- ret = glfs_rename(fs, filename, "/filename4");
- fprintf(stderr, "rename(%s): (%d) %s\n", filename, ret, strerror(errno));
-
- ret = glfs_unlink(fs, "/filename4");
- fprintf(stderr, "unlink(%s): (%d) %s\n", "/filename4", ret,
- strerror(errno));
-
- filename = "/dirname2";
- ret = glfs_mkdir(fs, filename, 0);
- fprintf(stderr, "%s: (%d) %s\n", filename, ret, strerror(errno));
-
- ret = glfs_lstat(fs, filename, &sb);
- fprintf(stderr, "lstat(%s): (%d) %s\n", filename, ret, strerror(errno));
-
- ret = glfs_rmdir(fs, filename);
- fprintf(stderr, "rmdir(%s): (%d) %s\n", filename, ret, strerror(errno));
+ test_metadata_ops(fs, fs2);
test_dirops(fs);
@@ -1631,8 +1797,15 @@ main(int argc, char *argv[])
test_handleops(argc, argv);
// done
+ /* Test some extra apis */
+ test_write_apis(fs);
+
+ glfs_statvfs(fs, "/", &sfs);
+
glfs_fini(fs);
glfs_fini(fs2);
+ ret = 0;
+out:
return ret;
}
diff --git a/api/src/Makefile.am b/api/src/Makefile.am
index 6ed30bc99f6..7f9a7d17b35 100644
--- a/api/src/Makefile.am
+++ b/api/src/Makefile.am
@@ -19,7 +19,7 @@ libgfapi_la_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
-I$(top_srcdir)/rpc/xdr/src \
-I$(top_builddir)/rpc/xdr/src \
-DDATADIR=\"$(localstatedir)\" \
- -D__USE_FILE_OFFSET64
+ -D__USE_FILE_OFFSET64 -D__USE_LARGEFILE64
AM_CFLAGS = -Wall $(GF_CFLAGS)
diff --git a/api/src/README.Symbol_Versions b/api/src/README.Symbol_Versions
index d5cdedd826b..b6ec95f9311 100644
--- a/api/src/README.Symbol_Versions
+++ b/api/src/README.Symbol_Versions
@@ -1,3 +1,3 @@
-See .../doc/gfapi-symbol-versions/gfapi-symbol-versions.md
+See ../../doc/developer-guide/gfapi-symbol-versions.md
diff --git a/api/src/gfapi-messages.h b/api/src/gfapi-messages.h
index 03269a1c9d8..b9223940416 100644
--- a/api/src/gfapi-messages.h
+++ b/api/src/gfapi-messages.h
@@ -11,7 +11,7 @@
#ifndef _GFAPI_MESSAGES_H__
#define _GFAPI_MESSAGES_H__
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
@@ -49,6 +49,99 @@ GLFS_MSGID(API, API_MSG_MEM_ACCT_INIT_FAILED, API_MSG_MASTER_XLATOR_INIT_FAILED,
API_MSG_INODE_LINK_FAILED, API_MSG_STATEDUMP_FAILED,
API_MSG_XREADDIRP_R_FAILED, API_MSG_LOCK_INSERT_MERGE_FAILED,
API_MSG_SETTING_LOCK_TYPE_FAILED, API_MSG_INODE_FIND_FAILED,
- API_MSG_FDCTX_SET_FAILED, API_MSG_UPCALL_SYNCOP_FAILED);
+ API_MSG_FDCTX_SET_FAILED, API_MSG_UPCALL_SYNCOP_FAILED,
+ API_MSG_INVALID_ARG, API_MSG_UPCALL_EVENT_NULL_RECEIVED,
+ API_MSG_FLAGS_HANDLE, API_MSG_FDCREATE_FAILED_ON_GRAPH,
+ API_MSG_TRANS_RDMA_DEP, API_MSG_TRANS_NOT_SUPPORTED,
+ API_MSG_FS_NOT_INIT, API_MSG_INVALID_SYSRQ,
+ API_MSG_DECODE_XDR_FAILED, API_MSG_NULL, API_MSG_CALL_NOT_SUCCESSFUL,
+ API_MSG_CALL_NOT_VALID, API_MSG_UNABLE_TO_DEL,
+ API_MSG_REMOTE_HOST_DISCONN, API_MSG_HANDLE_NOT_SET);
+#define API_MSG_ALLOC_FAILED_STR "Upcall allocation failed"
+#define API_MSG_LOCK_INSERT_MERGE_FAILED_STR \
+ "Lock insertion and splitting/merging failed"
+#define API_MSG_SETTING_LOCK_TYPE_FAILED_STR "Setting lock type failed"
+
+#define API_MSG_INVALID_ARG_STR "Invalid"
+#define API_MSG_INVALID_ENTRY_STR "Upcall entry validation failed"
+#define API_MSG_INODE_FIND_FAILED_STR "Unable to find inode entry"
+#define API_MSG_CREATE_HANDLE_FAILED_STR "handle creation failed"
+#define API_MSG_UPCALL_EVENT_NULL_RECEIVED_STR \
+ "Upcall_EVENT_NULL received. Skipping it"
+#define API_MSG_UPCALL_SYNCOP_FAILED_STR "Synctask for upcall failed"
+#define API_MSG_FDCREATE_FAILED_STR "Allocating anonymous fd failed"
+#define API_MSG_XREADDIRP_R_FAILED_STR "glfs_x_readdirp_r failed"
+#define API_MSG_FDCTX_SET_FAILED_STR "Setting fd ctx failed"
+#define API_MSG_FLAGS_HANDLE_STR "arg not set. Flags handled are"
+#define API_MSG_INODE_REFRESH_FAILED_STR "inode refresh failed"
+#define API_MSG_INODE_LINK_FAILED_STR "inode linking failed"
+#define API_MSG_GET_CWD_FAILED_STR "Failed to get cwd"
+#define API_MSG_FGETXATTR_FAILED_STR "fgetxattr failed"
+#define API_MSG_LOCKINFO_KEY_MISSING_STR "missing lockinfo key"
+#define API_MSG_FSYNC_FAILED_STR "fsync() failed"
+#define API_MSG_FDCREATE_FAILED_ON_GRAPH_STR "fd_create failed on graph"
+#define API_MSG_INODE_PATH_FAILED_STR "inode_path failed"
+#define API_MSG_SYNCOP_OPEN_FAILED_STR "syncop_open failed"
+#define API_MSG_LOCK_MIGRATE_FAILED_STR "lock migration failed on graph"
+#define API_MSG_OPENFD_SKIPPED_STR "skipping openfd in graph"
+#define API_MSG_FIRST_LOOKUP_GRAPH_FAILED_STR "first lookup on graph failed"
+#define API_MSG_CWD_GRAPH_REF_FAILED_STR "cwd refresh of graph failed"
+#define API_MSG_SWITCHED_GRAPH_STR "switched to graph"
+#define API_MSG_FSETXATTR_FAILED_STR "fsetxattr failed"
+#define API_MSG_MEM_ACCT_INIT_FAILED_STR "Memory accounting init failed"
+#define API_MSG_MASTER_XLATOR_INIT_FAILED_STR \
+ "master xlator for initialization failed"
+#define API_MSG_GFAPI_XLATOR_INIT_FAILED_STR \
+ "failed to initialize gfapi translator"
+#define API_MSG_VOLFILE_OPEN_FAILED_STR "volume file open failed"
+#define API_MSG_VOL_SPEC_FILE_ERROR_STR "Cannot reach volume specification file"
+#define API_MSG_TRANS_RDMA_DEP_STR \
+ "transport RDMA is deprecated, falling back to tcp"
+#define API_MSG_TRANS_NOT_SUPPORTED_STR \
+ "transport is not supported, possible values tcp|unix"
+#define API_MSG_GLFS_FSOBJ_NULL_STR "fs is NULL"
+#define API_MSG_FS_NOT_INIT_STR "fs is not properly initialized"
+#define API_MSG_FSMUTEX_LOCK_FAILED_STR \
+ "pthread lock on glfs mutex, returned error"
+#define API_MSG_FSMUTEX_UNLOCK_FAILED_STR \
+ "pthread unlock on glfs mutex, returned error"
+#define API_MSG_COND_WAIT_FAILED_STR "cond wait failed"
+#define API_MSG_INVALID_SYSRQ_STR "not a valid sysrq"
+#define API_MSG_GRAPH_CONSTRUCT_FAILED_STR "failed to construct the graph"
+#define API_MSG_API_XLATOR_ERROR_STR \
+ "api master xlator cannot be specified in volume file"
+#define API_MSG_STATEDUMP_FAILED_STR "statedump failed"
+#define API_MSG_DECODE_XDR_FAILED_STR \
+ "Failed to decode xdr response for GF_CBK_STATEDUMP"
+#define API_MSG_NULL_STR "NULL"
+#define API_MSG_XDR_PAYLOAD_FAILED_STR "failed to create XDR payload"
+#define API_MSG_CALL_NOT_SUCCESSFUL_STR \
+ "GET_VOLUME_INFO RPC call is not successful"
+#define API_MSG_XDR_RESPONSE_DECODE_FAILED_STR \
+ "Failed to decode xdr response for GET_VOLUME_INFO"
+#define API_MSG_CALL_NOT_VALID_STR \
+ "Response received for GET_VOLUME_INFO RPC is not valid"
+#define API_MSG_GET_VOLINFO_CBK_FAILED_STR \
+ "In GET_VOLUME_INFO cbk, received error"
+#define API_MSG_FETCH_VOLUUID_FAILED_STR "Unable to fetch volume UUID"
+#define API_MSG_INSUFF_SIZE_STR "Insufficient size passed"
+#define API_MSG_FRAME_CREAT_FAILED_STR "failed to create the frame"
+#define API_MSG_DICT_SET_FAILED_STR "failed to set"
+#define API_MSG_XDR_DECODE_FAILED_STR "XDR decoding error"
+#define API_MSG_GET_VOLFILE_FAILED_STR "failed to get the volume file"
+#define API_MSG_VOLFILE_INFO_STR "No change in volfile, continuing"
+#define API_MSG_UNABLE_TO_DEL_STR "unable to delete file"
+#define API_MSG_WRONG_OPVERSION_STR \
+ "Server is operating at an op-version which is not supported"
+#define API_MSG_DICT_SERIALIZE_FAILED_STR "Failed to serialize dictionary"
+#define API_MSG_REMOTE_HOST_CONN_FAILED_STR "Failed to connect to remote-host"
+#define API_MSG_REMOTE_HOST_DISCONN_STR "disconnected from remote-host"
+#define API_MSG_VOLFILE_SERVER_EXHAUST_STR "Exhausted all volfile servers"
+#define API_MSG_VOLFILE_CONNECTING_STR "connecting to next volfile server"
+#define API_MSG_CREATE_RPC_CLIENT_FAILED_STR "failed to create rpc clnt"
+#define API_MSG_REG_NOTIFY_FUNC_FAILED_STR "failed to register notify function"
+#define API_MSG_REG_CBK_FUNC_FAILED_STR "failed to register callback function"
+#define API_MSG_NEW_GRAPH_STR "New graph coming up"
+#define API_MSG_HANDLE_NOT_SET_STR "handle not set. Flags handled for xstat are"
#endif /* !_GFAPI_MESSAGES_H__ */
diff --git a/api/src/gfapi.aliases b/api/src/gfapi.aliases
index a71422c8577..bc639e6b99f 100644
--- a/api/src/gfapi.aliases
+++ b/api/src/gfapi.aliases
@@ -170,21 +170,32 @@ _pub_glfs_h_lease _glfs_h_lease$GFAPI_4.0.0
_pub_glfs_upcall_lease_get_object _glfs_upcall_lease_get_object$GFAPI_4.1.6
_pub_glfs_upcall_lease_get_lease_type _glfs_upcall_lease_get_lease_type$GFAPI_4.1.6
-_pub_glfs_read_async _glfs_read_async$GFAPI_future
-_pub_glfs_write_async _glfs_write_async$GFAPI_future
-_pub_glfs_readv_async _glfs_readv_async$GFAPI_future
-_pub_glfs_writev_async _glfs_writev_async$GFAPI_future
-_pub_glfs_pread _glfs_pread$GFAPI_future
-_pub_glfs_pwrite _glfs_pwrite$GFAPI_future
-_pub_glfs_pread_async _glfs_pread_async$GFAPI_future
-_pub_glfs_pwrite_async _glfs_pwrite_async$GFAPI_future
-_pub_glfs_preadv_async _glfs_preadv_async$GFAPI_future
-_pub_glfs_pwritev_async _glfs_pwritev_async$GFAPI_future
-_pub_glfs_fsync _glfs_fsync$GFAPI_future
-_pub_glfs_fsync_async _glfs_fsync_async$GFAPI_future
-_pub_glfs_fdatasync _glfs_fdatasync$GFAPI_future
-_pub_glfs_fdatasync_async _glfs_fdatasync_async$GFAPI_future
-_pub_glfs_ftruncate _glfs_ftruncate$GFAPI_future
-_pub_glfs_ftruncate_async _glfs_ftruncate_async$GFAPI_future
-_pub_glfs_discard_async _glfs_discard_async$GFAPI_future
-_pub_glfs_zerofill_async _glfs_zerofill_async$GFAPI_future
+_priv_glfs_statx _glfs_statx$GFAPI_6.0
+_priv_glfs_iatt_from_statx _glfs_iatt_from_statx$GFAPI_6.0
+_priv_glfs_setfspid _glfs_setfspid$GFAPI_6.1
+
+_pub_glfs_read_async _glfs_read_async$GFAPI_6.0
+_pub_glfs_write_async _glfs_write_async$GFAPI_6.0
+_pub_glfs_readv_async _glfs_readv_async$GFAPI_6.0
+_pub_glfs_writev_async _glfs_writev_async$GFAPI_6.0
+_pub_glfs_pread _glfs_pread$GFAPI_6.0
+_pub_glfs_pwrite _glfs_pwrite$GFAPI_6.0
+_pub_glfs_pread_async _glfs_pread_async$GFAPI_6.0
+_pub_glfs_pwrite_async _glfs_pwrite_async$GFAPI_6.0
+_pub_glfs_preadv_async _glfs_preadv_async$GFAPI_6.0
+_pub_glfs_pwritev_async _glfs_pwritev_async$GFAPI_6.0
+_pub_glfs_fsync _glfs_fsync$GFAPI_6.0
+_pub_glfs_fsync_async _glfs_fsync_async$GFAPI_6.0
+_pub_glfs_fdatasync _glfs_fdatasync$GFAPI_6.0
+_pub_glfs_fdatasync_async _glfs_fdatasync_async$GFAPI_6.0
+_pub_glfs_ftruncate _glfs_ftruncate$GFAPI_6.0
+_pub_glfs_ftruncate_async _glfs_ftruncate_async$GFAPI_6.0
+_pub_glfs_discard_async _glfs_discard_async$GFAPI_6.0
+_pub_glfs_zerofill_async _glfs_zerofill_async$GFAPI_6.0
+_pub_glfs_copy_file_range _glfs_copy_file_range$GFAPI_6.0
+_pub_glfs_fsetattr _glfs_fsetattr$GFAPI_6.0
+_pub_glfs_setattr _glfs_setattr$GFAPI_6.0
+
+_pub_glfs_set_statedump_path _glfs_set_statedump_path@GFAPI_7.0
+
+_pub_glfs_h_creat_open _glfs_h_creat_open@GFAPI_6.6
diff --git a/api/src/gfapi.map b/api/src/gfapi.map
index c47323781fb..228ac47c084 100644
--- a/api/src/gfapi.map
+++ b/api/src/gfapi.map
@@ -231,11 +231,18 @@ GFAPI_4.0.0 {
} GFAPI_3.13.0;
GFAPI_4.1.6 {
- glfs_upcall_lease_get_object;
- glfs_upcall_lease_get_lease_type;
+ global:
+ glfs_upcall_lease_get_object;
+ glfs_upcall_lease_get_lease_type;
} GFAPI_4.0.0;
-GFAPI_future {
+GFAPI_PRIVATE_6.0 {
+ global:
+ glfs_statx;
+ glfs_iatt_from_statx;
+} GFAPI_4.1.6;
+
+GFAPI_6.0 {
global:
glfs_read_async;
glfs_write_async;
@@ -255,5 +262,22 @@ GFAPI_future {
glfs_ftruncate_async;
glfs_discard_async;
glfs_zerofill_async;
-} GFAPI_4.1.6;
+ glfs_copy_file_range;
+ glfs_setattr;
+ glfs_fsetattr;
+} GFAPI_PRIVATE_6.0;
+GFAPI_PRIVATE_6.1 {
+ global:
+ glfs_setfspid;
+} GFAPI_6.0;
+
+GFAPI_6.6 {
+ global:
+ glfs_h_creat_open;
+} GFAPI_PRIVATE_6.1;
+
+GFAPI_7.0 {
+ global:
+ glfs_set_statedump_path;
+} GFAPI_6.6;
diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c
index 302ccc5db07..6aa3c5602d1 100644
--- a/api/src/glfs-fops.c
+++ b/api/src/glfs-fops.c
@@ -18,12 +18,13 @@
#include "glfs-internal.h"
#include "glfs-mem-types.h"
-#include "syncop.h"
+#include <glusterfs/syncop.h>
#include "glfs.h"
#include "gfapi-messages.h"
-#include "compat-errno.h"
+#include <glusterfs/compat-errno.h>
#include <limits.h>
#include "glusterfs3.h"
+#include <glusterfs/iatt.h>
#ifdef NAME_MAX
#define GF_NAME_MAX NAME_MAX
@@ -33,7 +34,7 @@
struct upcall_syncop_args {
struct glfs *fs;
- struct gf_upcall *upcall_data;
+ struct gf_upcall upcall_data;
};
#define READDIRBUF_SIZE (sizeof(struct dirent) + GF_NAME_MAX + 1)
@@ -118,8 +119,8 @@ glfs_get_upcall_cache_invalidation(struct gf_upcall *to_up_data,
ca_data = GF_CALLOC(1, sizeof(*ca_data), glfs_mt_upcall_entry_t);
if (!ca_data) {
- gf_msg(THIS->name, GF_LOG_ERROR, errno, API_MSG_ALLOC_FAILED,
- "Upcall entry allocation failed.");
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_ALLOC_FAILED, "entry",
+ NULL);
goto out;
}
@@ -153,8 +154,8 @@ glfs_get_upcall_lease(struct gf_upcall *to_up_data,
ca_data = GF_CALLOC(1, sizeof(*ca_data), glfs_mt_upcall_entry_t);
if (!ca_data) {
- gf_msg(THIS->name, GF_LOG_ERROR, errno, API_MSG_ALLOC_FAILED,
- "Upcall entry allocation failed.");
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_ALLOC_FAILED, "entry",
+ NULL);
goto out;
}
@@ -206,6 +207,201 @@ glfs_iatt_to_stat(struct glfs *fs, struct iatt *iatt, struct stat *stat)
stat->st_dev = fs->dev_id;
}
+void
+glfs_iatt_to_statx(struct glfs *fs, const struct iatt *iatt,
+ struct glfs_stat *statx)
+{
+ statx->glfs_st_mask = 0;
+
+ statx->glfs_st_mode = 0;
+ if (IATT_TYPE_VALID(iatt->ia_flags)) {
+ statx->glfs_st_mode |= st_mode_type_from_ia(iatt->ia_type);
+ statx->glfs_st_mask |= GLFS_STAT_TYPE;
+ }
+
+ if (IATT_MODE_VALID(iatt->ia_flags)) {
+ statx->glfs_st_mode |= st_mode_prot_from_ia(iatt->ia_prot);
+ statx->glfs_st_mask |= GLFS_STAT_MODE;
+ }
+
+ if (IATT_NLINK_VALID(iatt->ia_flags)) {
+ statx->glfs_st_nlink = iatt->ia_nlink;
+ statx->glfs_st_mask |= GLFS_STAT_NLINK;
+ }
+
+ if (IATT_UID_VALID(iatt->ia_flags)) {
+ statx->glfs_st_uid = iatt->ia_uid;
+ statx->glfs_st_mask |= GLFS_STAT_UID;
+ }
+
+ if (IATT_GID_VALID(iatt->ia_flags)) {
+ statx->glfs_st_gid = iatt->ia_gid;
+ statx->glfs_st_mask |= GLFS_STAT_GID;
+ }
+
+ if (IATT_ATIME_VALID(iatt->ia_flags)) {
+ statx->glfs_st_atime.tv_sec = iatt->ia_atime;
+ statx->glfs_st_atime.tv_nsec = iatt->ia_atime_nsec;
+ statx->glfs_st_mask |= GLFS_STAT_ATIME;
+ }
+
+ if (IATT_MTIME_VALID(iatt->ia_flags)) {
+ statx->glfs_st_mtime.tv_sec = iatt->ia_mtime;
+ statx->glfs_st_mtime.tv_nsec = iatt->ia_mtime_nsec;
+ statx->glfs_st_mask |= GLFS_STAT_MTIME;
+ }
+
+ if (IATT_CTIME_VALID(iatt->ia_flags)) {
+ statx->glfs_st_ctime.tv_sec = iatt->ia_ctime;
+ statx->glfs_st_ctime.tv_nsec = iatt->ia_ctime_nsec;
+ statx->glfs_st_mask |= GLFS_STAT_CTIME;
+ }
+
+ if (IATT_BTIME_VALID(iatt->ia_flags)) {
+ statx->glfs_st_btime.tv_sec = iatt->ia_btime;
+ statx->glfs_st_btime.tv_nsec = iatt->ia_btime_nsec;
+ statx->glfs_st_mask |= GLFS_STAT_BTIME;
+ }
+
+ if (IATT_INO_VALID(iatt->ia_flags)) {
+ statx->glfs_st_ino = iatt->ia_ino;
+ statx->glfs_st_mask |= GLFS_STAT_INO;
+ }
+
+ if (IATT_SIZE_VALID(iatt->ia_flags)) {
+ statx->glfs_st_size = iatt->ia_size;
+ statx->glfs_st_mask |= GLFS_STAT_SIZE;
+ }
+
+ if (IATT_BLOCKS_VALID(iatt->ia_flags)) {
+ statx->glfs_st_blocks = iatt->ia_blocks;
+ statx->glfs_st_mask |= GLFS_STAT_BLOCKS;
+ }
+
+ /* unconditionally present, encode as is */
+ statx->glfs_st_blksize = iatt->ia_blksize;
+ statx->glfs_st_rdev_major = ia_major(iatt->ia_rdev);
+ statx->glfs_st_rdev_minor = ia_minor(iatt->ia_rdev);
+ statx->glfs_st_dev_major = ia_major(fs->dev_id);
+ statx->glfs_st_dev_minor = ia_minor(fs->dev_id);
+
+ /* At present we do not read any localFS attributes and pass them along,
+ * so setting this to 0. As we start supporting file attributes we can
+ * populate the same here as well */
+ statx->glfs_st_attributes = 0;
+ statx->glfs_st_attributes_mask = 0;
+}
+
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_iatt_from_statx, 6.0)
+void
+priv_glfs_iatt_from_statx(struct iatt *iatt, const struct glfs_stat *statx)
+{
+ /* Most code in xlators are not checking validity flags before accessing
+ the items. Hence zero everything before setting valid items */
+ memset(iatt, 0, sizeof(struct iatt));
+
+ if (GLFS_STAT_TYPE_VALID(statx->glfs_st_mask)) {
+ iatt->ia_type = ia_type_from_st_mode(statx->glfs_st_mode);
+ iatt->ia_flags |= IATT_TYPE;
+ }
+
+ if (GLFS_STAT_MODE_VALID(statx->glfs_st_mask)) {
+ iatt->ia_prot = ia_prot_from_st_mode(statx->glfs_st_mode);
+ iatt->ia_flags |= IATT_MODE;
+ }
+
+ if (GLFS_STAT_NLINK_VALID(statx->glfs_st_mask)) {
+ iatt->ia_nlink = statx->glfs_st_nlink;
+ iatt->ia_flags |= IATT_NLINK;
+ }
+
+ if (GLFS_STAT_UID_VALID(statx->glfs_st_mask)) {
+ iatt->ia_uid = statx->glfs_st_uid;
+ iatt->ia_flags |= IATT_UID;
+ }
+
+ if (GLFS_STAT_GID_VALID(statx->glfs_st_mask)) {
+ iatt->ia_gid = statx->glfs_st_gid;
+ iatt->ia_flags |= IATT_GID;
+ }
+
+ if (GLFS_STAT_ATIME_VALID(statx->glfs_st_mask)) {
+ iatt->ia_atime = statx->glfs_st_atime.tv_sec;
+ iatt->ia_atime_nsec = statx->glfs_st_atime.tv_nsec;
+ iatt->ia_flags |= IATT_ATIME;
+ }
+
+ if (GLFS_STAT_MTIME_VALID(statx->glfs_st_mask)) {
+ iatt->ia_mtime = statx->glfs_st_mtime.tv_sec;
+ iatt->ia_mtime_nsec = statx->glfs_st_mtime.tv_nsec;
+ iatt->ia_flags |= IATT_MTIME;
+ }
+
+ if (GLFS_STAT_CTIME_VALID(statx->glfs_st_mask)) {
+ iatt->ia_ctime = statx->glfs_st_ctime.tv_sec;
+ iatt->ia_ctime_nsec = statx->glfs_st_ctime.tv_nsec;
+ iatt->ia_flags |= IATT_CTIME;
+ }
+
+ if (GLFS_STAT_BTIME_VALID(statx->glfs_st_mask)) {
+ iatt->ia_btime = statx->glfs_st_btime.tv_sec;
+ iatt->ia_btime_nsec = statx->glfs_st_btime.tv_nsec;
+ iatt->ia_flags |= IATT_BTIME;
+ }
+
+ if (GLFS_STAT_INO_VALID(statx->glfs_st_mask)) {
+ iatt->ia_ino = statx->glfs_st_ino;
+ iatt->ia_flags |= IATT_INO;
+ }
+
+ if (GLFS_STAT_SIZE_VALID(statx->glfs_st_mask)) {
+ iatt->ia_size = statx->glfs_st_size;
+ iatt->ia_flags |= IATT_SIZE;
+ }
+
+ if (GLFS_STAT_BLOCKS_VALID(statx->glfs_st_mask)) {
+ iatt->ia_blocks = statx->glfs_st_blocks;
+ iatt->ia_flags |= IATT_BLOCKS;
+ }
+
+ /* unconditionally present, encode as is */
+ iatt->ia_blksize = statx->glfs_st_blksize;
+ iatt->ia_rdev = makedev(statx->glfs_st_rdev_major,
+ statx->glfs_st_rdev_minor);
+ iatt->ia_dev = makedev(statx->glfs_st_dev_major, statx->glfs_st_dev_minor);
+ iatt->ia_attributes = statx->glfs_st_attributes;
+ iatt->ia_attributes_mask = statx->glfs_st_attributes_mask;
+}
+
+void
+glfsflags_from_gfapiflags(struct glfs_stat *stat, int *glvalid)
+{
+ *glvalid = 0;
+ if (stat->glfs_st_mask & GLFS_STAT_MODE) {
+ *glvalid |= GF_SET_ATTR_MODE;
+ }
+
+ if (stat->glfs_st_mask & GLFS_STAT_SIZE) {
+ *glvalid |= GF_SET_ATTR_SIZE;
+ }
+
+ if (stat->glfs_st_mask & GLFS_STAT_UID) {
+ *glvalid |= GF_SET_ATTR_UID;
+ }
+
+ if (stat->glfs_st_mask & GLFS_STAT_GID) {
+ *glvalid |= GF_SET_ATTR_GID;
+ }
+
+ if (stat->glfs_st_mask & GLFS_STAT_ATIME) {
+ *glvalid |= GF_SET_ATTR_ATIME;
+ }
+
+ if (stat->glfs_st_mask & GLFS_STAT_MTIME) {
+ *glvalid |= GF_SET_ATTR_MTIME;
+ }
+}
+
int
glfs_loc_unlink(loc_t *loc)
{
@@ -219,6 +415,7 @@ glfs_loc_unlink(loc_t *loc)
return 0;
}
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_open, 3.4.0)
struct glfs_fd *
pub_glfs_open(struct glfs *fs, const char *path, int flags)
{
@@ -313,8 +510,7 @@ invalid_fs:
return glfd;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_open, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_close, 3.4.0)
int
pub_glfs_close(struct glfs_fd *glfd)
{
@@ -369,8 +565,7 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_close, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lstat, 3.4.0)
int
pub_glfs_lstat(struct glfs *fs, const char *path, struct stat *stat)
{
@@ -411,8 +606,7 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lstat, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_stat, 3.4.0)
int
pub_glfs_stat(struct glfs *fs, const char *path, struct stat *stat)
{
@@ -453,8 +647,61 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_stat, 3.4.0);
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_statx, 6.0)
+int
+priv_glfs_statx(struct glfs *fs, const char *path, const unsigned int mask,
+ struct glfs_stat *statxbuf)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ if (path == NULL) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ if (mask & ~GLFS_STAT_ALL) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+retry:
+ ret = glfs_resolve(fs, subvol, path, &loc, &iatt, reval);
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret == 0 && statxbuf)
+ glfs_iatt_to_statx(fs, &iatt, statxbuf);
+out:
+ loc_wipe(&loc);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fstat, 3.4.0)
int
pub_glfs_fstat(struct glfs_fd *glfd, struct stat *stat)
{
@@ -503,8 +750,7 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fstat, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_creat, 3.4.0)
struct glfs_fd *
pub_glfs_creat(struct glfs *fs, const char *path, int flags, mode_t mode)
{
@@ -651,8 +897,6 @@ invalid_fs:
return glfd;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_creat, 3.4.0);
-
#ifdef HAVE_SEEK_HOLE
static int
glfs_seek(struct glfs_fd *glfd, off_t offset, int whence)
@@ -706,6 +950,7 @@ out:
}
#endif
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lseek, 3.4.0)
off_t
pub_glfs_lseek(struct glfs_fd *glfd, off_t offset, int whence)
{
@@ -761,11 +1006,9 @@ invalid_fs:
return -1;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lseek, 3.4.0);
-
static ssize_t
glfs_preadv_common(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt,
- off_t offset, int flags, struct stat *poststat)
+ off_t offset, int flags, struct glfs_stat *poststat)
{
xlator_t *subvol = NULL;
ssize_t ret = -1;
@@ -809,7 +1052,7 @@ glfs_preadv_common(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt,
DECODE_SYNCOP_ERR(ret);
if (ret >= 0 && poststat)
- glfs_iatt_to_stat(glfd->fs, &iatt, poststat);
+ glfs_iatt_to_statx(glfd->fs, &iatt, poststat);
if (ret <= 0)
goto out;
@@ -840,6 +1083,7 @@ invalid_fs:
return ret;
}
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_preadv, 3.4.0)
ssize_t
pub_glfs_preadv(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt,
off_t offset, int flags)
@@ -847,8 +1091,7 @@ pub_glfs_preadv(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt,
return glfs_preadv_common(glfd, iovec, iovcnt, offset, flags, NULL);
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_preadv, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_read, 3.4.0)
ssize_t
pub_glfs_read(struct glfs_fd *glfd, void *buf, size_t count, int flags)
{
@@ -857,6 +1100,11 @@ pub_glfs_read(struct glfs_fd *glfd, void *buf, size_t count, int flags)
};
ssize_t ret = 0;
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
iov.iov_base = buf;
iov.iov_len = count;
@@ -865,8 +1113,7 @@ pub_glfs_read(struct glfs_fd *glfd, void *buf, size_t count, int flags)
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_read, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC(glfs_pread34, glfs_pread, 3.4.0)
ssize_t
pub_glfs_pread34(struct glfs_fd *glfd, void *buf, size_t count, off_t offset,
int flags)
@@ -884,11 +1131,10 @@ pub_glfs_pread34(struct glfs_fd *glfd, void *buf, size_t count, off_t offset,
return ret;
}
-GFAPI_SYMVER_PUBLIC(glfs_pread34, glfs_pread, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pread, 6.0)
ssize_t
pub_glfs_pread(struct glfs_fd *glfd, void *buf, size_t count, off_t offset,
- int flags, struct stat *poststat)
+ int flags, struct glfs_stat *poststat)
{
struct iovec iov = {
0,
@@ -903,21 +1149,23 @@ pub_glfs_pread(struct glfs_fd *glfd, void *buf, size_t count, off_t offset,
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pread, future);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readv, 3.4.0)
ssize_t
pub_glfs_readv(struct glfs_fd *glfd, const struct iovec *iov, int count,
int flags)
{
ssize_t ret = 0;
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
ret = pub_glfs_preadv(glfd, iov, count, glfd->offset, flags);
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readv, 3.4.0);
-
struct glfs_io {
struct glfs_fd *glfd;
int op;
@@ -943,8 +1191,8 @@ glfs_io_async_cbk(int op_ret, int op_errno, call_frame_t *frame, void *cookie,
struct glfs *fs = NULL;
struct glfs_fd *glfd = NULL;
int ret = -1;
- struct stat prestat = {}, *prestatp = NULL;
- struct stat poststat = {}, *poststatp = NULL;
+ struct glfs_stat prestat = {}, *prestatp = NULL;
+ struct glfs_stat poststat = {}, *poststatp = NULL;
GF_VALIDATE_OR_GOTO("gfapi", frame, inval);
GF_VALIDATE_OR_GOTO("gfapi", cookie, inval);
@@ -980,12 +1228,12 @@ out:
} else {
if (prebuf) {
prestatp = &prestat;
- glfs_iatt_to_stat(fs, prebuf, prestatp);
+ glfs_iatt_to_statx(fs, prebuf, prestatp);
}
if (postbuf) {
poststatp = &poststat;
- glfs_iatt_to_stat(fs, postbuf, poststatp);
+ glfs_iatt_to_statx(fs, postbuf, poststatp);
}
gio->fn(gio->glfd, op_ret, prestatp, poststatp, gio->data);
@@ -1119,6 +1367,7 @@ invalid_fs:
return -1;
}
+GFAPI_SYMVER_PUBLIC(glfs_preadv_async34, glfs_preadv_async, 3.4.0)
int
pub_glfs_preadv_async34(struct glfs_fd *glfd, const struct iovec *iovec,
int count, off_t offset, int flags, glfs_io_cbk34 fn,
@@ -1128,8 +1377,7 @@ pub_glfs_preadv_async34(struct glfs_fd *glfd, const struct iovec *iovec,
(void *)fn, data);
}
-GFAPI_SYMVER_PUBLIC(glfs_preadv_async34, glfs_preadv_async, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_preadv_async, 6.0)
int
pub_glfs_preadv_async(struct glfs_fd *glfd, const struct iovec *iovec,
int count, off_t offset, int flags, glfs_io_cbk fn,
@@ -1139,8 +1387,7 @@ pub_glfs_preadv_async(struct glfs_fd *glfd, const struct iovec *iovec,
_gf_false, fn, data);
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_preadv_async, future);
-
+GFAPI_SYMVER_PUBLIC(glfs_read_async34, glfs_read_async, 3.4.0)
int
pub_glfs_read_async34(struct glfs_fd *glfd, void *buf, size_t count, int flags,
glfs_io_cbk34 fn, void *data)
@@ -1150,6 +1397,11 @@ pub_glfs_read_async34(struct glfs_fd *glfd, void *buf, size_t count, int flags,
};
ssize_t ret = 0;
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
iov.iov_base = buf;
iov.iov_len = count;
@@ -1159,8 +1411,7 @@ pub_glfs_read_async34(struct glfs_fd *glfd, void *buf, size_t count, int flags,
return ret;
}
-GFAPI_SYMVER_PUBLIC(glfs_read_async34, glfs_read_async, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_read_async, 6.0)
int
pub_glfs_read_async(struct glfs_fd *glfd, void *buf, size_t count, int flags,
glfs_io_cbk fn, void *data)
@@ -1170,6 +1421,11 @@ pub_glfs_read_async(struct glfs_fd *glfd, void *buf, size_t count, int flags,
};
ssize_t ret = 0;
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
iov.iov_base = buf;
iov.iov_len = count;
@@ -1179,8 +1435,7 @@ pub_glfs_read_async(struct glfs_fd *glfd, void *buf, size_t count, int flags,
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_read_async, future);
-
+GFAPI_SYMVER_PUBLIC(glfs_pread_async34, glfs_pread_async, 3.4.0)
int
pub_glfs_pread_async34(struct glfs_fd *glfd, void *buf, size_t count,
off_t offset, int flags, glfs_io_cbk34 fn, void *data)
@@ -1199,8 +1454,7 @@ pub_glfs_pread_async34(struct glfs_fd *glfd, void *buf, size_t count,
return ret;
}
-GFAPI_SYMVER_PUBLIC(glfs_pread_async34, glfs_pread_async, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pread_async, 6.0)
int
pub_glfs_pread_async(struct glfs_fd *glfd, void *buf, size_t count,
off_t offset, int flags, glfs_io_cbk fn, void *data)
@@ -1219,38 +1473,44 @@ pub_glfs_pread_async(struct glfs_fd *glfd, void *buf, size_t count,
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pread_async, future);
-
+GFAPI_SYMVER_PUBLIC(glfs_readv_async34, glfs_readv_async, 3.4.0)
int
pub_glfs_readv_async34(struct glfs_fd *glfd, const struct iovec *iov, int count,
int flags, glfs_io_cbk34 fn, void *data)
{
ssize_t ret = 0;
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
ret = glfs_preadv_async_common(glfd, iov, count, glfd->offset, flags,
_gf_true, (void *)fn, data);
return ret;
}
-GFAPI_SYMVER_PUBLIC(glfs_readv_async34, glfs_readv_async, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readv_async, 6.0)
int
pub_glfs_readv_async(struct glfs_fd *glfd, const struct iovec *iov, int count,
int flags, glfs_io_cbk fn, void *data)
{
ssize_t ret = 0;
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
ret = glfs_preadv_async_common(glfd, iov, count, glfd->offset, flags,
_gf_false, fn, data);
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readv_async, future);
-
static ssize_t
glfs_pwritev_common(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt,
- off_t offset, int flags, struct stat *prestat,
- struct stat *poststat)
+ off_t offset, int flags, struct glfs_stat *prestat,
+ struct glfs_stat *poststat)
{
xlator_t *subvol = NULL;
int ret = -1;
@@ -1303,9 +1563,9 @@ glfs_pwritev_common(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt,
if (ret >= 0) {
if (prestat)
- glfs_iatt_to_stat(glfd->fs, &preiatt, prestat);
+ glfs_iatt_to_statx(glfd->fs, &preiatt, prestat);
if (poststat)
- glfs_iatt_to_stat(glfd->fs, &postiatt, poststat);
+ glfs_iatt_to_statx(glfd->fs, &postiatt, poststat);
}
if (ret <= 0)
@@ -1332,6 +1592,161 @@ invalid_fs:
return ret;
}
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_copy_file_range, 6.0)
+ssize_t
+pub_glfs_copy_file_range(struct glfs_fd *glfd_in, off64_t *off_in,
+ struct glfs_fd *glfd_out, off64_t *off_out, size_t len,
+ unsigned int flags, struct glfs_stat *statbuf,
+ struct glfs_stat *prestat, struct glfs_stat *poststat)
+{
+ xlator_t *subvol = NULL;
+ int ret = -1;
+ fd_t *fd_in = NULL;
+ fd_t *fd_out = NULL;
+ struct iatt preiatt =
+ {
+ 0,
+ },
+ iattbuf =
+ {
+ 0,
+ },
+ postiatt = {
+ 0,
+ };
+ dict_t *fop_attr = NULL;
+ off64_t pos_in;
+ off64_t pos_out;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd_in, invalid_fs);
+ __GLFS_ENTRY_VALIDATE_FD(glfd_out, invalid_fs);
+
+ GF_REF_GET(glfd_in);
+ GF_REF_GET(glfd_out);
+
+ if (glfd_in->fs != glfd_out->fs) {
+ ret = -1;
+ errno = EXDEV;
+ goto out;
+ }
+
+ subvol = glfs_active_subvol(glfd_in->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd_in = glfs_resolve_fd(glfd_in->fs, subvol, glfd_in);
+ if (!fd_in) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ fd_out = glfs_resolve_fd(glfd_out->fs, subvol, glfd_out);
+ if (!fd_out) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ /*
+ * This is based on how the vfs layer in the kernel handles
+ * copy_file_range call. Upon receiving it follows the
+ * below method to consider the offset.
+ * if (off_in != NULL)
+ * use the value off_in to perform the op
+ * else if off_in == NULL
+ * use the current file offset position to perform the op
+ *
+ * For gfapi, glfd->offset is used. For a freshly opened
+ * fd, the offset is set to 0.
+ */
+ if (off_in)
+ pos_in = *off_in;
+ else
+ pos_in = glfd_in->offset;
+
+ if (off_out)
+ pos_out = *off_out;
+ else
+ pos_out = glfd_out->offset;
+
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ ret = syncop_copy_file_range(subvol, fd_in, pos_in, fd_out, pos_out, len,
+ flags, &iattbuf, &preiatt, &postiatt, fop_attr,
+ NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ if (ret >= 0) {
+ pos_in += ret;
+ pos_out += ret;
+
+ if (off_in)
+ *off_in = pos_in;
+ if (off_out)
+ *off_out = pos_out;
+
+ if (statbuf)
+ glfs_iatt_to_statx(glfd_in->fs, &iattbuf, statbuf);
+ if (prestat)
+ glfs_iatt_to_statx(glfd_in->fs, &preiatt, prestat);
+ if (poststat)
+ glfs_iatt_to_statx(glfd_in->fs, &postiatt, poststat);
+ }
+
+ if (ret <= 0)
+ goto out;
+
+ /*
+ * If *off_in is NULL, then there is no offset info that can
+ * obtained from the input argument. Hence follow below method.
+ * If *off_in is NULL, then
+ * glfd->offset = offset + ret;
+ * else
+ * do nothing.
+ *
+ * According to the man page of copy_file_range, if off_in is
+ * NULL, then the offset of the source file is advanced by
+ * the return value of the fop. The same applies to off_out as
+ * well. Otherwise, if *off_in is not NULL, then the offset
+ * is not advanced by the filesystem. The entity which sends
+ * the copy_file_range call is supposed to advance the offset
+ * value in its buffer (pointed to by *off_in or *off_out)
+ * by the return value of copy_file_range.
+ */
+ if (!off_in)
+ glfd_in->offset += ret;
+
+ if (!off_out)
+ glfd_out->offset += ret;
+
+out:
+ if (fd_in)
+ fd_unref(fd_in);
+ if (fd_out)
+ fd_unref(fd_out);
+ if (glfd_in)
+ GF_REF_PUT(glfd_in);
+ if (glfd_out)
+ GF_REF_PUT(glfd_out);
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ glfs_subvol_done(glfd_in->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pwritev, 3.4.0)
ssize_t
pub_glfs_pwritev(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt,
off_t offset, int flags)
@@ -1339,8 +1754,7 @@ pub_glfs_pwritev(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt,
return glfs_pwritev_common(glfd, iovec, iovcnt, offset, flags, NULL, NULL);
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pwritev, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_write, 3.4.0)
ssize_t
pub_glfs_write(struct glfs_fd *glfd, const void *buf, size_t count, int flags)
{
@@ -1349,6 +1763,11 @@ pub_glfs_write(struct glfs_fd *glfd, const void *buf, size_t count, int flags)
};
ssize_t ret = 0;
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
iov.iov_base = (void *)buf;
iov.iov_len = count;
@@ -1357,21 +1776,24 @@ pub_glfs_write(struct glfs_fd *glfd, const void *buf, size_t count, int flags)
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_write, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_writev, 3.4.0)
ssize_t
pub_glfs_writev(struct glfs_fd *glfd, const struct iovec *iov, int count,
int flags)
{
ssize_t ret = 0;
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
ret = pub_glfs_pwritev(glfd, iov, count, glfd->offset, flags);
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_writev, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC(glfs_pwrite34, glfs_pwrite, 3.4.0)
ssize_t
pub_glfs_pwrite34(struct glfs_fd *glfd, const void *buf, size_t count,
off_t offset, int flags)
@@ -1389,12 +1811,11 @@ pub_glfs_pwrite34(struct glfs_fd *glfd, const void *buf, size_t count,
return ret;
}
-GFAPI_SYMVER_PUBLIC(glfs_pwrite34, glfs_pwrite, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pwrite, 6.0)
ssize_t
pub_glfs_pwrite(struct glfs_fd *glfd, const void *buf, size_t count,
- off_t offset, int flags, struct stat *prestat,
- struct stat *poststat)
+ off_t offset, int flags, struct glfs_stat *prestat,
+ struct glfs_stat *poststat)
{
struct iovec iov = {
0,
@@ -1409,8 +1830,6 @@ pub_glfs_pwrite(struct glfs_fd *glfd, const void *buf, size_t count,
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pwrite, future);
-
extern glfs_t *
pub_glfs_from_glfd(glfs_fd_t *);
@@ -1529,6 +1948,7 @@ invalid_fs:
return ret;
}
+GFAPI_SYMVER_PUBLIC(glfs_pwritev_async34, glfs_pwritev_async, 3.4.0)
int
pub_glfs_pwritev_async34(struct glfs_fd *glfd, const struct iovec *iovec,
int count, off_t offset, int flags, glfs_io_cbk34 fn,
@@ -1538,8 +1958,7 @@ pub_glfs_pwritev_async34(struct glfs_fd *glfd, const struct iovec *iovec,
_gf_true, (void *)fn, data);
}
-GFAPI_SYMVER_PUBLIC(glfs_pwritev_async34, glfs_pwritev_async, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pwritev_async, 6.0)
int
pub_glfs_pwritev_async(struct glfs_fd *glfd, const struct iovec *iovec,
int count, off_t offset, int flags, glfs_io_cbk fn,
@@ -1549,8 +1968,7 @@ pub_glfs_pwritev_async(struct glfs_fd *glfd, const struct iovec *iovec,
_gf_false, fn, data);
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pwritev_async, future);
-
+GFAPI_SYMVER_PUBLIC(glfs_write_async34, glfs_write_async, 3.4.0)
int
pub_glfs_write_async34(struct glfs_fd *glfd, const void *buf, size_t count,
int flags, glfs_io_cbk34 fn, void *data)
@@ -1560,6 +1978,11 @@ pub_glfs_write_async34(struct glfs_fd *glfd, const void *buf, size_t count,
};
ssize_t ret = 0;
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
iov.iov_base = (void *)buf;
iov.iov_len = count;
@@ -1569,8 +1992,7 @@ pub_glfs_write_async34(struct glfs_fd *glfd, const void *buf, size_t count,
return ret;
}
-GFAPI_SYMVER_PUBLIC(glfs_write_async34, glfs_write_async, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_write_async, 6.0)
int
pub_glfs_write_async(struct glfs_fd *glfd, const void *buf, size_t count,
int flags, glfs_io_cbk fn, void *data)
@@ -1580,6 +2002,11 @@ pub_glfs_write_async(struct glfs_fd *glfd, const void *buf, size_t count,
};
ssize_t ret = 0;
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
iov.iov_base = (void *)buf;
iov.iov_len = count;
@@ -1589,8 +2016,7 @@ pub_glfs_write_async(struct glfs_fd *glfd, const void *buf, size_t count,
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_write_async, future);
-
+GFAPI_SYMVER_PUBLIC(glfs_pwrite_async34, glfs_pwrite_async, 3.4.0)
int
pub_glfs_pwrite_async34(struct glfs_fd *glfd, const void *buf, int count,
off_t offset, int flags, glfs_io_cbk34 fn, void *data)
@@ -1609,8 +2035,7 @@ pub_glfs_pwrite_async34(struct glfs_fd *glfd, const void *buf, int count,
return ret;
}
-GFAPI_SYMVER_PUBLIC(glfs_pwrite_async34, glfs_pwrite_async, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pwrite_async, 6.0)
int
pub_glfs_pwrite_async(struct glfs_fd *glfd, const void *buf, int count,
off_t offset, int flags, glfs_io_cbk fn, void *data)
@@ -1629,37 +2054,43 @@ pub_glfs_pwrite_async(struct glfs_fd *glfd, const void *buf, int count,
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pwrite_async, future);
-
+GFAPI_SYMVER_PUBLIC(glfs_writev_async34, glfs_writev_async, 3.4.0)
int
pub_glfs_writev_async34(struct glfs_fd *glfd, const struct iovec *iov,
int count, int flags, glfs_io_cbk34 fn, void *data)
{
ssize_t ret = 0;
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
ret = glfs_pwritev_async_common(glfd, iov, count, glfd->offset, flags,
_gf_true, (void *)fn, data);
return ret;
}
-GFAPI_SYMVER_PUBLIC(glfs_writev_async34, glfs_writev_async, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_writev_async, 6.0)
int
pub_glfs_writev_async(struct glfs_fd *glfd, const struct iovec *iov, int count,
int flags, glfs_io_cbk fn, void *data)
{
ssize_t ret = 0;
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
ret = glfs_pwritev_async_common(glfd, iov, count, glfd->offset, flags,
_gf_false, fn, data);
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_writev_async, future);
-
static int
-glfs_fsync_common(struct glfs_fd *glfd, struct stat *prestat,
- struct stat *poststat)
+glfs_fsync_common(struct glfs_fd *glfd, struct glfs_stat *prestat,
+ struct glfs_stat *poststat)
{
int ret = -1;
xlator_t *subvol = NULL;
@@ -1701,9 +2132,9 @@ glfs_fsync_common(struct glfs_fd *glfd, struct stat *prestat,
if (ret >= 0) {
if (prestat)
- glfs_iatt_to_stat(glfd->fs, &preiatt, prestat);
+ glfs_iatt_to_statx(glfd->fs, &preiatt, prestat);
if (poststat)
- glfs_iatt_to_stat(glfd->fs, &postiatt, poststat);
+ glfs_iatt_to_statx(glfd->fs, &postiatt, poststat);
}
out:
if (fd)
@@ -1721,23 +2152,21 @@ invalid_fs:
return ret;
}
+GFAPI_SYMVER_PUBLIC(glfs_fsync34, glfs_fsync, 3.4.0)
int
pub_glfs_fsync34(struct glfs_fd *glfd)
{
return glfs_fsync_common(glfd, NULL, NULL);
}
-GFAPI_SYMVER_PUBLIC(glfs_fsync34, glfs_fsync, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fsync, 6.0)
int
-pub_glfs_fsync(struct glfs_fd *glfd, struct stat *prestat,
- struct stat *poststat)
+pub_glfs_fsync(struct glfs_fd *glfd, struct glfs_stat *prestat,
+ struct glfs_stat *poststat)
{
return glfs_fsync_common(glfd, prestat, poststat);
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fsync, future);
-
static int
glfs_fsync_async_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
@@ -1818,6 +2247,7 @@ out:
return ret;
}
+GFAPI_SYMVER_PUBLIC(glfs_fsync_async34, glfs_fsync_async, 3.4.0)
int
pub_glfs_fsync_async34(struct glfs_fd *glfd, glfs_io_cbk34 fn, void *data)
{
@@ -1834,8 +2264,7 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC(glfs_fsync_async34, glfs_fsync_async, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fsync_async, 6.0)
int
pub_glfs_fsync_async(struct glfs_fd *glfd, glfs_io_cbk fn, void *data)
{
@@ -1852,11 +2281,9 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fsync_async, future);
-
static int
-glfs_fdatasync_common(struct glfs_fd *glfd, struct stat *prestat,
- struct stat *poststat)
+glfs_fdatasync_common(struct glfs_fd *glfd, struct glfs_stat *prestat,
+ struct glfs_stat *poststat)
{
int ret = -1;
xlator_t *subvol = NULL;
@@ -1898,9 +2325,9 @@ glfs_fdatasync_common(struct glfs_fd *glfd, struct stat *prestat,
if (ret >= 0) {
if (prestat)
- glfs_iatt_to_stat(glfd->fs, &preiatt, prestat);
+ glfs_iatt_to_statx(glfd->fs, &preiatt, prestat);
if (poststat)
- glfs_iatt_to_stat(glfd->fs, &postiatt, poststat);
+ glfs_iatt_to_statx(glfd->fs, &postiatt, poststat);
}
out:
if (fd)
@@ -1918,23 +2345,22 @@ invalid_fs:
return ret;
}
+GFAPI_SYMVER_PUBLIC(glfs_fdatasync34, glfs_fdatasync, 3.4.0)
int
pub_glfs_fdatasync34(struct glfs_fd *glfd)
{
return glfs_fdatasync_common(glfd, NULL, NULL);
}
-GFAPI_SYMVER_PUBLIC(glfs_fdatasync34, glfs_fdatasync, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fdatasync, 6.0)
int
-pub_glfs_fdatasync(struct glfs_fd *glfd, struct stat *prestat,
- struct stat *poststat)
+pub_glfs_fdatasync(struct glfs_fd *glfd, struct glfs_stat *prestat,
+ struct glfs_stat *poststat)
{
return glfs_fdatasync_common(glfd, prestat, poststat);
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fdatasync, future);
-
+GFAPI_SYMVER_PUBLIC(glfs_fdatasync_async34, glfs_fdatasync_async, 3.4.0)
int
pub_glfs_fdatasync_async34(struct glfs_fd *glfd, glfs_io_cbk34 fn, void *data)
{
@@ -1951,8 +2377,7 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC(glfs_fdatasync_async34, glfs_fdatasync_async, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fdatasync_async, 6.0)
int
pub_glfs_fdatasync_async(struct glfs_fd *glfd, glfs_io_cbk fn, void *data)
{
@@ -1969,11 +2394,9 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fdatasync_async, future);
-
static int
-glfs_ftruncate_common(struct glfs_fd *glfd, off_t offset, struct stat *prestat,
- struct stat *poststat)
+glfs_ftruncate_common(struct glfs_fd *glfd, off_t offset,
+ struct glfs_stat *prestat, struct glfs_stat *poststat)
{
int ret = -1;
xlator_t *subvol = NULL;
@@ -2016,9 +2439,9 @@ glfs_ftruncate_common(struct glfs_fd *glfd, off_t offset, struct stat *prestat,
if (ret >= 0) {
if (prestat)
- glfs_iatt_to_stat(glfd->fs, &preiatt, prestat);
+ glfs_iatt_to_statx(glfd->fs, &preiatt, prestat);
if (poststat)
- glfs_iatt_to_stat(glfd->fs, &postiatt, poststat);
+ glfs_iatt_to_statx(glfd->fs, &postiatt, poststat);
}
out:
if (fd)
@@ -2036,23 +2459,22 @@ invalid_fs:
return ret;
}
+GFAPI_SYMVER_PUBLIC(glfs_ftruncate34, glfs_ftruncate, 3.4.0)
int
pub_glfs_ftruncate34(struct glfs_fd *glfd, off_t offset)
{
return glfs_ftruncate_common(glfd, offset, NULL, NULL);
}
-GFAPI_SYMVER_PUBLIC(glfs_ftruncate34, glfs_ftruncate, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_ftruncate, 6.0)
int
-pub_glfs_ftruncate(struct glfs_fd *glfd, off_t offset, struct stat *prestat,
- struct stat *poststat)
+pub_glfs_ftruncate(struct glfs_fd *glfd, off_t offset,
+ struct glfs_stat *prestat, struct glfs_stat *poststat)
{
return glfs_ftruncate_common(glfd, offset, prestat, poststat);
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_ftruncate, future);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_truncate, 3.7.15)
int
pub_glfs_truncate(struct glfs *fs, const char *path, off_t length)
{
@@ -2098,8 +2520,6 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_truncate, 3.7.15);
-
static int
glfs_ftruncate_async_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
@@ -2192,6 +2612,7 @@ invalid_fs:
return ret;
}
+GFAPI_SYMVER_PUBLIC(glfs_ftruncate_async34, glfs_ftruncate_async, 3.4.0)
int
pub_glfs_ftruncate_async34(struct glfs_fd *glfd, off_t offset, glfs_io_cbk34 fn,
void *data)
@@ -2200,8 +2621,7 @@ pub_glfs_ftruncate_async34(struct glfs_fd *glfd, off_t offset, glfs_io_cbk34 fn,
data);
}
-GFAPI_SYMVER_PUBLIC(glfs_ftruncate_async34, glfs_ftruncate_async, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_ftruncate_async, 6.0)
int
pub_glfs_ftruncate_async(struct glfs_fd *glfd, off_t offset, glfs_io_cbk fn,
void *data)
@@ -2209,8 +2629,7 @@ pub_glfs_ftruncate_async(struct glfs_fd *glfd, off_t offset, glfs_io_cbk fn,
return glfs_ftruncate_async_common(glfd, offset, _gf_false, fn, data);
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_ftruncate_async, future);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_access, 3.4.0)
int
pub_glfs_access(struct glfs *fs, const char *path, int mode)
{
@@ -2256,8 +2675,7 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_access, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_symlink, 3.4.0)
int
pub_glfs_symlink(struct glfs *fs, const char *data, const char *path)
{
@@ -2347,8 +2765,7 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_symlink, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readlink, 3.4.0)
int
pub_glfs_readlink(struct glfs *fs, const char *path, char *buf, size_t bufsiz)
{
@@ -2405,8 +2822,7 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readlink, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_mknod, 3.4.0)
int
pub_glfs_mknod(struct glfs *fs, const char *path, mode_t mode, dev_t dev)
{
@@ -2496,8 +2912,7 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_mknod, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_mkdir, 3.4.0)
int
pub_glfs_mkdir(struct glfs *fs, const char *path, mode_t mode)
{
@@ -2587,8 +3002,7 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_mkdir, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_unlink, 3.4.0)
int
pub_glfs_unlink(struct glfs *fs, const char *path)
{
@@ -2644,8 +3058,7 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_unlink, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_rmdir, 3.4.0)
int
pub_glfs_rmdir(struct glfs *fs, const char *path)
{
@@ -2700,8 +3113,7 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_rmdir, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_rename, 3.4.0)
int
pub_glfs_rename(struct glfs *fs, const char *oldpath, const char *newpath)
{
@@ -2790,8 +3202,7 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_rename, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_link, 3.4.0)
int
pub_glfs_link(struct glfs *fs, const char *oldpath, const char *newpath)
{
@@ -2877,8 +3288,7 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_link, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_opendir, 3.4.0)
struct glfs_fd *
pub_glfs_opendir(struct glfs *fs, const char *path)
{
@@ -2959,8 +3369,7 @@ invalid_fs:
return glfd;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_opendir, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_closedir, 3.4.0)
int
pub_glfs_closedir(struct glfs_fd *glfd)
{
@@ -2981,22 +3390,30 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_closedir, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_telldir, 3.4.0)
long
pub_glfs_telldir(struct glfs_fd *fd)
{
+ if (fd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
return fd->offset;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_telldir, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_seekdir, 3.4.0)
void
pub_glfs_seekdir(struct glfs_fd *fd, long offset)
{
gf_dirent_t *entry = NULL;
gf_dirent_t *tmp = NULL;
+ if (fd == NULL) {
+ errno = EBADF;
+ return;
+ }
+
if (fd->offset == offset)
return;
@@ -3019,8 +3436,6 @@ pub_glfs_seekdir(struct glfs_fd *fd, long offset)
*/
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_seekdir, 3.4.0);
-
static int
glfs_discard_async_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
@@ -3111,6 +3526,7 @@ invalid_fs:
return ret;
}
+GFAPI_SYMVER_PUBLIC(glfs_discard_async35, glfs_discard_async, 3.5.0)
int
pub_glfs_discard_async35(struct glfs_fd *glfd, off_t offset, size_t len,
glfs_io_cbk34 fn, void *data)
@@ -3119,8 +3535,7 @@ pub_glfs_discard_async35(struct glfs_fd *glfd, off_t offset, size_t len,
data);
}
-GFAPI_SYMVER_PUBLIC(glfs_discard_async35, glfs_discard_async, 3.5.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_discard_async, 6.0)
int
pub_glfs_discard_async(struct glfs_fd *glfd, off_t offset, size_t len,
glfs_io_cbk fn, void *data)
@@ -3128,8 +3543,6 @@ pub_glfs_discard_async(struct glfs_fd *glfd, off_t offset, size_t len,
return glfs_discard_async_common(glfd, offset, len, _gf_false, fn, data);
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_discard_async, future);
-
static int
glfs_zerofill_async_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
@@ -3222,6 +3635,7 @@ invalid_fs:
return ret;
}
+GFAPI_SYMVER_PUBLIC(glfs_zerofill_async35, glfs_zerofill_async, 3.5.0)
int
pub_glfs_zerofill_async35(struct glfs_fd *glfd, off_t offset, off_t len,
glfs_io_cbk34 fn, void *data)
@@ -3230,8 +3644,7 @@ pub_glfs_zerofill_async35(struct glfs_fd *glfd, off_t offset, off_t len,
data);
}
-GFAPI_SYMVER_PUBLIC(glfs_zerofill_async35, glfs_zerofill_async, 3.5.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_zerofill_async, 6.0)
int
pub_glfs_zerofill_async(struct glfs_fd *glfd, off_t offset, off_t len,
glfs_io_cbk fn, void *data)
@@ -3239,8 +3652,6 @@ pub_glfs_zerofill_async(struct glfs_fd *glfd, off_t offset, off_t len,
return glfs_zerofill_async_common(glfd, offset, len, _gf_false, fn, data);
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_zerofill_async, future);
-
void
gf_dirent_to_dirent(gf_dirent_t *gf_dirent, struct dirent *dirent)
{
@@ -3258,7 +3669,7 @@ gf_dirent_to_dirent(gf_dirent_t *gf_dirent, struct dirent *dirent)
dirent->d_namlen = strlen(gf_dirent->d_name);
#endif
- snprintf(dirent->d_name, NAME_MAX, "%s", gf_dirent->d_name);
+ snprintf(dirent->d_name, NAME_MAX + 1, "%s", gf_dirent->d_name);
}
int
@@ -3400,6 +3811,7 @@ unlock:
return buf;
}
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readdirplus_r, 3.4.0)
int
pub_glfs_readdirplus_r(struct glfs_fd *glfd, struct stat *stat,
struct dirent *ext, struct dirent **res)
@@ -3455,8 +3867,7 @@ invalid_fs:
return -1;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readdirplus_r, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readdir_r, 3.4.0)
int
pub_glfs_readdir_r(struct glfs_fd *glfd, struct dirent *buf,
struct dirent **res)
@@ -3464,8 +3875,7 @@ pub_glfs_readdir_r(struct glfs_fd *glfd, struct dirent *buf,
return pub_glfs_readdirplus_r(glfd, 0, buf, res);
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readdir_r, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readdirplus, 3.5.0)
struct dirent *
pub_glfs_readdirplus(struct glfs_fd *glfd, struct stat *stat)
{
@@ -3479,16 +3889,14 @@ pub_glfs_readdirplus(struct glfs_fd *glfd, struct stat *stat)
return res;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readdirplus, 3.5.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readdir, 3.5.0)
struct dirent *
pub_glfs_readdir(struct glfs_fd *glfd)
{
return pub_glfs_readdirplus(glfd, NULL);
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readdir, 3.5.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_statvfs, 3.4.0)
int
pub_glfs_statvfs(struct glfs *fs, const char *path, struct statvfs *buf)
{
@@ -3534,13 +3942,13 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_statvfs, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setattr, 6.0)
int
-glfs_setattr(struct glfs *fs, const char *path, struct iatt *iatt, int valid,
- int follow)
+pub_glfs_setattr(struct glfs *fs, const char *path, struct glfs_stat *stat,
+ int follow)
{
int ret = -1;
+ int glvalid;
xlator_t *subvol = NULL;
loc_t loc = {
0,
@@ -3548,11 +3956,16 @@ glfs_setattr(struct glfs *fs, const char *path, struct iatt *iatt, int valid,
struct iatt riatt = {
0,
};
+ struct iatt iatt = {
+ 0,
+ };
int reval = 0;
DECLARE_OLD_THIS;
__GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+ GF_VALIDATE_OR_GOTO("glfs_setattr", stat, out);
+
subvol = glfs_active_subvol(fs);
if (!subvol) {
ret = -1;
@@ -3570,8 +3983,11 @@ retry:
if (ret)
goto out;
+ glfs_iatt_from_statx(&iatt, stat);
+ glfsflags_from_gfapiflags(stat, &glvalid);
+
/* TODO : Add leaseid */
- ret = syncop_setattr(subvol, &loc, iatt, valid, 0, 0, NULL, NULL);
+ ret = syncop_setattr(subvol, &loc, &iatt, glvalid, 0, 0, NULL, NULL);
DECODE_SYNCOP_ERR(ret);
ESTALE_RETRY(ret, errno, reval, &loc, retry);
@@ -3586,10 +4002,15 @@ invalid_fs:
return ret;
}
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fsetattr, 6.0)
int
-glfs_fsetattr(struct glfs_fd *glfd, struct iatt *iatt, int valid)
+pub_glfs_fsetattr(struct glfs_fd *glfd, struct glfs_stat *stat)
{
int ret = -1;
+ int glvalid;
+ struct iatt iatt = {
+ 0,
+ };
xlator_t *subvol = NULL;
fd_t *fd = NULL;
@@ -3598,6 +4019,8 @@ glfs_fsetattr(struct glfs_fd *glfd, struct iatt *iatt, int valid)
GF_REF_GET(glfd);
+ GF_VALIDATE_OR_GOTO("glfs_fsetattr", stat, out);
+
subvol = glfs_active_subvol(glfd->fs);
if (!subvol) {
ret = -1;
@@ -3612,8 +4035,11 @@ glfs_fsetattr(struct glfs_fd *glfd, struct iatt *iatt, int valid)
goto out;
}
+ glfs_iatt_from_statx(&iatt, stat);
+ glfsflags_from_gfapiflags(stat, &glvalid);
+
/* TODO : Add leaseid */
- ret = syncop_fsetattr(subvol, fd, iatt, valid, 0, 0, NULL, NULL);
+ ret = syncop_fsetattr(subvol, fd, &iatt, glvalid, 0, 0, NULL, NULL);
DECODE_SYNCOP_ERR(ret);
out:
if (fd)
@@ -3629,196 +4055,174 @@ invalid_fs:
return ret;
}
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_chmod, 3.4.0)
int
pub_glfs_chmod(struct glfs *fs, const char *path, mode_t mode)
{
int ret = -1;
- struct iatt iatt = {
+ struct glfs_stat stat = {
0,
};
- int valid = 0;
- iatt.ia_prot = ia_prot_from_st_mode(mode);
- valid = GF_SET_ATTR_MODE;
+ stat.glfs_st_mode = mode;
+ stat.glfs_st_mask = GLFS_STAT_MODE;
- ret = glfs_setattr(fs, path, &iatt, valid, 1);
+ ret = glfs_setattr(fs, path, &stat, 1);
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_chmod, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fchmod, 3.4.0)
int
pub_glfs_fchmod(struct glfs_fd *glfd, mode_t mode)
{
int ret = -1;
- struct iatt iatt = {
+ struct glfs_stat stat = {
0,
};
- int valid = 0;
- iatt.ia_prot = ia_prot_from_st_mode(mode);
- valid = GF_SET_ATTR_MODE;
+ stat.glfs_st_mode = mode;
+ stat.glfs_st_mask = GLFS_STAT_MODE;
- ret = glfs_fsetattr(glfd, &iatt, valid);
+ ret = glfs_fsetattr(glfd, &stat);
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fchmod, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_chown, 3.4.0)
int
pub_glfs_chown(struct glfs *fs, const char *path, uid_t uid, gid_t gid)
{
int ret = 0;
- int valid = 0;
- struct iatt iatt = {
+ struct glfs_stat stat = {
0,
};
if (uid != (uid_t)-1) {
- iatt.ia_uid = uid;
- valid = GF_SET_ATTR_UID;
+ stat.glfs_st_uid = uid;
+ stat.glfs_st_mask = GLFS_STAT_UID;
}
if (gid != (uid_t)-1) {
- iatt.ia_gid = gid;
- valid = valid | GF_SET_ATTR_GID;
+ stat.glfs_st_gid = gid;
+ stat.glfs_st_mask = stat.glfs_st_mask | GLFS_STAT_GID;
}
- if (valid)
- ret = glfs_setattr(fs, path, &iatt, valid, 1);
+ if (stat.glfs_st_mask)
+ ret = glfs_setattr(fs, path, &stat, 1);
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_chown, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lchown, 3.4.0)
int
pub_glfs_lchown(struct glfs *fs, const char *path, uid_t uid, gid_t gid)
{
int ret = 0;
- int valid = 0;
- struct iatt iatt = {
+ struct glfs_stat stat = {
0,
};
if (uid != (uid_t)-1) {
- iatt.ia_uid = uid;
- valid = GF_SET_ATTR_UID;
+ stat.glfs_st_uid = uid;
+ stat.glfs_st_mask = GLFS_STAT_UID;
}
if (gid != (uid_t)-1) {
- iatt.ia_gid = gid;
- valid = valid | GF_SET_ATTR_GID;
+ stat.glfs_st_gid = gid;
+ stat.glfs_st_mask = stat.glfs_st_mask | GLFS_STAT_GID;
}
- if (valid)
- ret = glfs_setattr(fs, path, &iatt, valid, 0);
+ if (stat.glfs_st_mask)
+ ret = glfs_setattr(fs, path, &stat, 0);
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lchown, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fchown, 3.4.0)
int
pub_glfs_fchown(struct glfs_fd *glfd, uid_t uid, gid_t gid)
{
int ret = 0;
- int valid = 0;
- struct iatt iatt = {
+ struct glfs_stat stat = {
0,
};
if (uid != (uid_t)-1) {
- iatt.ia_uid = uid;
- valid = GF_SET_ATTR_UID;
+ stat.glfs_st_uid = uid;
+ stat.glfs_st_mask = GLFS_STAT_UID;
}
if (gid != (uid_t)-1) {
- iatt.ia_gid = gid;
- valid = valid | GF_SET_ATTR_GID;
+ stat.glfs_st_gid = gid;
+ stat.glfs_st_mask = stat.glfs_st_mask | GLFS_STAT_GID;
}
- if (valid)
- ret = glfs_fsetattr(glfd, &iatt, valid);
+ if (stat.glfs_st_mask)
+ ret = glfs_fsetattr(glfd, &stat);
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fchown, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_utimens, 3.4.0)
int
pub_glfs_utimens(struct glfs *fs, const char *path,
const struct timespec times[2])
{
int ret = -1;
- int valid = 0;
- struct iatt iatt = {
+ struct glfs_stat stat = {
0,
};
- iatt.ia_atime = times[0].tv_sec;
- iatt.ia_atime_nsec = times[0].tv_nsec;
- iatt.ia_mtime = times[1].tv_sec;
- iatt.ia_mtime_nsec = times[1].tv_nsec;
+ stat.glfs_st_atime = times[0];
+ stat.glfs_st_mtime = times[1];
- valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
+ stat.glfs_st_mask = GLFS_STAT_ATIME | GLFS_STAT_MTIME;
- ret = glfs_setattr(fs, path, &iatt, valid, 1);
+ ret = glfs_setattr(fs, path, &stat, 1);
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_utimens, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lutimens, 3.4.0)
int
pub_glfs_lutimens(struct glfs *fs, const char *path,
const struct timespec times[2])
{
int ret = -1;
- int valid = 0;
- struct iatt iatt = {
+ struct glfs_stat stat = {
0,
};
- iatt.ia_atime = times[0].tv_sec;
- iatt.ia_atime_nsec = times[0].tv_nsec;
- iatt.ia_mtime = times[1].tv_sec;
- iatt.ia_mtime_nsec = times[1].tv_nsec;
+ stat.glfs_st_atime = times[0];
+ stat.glfs_st_mtime = times[1];
- valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
+ stat.glfs_st_mask = GLFS_STAT_ATIME | GLFS_STAT_MTIME;
- ret = glfs_setattr(fs, path, &iatt, valid, 0);
+ ret = glfs_setattr(fs, path, &stat, 0);
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lutimens, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_futimens, 3.4.0)
int
pub_glfs_futimens(struct glfs_fd *glfd, const struct timespec times[2])
{
int ret = -1;
- int valid = 0;
- struct iatt iatt = {
+ struct glfs_stat stat = {
0,
};
- iatt.ia_atime = times[0].tv_sec;
- iatt.ia_atime_nsec = times[0].tv_nsec;
- iatt.ia_mtime = times[1].tv_sec;
- iatt.ia_mtime_nsec = times[1].tv_nsec;
+ stat.glfs_st_atime = times[0];
+ stat.glfs_st_mtime = times[1];
- valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
+ stat.glfs_st_mask = GLFS_STAT_ATIME | GLFS_STAT_MTIME;
- ret = glfs_fsetattr(glfd, &iatt, valid);
+ ret = glfs_fsetattr(glfd, &stat);
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_futimens, 3.4.0);
-
int
glfs_getxattr_process(void *value, size_t size, dict_t *xattr, const char *name)
{
@@ -3918,6 +4322,7 @@ invalid_fs:
return ret;
}
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_getxattr, 3.4.0)
ssize_t
pub_glfs_getxattr(struct glfs *fs, const char *path, const char *name,
void *value, size_t size)
@@ -3925,8 +4330,7 @@ pub_glfs_getxattr(struct glfs *fs, const char *path, const char *name,
return glfs_getxattr_common(fs, path, name, value, size, 1);
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_getxattr, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lgetxattr, 3.4.0)
ssize_t
pub_glfs_lgetxattr(struct glfs *fs, const char *path, const char *name,
void *value, size_t size)
@@ -3934,8 +4338,7 @@ pub_glfs_lgetxattr(struct glfs *fs, const char *path, const char *name,
return glfs_getxattr_common(fs, path, name, value, size, 0);
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lgetxattr, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fgetxattr, 3.4.0)
ssize_t
pub_glfs_fgetxattr(struct glfs_fd *glfd, const char *name, void *value,
size_t size)
@@ -3998,8 +4401,6 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fgetxattr, 3.4.0);
-
int
glfs_listxattr_process(void *value, size_t size, dict_t *xattr)
{
@@ -4083,22 +4484,21 @@ invalid_fs:
return ret;
}
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_listxattr, 3.4.0)
ssize_t
pub_glfs_listxattr(struct glfs *fs, const char *path, void *value, size_t size)
{
return glfs_listxattr_common(fs, path, value, size, 1);
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_listxattr, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_llistxattr, 3.4.0)
ssize_t
pub_glfs_llistxattr(struct glfs *fs, const char *path, void *value, size_t size)
{
return glfs_listxattr_common(fs, path, value, size, 0);
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_llistxattr, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_flistxattr, 3.4.0)
ssize_t
pub_glfs_flistxattr(struct glfs_fd *glfd, void *value, size_t size)
{
@@ -4148,8 +4548,6 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_flistxattr, 3.4.0);
-
int
glfs_setxattr_common(struct glfs *fs, const char *path, const char *name,
const void *value, size_t size, int flags, int follow)
@@ -4229,6 +4627,7 @@ invalid_fs:
return ret;
}
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setxattr, 3.4.0)
int
pub_glfs_setxattr(struct glfs *fs, const char *path, const char *name,
const void *value, size_t size, int flags)
@@ -4236,8 +4635,7 @@ pub_glfs_setxattr(struct glfs *fs, const char *path, const char *name,
return glfs_setxattr_common(fs, path, name, value, size, flags, 1);
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setxattr, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lsetxattr, 3.4.0)
int
pub_glfs_lsetxattr(struct glfs *fs, const char *path, const char *name,
const void *value, size_t size, int flags)
@@ -4245,8 +4643,7 @@ pub_glfs_lsetxattr(struct glfs *fs, const char *path, const char *name,
return glfs_setxattr_common(fs, path, name, value, size, flags, 0);
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lsetxattr, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fsetxattr, 3.4.0)
int
pub_glfs_fsetxattr(struct glfs_fd *glfd, const char *name, const void *value,
size_t size, int flags)
@@ -4321,8 +4718,6 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fsetxattr, 3.4.0);
-
int
glfs_removexattr_common(struct glfs *fs, const char *path, const char *name,
int follow)
@@ -4373,22 +4768,21 @@ invalid_fs:
return ret;
}
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_removexattr, 3.4.0)
int
pub_glfs_removexattr(struct glfs *fs, const char *path, const char *name)
{
return glfs_removexattr_common(fs, path, name, 1);
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_removexattr, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lremovexattr, 3.4.0)
int
pub_glfs_lremovexattr(struct glfs *fs, const char *path, const char *name)
{
return glfs_removexattr_common(fs, path, name, 0);
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lremovexattr, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fremovexattr, 3.4.0)
int
pub_glfs_fremovexattr(struct glfs_fd *glfd, const char *name)
{
@@ -4431,8 +4825,7 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fremovexattr, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fallocate, 3.5.0)
int
pub_glfs_fallocate(struct glfs_fd *glfd, int keep_size, off_t offset,
size_t len)
@@ -4483,8 +4876,7 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fallocate, 3.5.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_discard, 3.5.0)
int
pub_glfs_discard(struct glfs_fd *glfd, off_t offset, size_t len)
{
@@ -4534,8 +4926,7 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_discard, 3.5.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_zerofill, 3.5.0)
int
pub_glfs_zerofill(struct glfs_fd *glfd, off_t offset, off_t len)
{
@@ -4583,8 +4974,7 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_zerofill, 3.5.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_chdir, 3.4.0)
int
pub_glfs_chdir(struct glfs *fs, const char *path)
{
@@ -4634,8 +5024,7 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_chdir, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fchdir, 3.4.0)
int
pub_glfs_fchdir(struct glfs_fd *glfd)
{
@@ -4687,8 +5076,6 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fchdir, 3.4.0);
-
static gf_boolean_t warn_realpath = _gf_true; /* log once */
static char *
@@ -4749,7 +5136,7 @@ retry:
goto out;
if (loc.path) {
- snprintf(retpath, PATH_MAX, "%s", loc.path);
+ snprintf(retpath, PATH_MAX + 1, "%s", loc.path);
}
out:
@@ -4771,22 +5158,21 @@ invalid_fs:
return retpath;
}
+GFAPI_SYMVER_PUBLIC(glfs_realpath34, glfs_realpath, 3.4.0)
char *
pub_glfs_realpath34(struct glfs *fs, const char *path, char *resolved_path)
{
return glfs_realpath_common(fs, path, resolved_path, _gf_true);
}
-GFAPI_SYMVER_PUBLIC(glfs_realpath34, glfs_realpath, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_realpath, 3.7.17)
char *
pub_glfs_realpath(struct glfs *fs, const char *path, char *resolved_path)
{
return glfs_realpath_common(fs, path, resolved_path, _gf_false);
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_realpath, 3.7.17);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_getcwd, 3.4.0)
char *
pub_glfs_getcwd(struct glfs *fs, char *buf, size_t n)
{
@@ -4835,8 +5221,6 @@ invalid_fs:
return buf;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_getcwd, 3.4.0);
-
static void
gf_flock_to_flock(struct gf_flock *gf_flock, struct flock *flock)
{
@@ -4922,11 +5306,9 @@ glfs_lock_common(struct glfs_fd *glfd, int cmd, struct flock *flock,
if (ret == 0 && (cmd == F_SETLK || cmd == F_SETLKW)) {
ret = fd_lk_insert_and_merge(fd, cmd, &saved_flock);
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0,
- API_MSG_LOCK_INSERT_MERGE_FAILED,
- "Lock insertion and splitting/merging failed "
- "on gfid %s",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0,
+ API_MSG_LOCK_INSERT_MERGE_FAILED, "gfid=%s",
+ uuid_utoa(fd->inode->gfid), NULL);
ret = 0;
}
}
@@ -4945,6 +5327,7 @@ invalid_fs:
return ret;
}
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_file_lock, 4.0.0)
int
pub_glfs_file_lock(struct glfs_fd *glfd, int cmd, struct flock *flock,
glfs_lock_mode_t lk_mode)
@@ -4965,9 +5348,8 @@ pub_glfs_file_lock(struct glfs_fd *glfd, int cmd, struct flock *flock,
* GLFS_LK_MANDATORY */
ret = dict_set_uint32(xdata_in, GF_LOCK_MODE, GF_LK_MANDATORY);
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0,
- API_MSG_SETTING_LOCK_TYPE_FAILED,
- "Setting lock type failed");
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0,
+ API_MSG_SETTING_LOCK_TYPE_FAILED, NULL);
ret = -1;
errno = ENOMEM;
goto out;
@@ -4982,16 +5364,14 @@ out:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_file_lock, 4.0.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_posix_lock, 3.4.0)
int
pub_glfs_posix_lock(struct glfs_fd *glfd, int cmd, struct flock *flock)
{
return glfs_lock_common(glfd, cmd, flock, NULL);
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_posix_lock, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fd_set_lkowner, 3.10.7)
int
pub_glfs_fd_set_lkowner(struct glfs_fd *glfd, void *data, int len)
{
@@ -5008,8 +5388,8 @@ pub_glfs_fd_set_lkowner(struct glfs_fd *glfd, void *data, int len)
if ((len <= 0) || (len > GFAPI_MAX_LOCK_OWNER_LEN)) {
errno = EINVAL;
- gf_msg(THIS->name, GF_LOG_ERROR, errno, LG_MSG_INVALID_ARG,
- "Invalid lk_owner len (%d)", len);
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_INVALID_ARG,
+ "lk_owner len=%d", len, NULL);
goto out;
}
@@ -5027,8 +5407,8 @@ out:
invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fd_set_lkowner, 3.10.7);
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_dup, 3.4.0)
struct glfs_fd *
pub_glfs_dup(struct glfs_fd *glfd)
{
@@ -5079,8 +5459,6 @@ invalid_fs:
return dupfd;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_dup, 3.4.0);
-
static void
glfs_enqueue_upcall_data(struct glfs *fs, struct gf_upcall *upcall_data)
{
@@ -5093,8 +5471,8 @@ glfs_enqueue_upcall_data(struct glfs *fs, struct gf_upcall *upcall_data)
u_list = GF_CALLOC(1, sizeof(*u_list), glfs_mt_upcall_entry_t);
if (!u_list) {
- gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, API_MSG_ALLOC_FAILED,
- "Upcall entry allocation failed.");
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM, API_MSG_ALLOC_FAILED, "entry",
+ NULL);
goto out;
}
@@ -5116,8 +5494,7 @@ glfs_enqueue_upcall_data(struct glfs *fs, struct gf_upcall *upcall_data)
}
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, errno, API_MSG_INVALID_ENTRY,
- "Upcall entry validation failed.");
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_INVALID_ENTRY, NULL);
goto out;
}
@@ -5159,9 +5536,7 @@ glfs_recall_lease_fd(struct glfs *fs, struct gf_upcall *up_data)
inode_t *inode = NULL;
struct glfs_fd *glfd = NULL;
struct glfs_fd *tmp = NULL;
- struct list_head glfd_list = {
- 0,
- };
+ struct list_head glfd_list;
fd_t *fd = NULL;
uint64_t value = 0;
struct glfs_lease lease = {
@@ -5189,9 +5564,9 @@ glfs_recall_lease_fd(struct glfs *fs, struct gf_upcall *up_data)
inode = inode_find(subvol->itable, up_data->gfid);
if (!inode) {
ret = -1;
- gf_msg(THIS->name, GF_LOG_ERROR, errno, API_MSG_INODE_FIND_FAILED,
- "Unable to find inode entry for gfid:%s graph id:%d",
- uuid_utoa(up_data->gfid), subvol->graph->id);
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_INODE_FIND_FAILED,
+ "gfid=%s", uuid_utoa(up_data->gfid), "graph_id=%d",
+ subvol->graph->id, NULL);
goto out;
}
@@ -5210,22 +5585,24 @@ glfs_recall_lease_fd(struct glfs *fs, struct gf_upcall *up_data)
}
UNLOCK(&inode->lock);
- list_for_each_entry_safe(glfd, tmp, &glfd_list, list)
- {
- LOCK(&glfd->lock);
+ if (!list_empty(&glfd_list)) {
+ list_for_each_entry_safe(glfd, tmp, &glfd_list, list)
{
- if (glfd->state != GLFD_CLOSE) {
- gf_msg_trace(THIS->name, 0,
- "glfd (%p) has held lease, "
- "calling recall cbk",
- glfd);
- glfd->cbk(lease, glfd->cookie);
+ LOCK(&glfd->lock);
+ {
+ if (glfd->state != GLFD_CLOSE) {
+ gf_msg_trace(THIS->name, 0,
+ "glfd (%p) has held lease, "
+ "calling recall cbk",
+ glfd);
+ glfd->cbk(lease, glfd->cookie);
+ }
}
- }
- UNLOCK(&glfd->lock);
+ UNLOCK(&glfd->lock);
- list_del_init(&glfd->list);
- GF_REF_PUT(glfd);
+ list_del_init(&glfd->list);
+ GF_REF_PUT(glfd);
+ }
}
out:
@@ -5267,8 +5644,8 @@ glfs_recall_lease_upcall(struct glfs *fs, struct glfs_upcall *up_arg,
* the handle and hence will no more be interested in
* the upcall for this particular gfid.
*/
- gf_msg(THIS->name, GF_LOG_DEBUG, errno, API_MSG_CREATE_HANDLE_FAILED,
- "handle creation of %s failed", uuid_utoa(up_data->gfid));
+ gf_smsg(THIS->name, GF_LOG_DEBUG, errno, API_MSG_CREATE_HANDLE_FAILED,
+ "gfid=%s", uuid_utoa(up_data->gfid), NULL);
errno = ESTALE;
goto out;
}
@@ -5300,22 +5677,65 @@ out:
}
static int
+upcall_syncop_args_free(struct upcall_syncop_args *args)
+{
+ dict_t *dict = NULL;
+ struct gf_upcall *upcall_data = NULL;
+
+ if (args) {
+ upcall_data = &args->upcall_data;
+ switch (upcall_data->event_type) {
+ case GF_UPCALL_CACHE_INVALIDATION:
+ dict = ((struct gf_upcall_cache_invalidation *)(upcall_data
+ ->data))
+ ->dict;
+ break;
+ case GF_UPCALL_RECALL_LEASE:
+ dict = ((struct gf_upcall_recall_lease *)(upcall_data->data))
+ ->dict;
+ break;
+ }
+ if (dict)
+ dict_unref(dict);
+
+ GF_FREE(upcall_data->client_uid);
+ GF_FREE(upcall_data->data);
+ }
+ GF_FREE(args);
+ return 0;
+}
+
+static int
+glfs_upcall_syncop_cbk(int ret, call_frame_t *frame, void *opaque)
+{
+ struct upcall_syncop_args *args = opaque;
+
+ (void)upcall_syncop_args_free(args);
+
+ return 0;
+}
+
+static int
glfs_cbk_upcall_syncop(void *opaque)
{
struct upcall_syncop_args *args = opaque;
- int ret = -1;
+ struct gf_upcall *upcall_data = NULL;
struct glfs_upcall *up_arg = NULL;
struct glfs *fs;
- struct gf_upcall *upcall_data;
+ int ret = -1;
fs = args->fs;
- upcall_data = args->upcall_data;
+ upcall_data = &args->upcall_data;
+
+ if (!upcall_data) {
+ goto out;
+ }
up_arg = GLFS_CALLOC(1, sizeof(struct gf_upcall), glfs_release_upcall,
glfs_mt_upcall_entry_t);
if (!up_arg) {
- gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, API_MSG_ALLOC_FAILED,
- "Upcall entry allocation failed.");
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM, API_MSG_ALLOC_FAILED, "entry",
+ NULL);
goto out;
}
@@ -5330,41 +5750,156 @@ glfs_cbk_upcall_syncop(void *opaque)
errno = EINVAL;
}
- if (!ret && (up_arg->reason != GLFS_UPCALL_EVENT_NULL)) {
- /* It could so happen that the file which got
- * upcall notification may have got deleted by
- * the same client. In such cases up_arg->reason
- * is set to GLFS_UPCALL_EVENT_NULL. No need to
- * send upcall then */
- (fs->up_cbk)(up_arg, fs->up_data);
- } else if (up_arg->reason == GLFS_UPCALL_EVENT_NULL) {
- gf_msg(THIS->name, GF_LOG_DEBUG, errno, API_MSG_INVALID_ENTRY,
- "Upcall_EVENT_NULL received. Skipping it.");
+ /* It could so happen that the file which got
+ * upcall notification may have got deleted by
+ * the same client. In such cases up_arg->reason
+ * is set to GLFS_UPCALL_EVENT_NULL. No need to
+ * send upcall then
+ */
+ if (up_arg->reason == GLFS_UPCALL_EVENT_NULL) {
+ gf_smsg(THIS->name, GF_LOG_DEBUG, errno,
+ API_MSG_UPCALL_EVENT_NULL_RECEIVED, NULL);
+ ret = 0;
+ GLFS_FREE(up_arg);
goto out;
- } else {
- gf_msg(THIS->name, GF_LOG_ERROR, errno, API_MSG_INVALID_ENTRY,
- "Upcall entry validation failed.");
+ } else if (ret) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_INVALID_ENTRY, NULL);
+ GLFS_FREE(up_arg);
goto out;
}
+ if (fs->up_cbk && up_arg)
+ (fs->up_cbk)(up_arg, fs->up_data);
+
/* application takes care of calling glfs_free on up_arg post
* their processing */
- ret = 0;
out:
- if (ret && up_arg) {
- GLFS_FREE(up_arg);
+ return ret;
+}
+
+static struct gf_upcall_cache_invalidation *
+gf_copy_cache_invalidation(struct gf_upcall_cache_invalidation *src)
+{
+ struct gf_upcall_cache_invalidation *dst = NULL;
+
+ if (!src)
+ goto out;
+
+ dst = GF_CALLOC(1, sizeof(struct gf_upcall_cache_invalidation),
+ glfs_mt_upcall_entry_t);
+
+ if (!dst) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM, API_MSG_ALLOC_FAILED, "entry",
+ NULL);
+ goto out;
}
- return ret;
+ dst->flags = src->flags;
+ dst->expire_time_attr = src->expire_time_attr;
+ dst->stat = src->stat;
+ dst->p_stat = src->p_stat;
+ dst->oldp_stat = src->oldp_stat;
+
+ if (src->dict)
+ dst->dict = dict_copy_with_ref(src->dict, NULL);
+
+ return dst;
+out:
+ return NULL;
+}
+
+static struct gf_upcall_recall_lease *
+gf_copy_recall_lease(struct gf_upcall_recall_lease *src)
+{
+ struct gf_upcall_recall_lease *dst = NULL;
+
+ if (!src)
+ goto out;
+
+ dst = GF_CALLOC(1, sizeof(struct gf_upcall_recall_lease),
+ glfs_mt_upcall_entry_t);
+
+ if (!dst) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM, API_MSG_ALLOC_FAILED, "entry",
+ NULL);
+ goto out;
+ }
+
+ dst->lease_type = src->lease_type;
+ memcpy(dst->tid, src->tid, 16);
+
+ if (src->dict)
+ dst->dict = dict_copy_with_ref(src->dict, NULL);
+
+ return dst;
+out:
+ return NULL;
+}
+
+static struct upcall_syncop_args *
+upcall_syncop_args_init(struct glfs *fs, struct gf_upcall *upcall_data)
+{
+ struct upcall_syncop_args *args = NULL;
+ int ret = -1;
+ struct gf_upcall *t_data = NULL;
+
+ if (!fs || !upcall_data)
+ goto out;
+
+ args = GF_CALLOC(1, sizeof(struct upcall_syncop_args),
+ glfs_mt_upcall_entry_t);
+ if (!args) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM, API_MSG_ALLOC_FAILED,
+ "syncop args", NULL);
+ goto out;
+ }
+
+ /* Note: we are not taking any ref on fs here.
+ * Ideally applications have to unregister for upcall events
+ * or stop polling for upcall events before performing
+ * glfs_fini. And as for outstanding synctasks created, we wait
+ * for all syncenv threads to finish tasks before cleaning up the
+ * fs->ctx. Hence it seems safe to process these callback
+ * notification without taking any lock/ref.
+ */
+ args->fs = fs;
+ t_data = &(args->upcall_data);
+ t_data->client_uid = gf_strdup(upcall_data->client_uid);
+
+ gf_uuid_copy(t_data->gfid, upcall_data->gfid);
+ t_data->event_type = upcall_data->event_type;
+
+ switch (t_data->event_type) {
+ case GF_UPCALL_CACHE_INVALIDATION:
+ t_data->data = gf_copy_cache_invalidation(
+ (struct gf_upcall_cache_invalidation *)upcall_data->data);
+ break;
+ case GF_UPCALL_RECALL_LEASE:
+ t_data->data = gf_copy_recall_lease(
+ (struct gf_upcall_recall_lease *)upcall_data->data);
+ break;
+ }
+
+ if (!t_data->data)
+ goto out;
+
+ return args;
+out:
+ if (ret) {
+ if (args) {
+ GF_FREE(args->upcall_data.client_uid);
+ GF_FREE(args);
+ }
+ }
+
+ return NULL;
}
static void
glfs_cbk_upcall_data(struct glfs *fs, struct gf_upcall *upcall_data)
{
- struct upcall_syncop_args args = {
- 0,
- };
+ struct upcall_syncop_args *args = NULL;
int ret = -1;
if (!fs || !upcall_data)
@@ -5375,16 +5910,19 @@ glfs_cbk_upcall_data(struct glfs *fs, struct gf_upcall *upcall_data)
goto out;
}
- args.fs = fs;
- args.upcall_data = upcall_data;
+ args = upcall_syncop_args_init(fs, upcall_data);
- ret = synctask_new(THIS->ctx->env, glfs_cbk_upcall_syncop, NULL, NULL,
- &args);
+ if (!args)
+ goto out;
+
+ ret = synctask_new(THIS->ctx->env, glfs_cbk_upcall_syncop,
+ glfs_upcall_syncop_cbk, NULL, args);
/* should we retry incase of failure? */
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, errno, API_MSG_UPCALL_SYNCOP_FAILED,
- "Synctak for Upcall event_type(%d) and gfid(%s) failed",
- upcall_data->event_type, (char *)(upcall_data->gfid));
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_UPCALL_SYNCOP_FAILED,
+ "event_type=%d", upcall_data->event_type, "gfid=%s",
+ (char *)(upcall_data->gfid), NULL);
+ upcall_syncop_args_free(args);
}
out:
@@ -5405,6 +5943,7 @@ out:
* Otherwise all the upcall events are queued up in a list
* to be read/polled by the applications.
*/
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_process_upcall_event, 3.7.0)
void
priv_glfs_process_upcall_event(struct glfs *fs, void *data)
{
@@ -5472,7 +6011,6 @@ out:
err:
return;
}
-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_process_upcall_event, 3.7.0);
ssize_t
glfs_anonymous_pwritev(struct glfs *fs, struct glfs_object *object,
@@ -5511,8 +6049,7 @@ glfs_anonymous_pwritev(struct glfs *fs, struct glfs_object *object,
fd = fd_anonymous(inode);
if (!fd) {
ret = -1;
- gf_msg("gfapi", GF_LOG_ERROR, ENOMEM, API_MSG_FDCREATE_FAILED,
- "Allocating anonymous fd failed");
+ gf_smsg("gfapi", GF_LOG_ERROR, ENOMEM, API_MSG_FDCREATE_FAILED, NULL);
errno = ENOMEM;
goto out;
}
@@ -5610,8 +6147,7 @@ glfs_anonymous_preadv(struct glfs *fs, struct glfs_object *object,
fd = fd_anonymous(inode);
if (!fd) {
ret = -1;
- gf_msg("gfapi", GF_LOG_ERROR, ENOMEM, API_MSG_FDCREATE_FAILED,
- "Allocating anonymous fd failed");
+ gf_smsg("gfapi", GF_LOG_ERROR, ENOMEM, API_MSG_FDCREATE_FAILED, NULL);
errno = ENOMEM;
goto out;
}
@@ -5660,6 +6196,7 @@ glfs_release_xreaddirp_stat(void *ptr)
* Given glfd of a directory, this function does readdirp and returns
* xstat along with dirents.
*/
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_xreaddirplus_r, 3.11.0)
int
pub_glfs_xreaddirplus_r(struct glfs_fd *glfd, uint32_t flags,
struct glfs_xreaddirp_stat **xstat_p,
@@ -5754,8 +6291,8 @@ out:
GF_REF_PUT(glfd);
if (ret < 0) {
- gf_msg(THIS->name, GF_LOG_WARNING, errno, API_MSG_XREADDIRP_R_FAILED,
- "glfs_x_readdirp_r failed - reason (%s)", strerror(errno));
+ gf_smsg(THIS->name, GF_LOG_WARNING, errno, API_MSG_XREADDIRP_R_FAILED,
+ "reason=%s", strerror(errno), NULL);
if (xstat)
GLFS_FREE(xstat);
@@ -5768,41 +6305,43 @@ out:
invalid_fs:
return -1;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_xreaddirplus_r, 3.11.0);
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_xreaddirplus_get_stat, 3.11.0)
struct stat *
pub_glfs_xreaddirplus_get_stat(struct glfs_xreaddirp_stat *xstat)
{
GF_VALIDATE_OR_GOTO("glfs_xreaddirplus_get_stat", xstat, out);
if (!xstat->flags_handled & GFAPI_XREADDIRP_STAT)
- gf_msg(THIS->name, GF_LOG_ERROR, errno, LG_MSG_INVALID_ARG,
- "GFAPI_XREADDIRP_STAT is not set. Flags"
- "handled for xstat(%p) are (%x)",
- xstat, xstat->flags_handled);
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_FLAGS_HANDLE,
+ "GFAPI_XREADDIRP_STAT"
+ "xstat=%p",
+ xstat, "handles=%x", xstat->flags_handled, NULL);
return &xstat->st;
out:
return NULL;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_xreaddirplus_get_stat, 3.11.0);
void
gf_lease_to_glfs_lease(struct gf_lease *gf_lease, struct glfs_lease *lease)
{
+ u_int lease_type = gf_lease->lease_type;
lease->cmd = gf_lease->cmd;
- lease->lease_type = gf_lease->lease_type;
+ lease->lease_type = lease_type;
memcpy(lease->lease_id, gf_lease->lease_id, LEASE_ID_SIZE);
}
void
glfs_lease_to_gf_lease(struct glfs_lease *lease, struct gf_lease *gf_lease)
{
+ u_int lease_type = lease->lease_type;
gf_lease->cmd = lease->cmd;
- gf_lease->lease_type = lease->lease_type;
+ gf_lease->lease_type = lease_type;
memcpy(gf_lease->lease_id, lease->lease_id, LEASE_ID_SIZE);
}
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lease, 4.0.0)
int
pub_glfs_lease(struct glfs_fd *glfd, struct glfs_lease *lease,
glfs_recall_cbk fn, void *data)
@@ -5883,8 +6422,8 @@ pub_glfs_lease(struct glfs_fd *glfd, struct glfs_lease *lease,
if (ret == 0) {
ret = fd_ctx_set(glfd->fd, subvol, (uint64_t)(long)glfd);
if (ret) {
- gf_msg(subvol->name, GF_LOG_ERROR, ENOMEM, API_MSG_FDCTX_SET_FAILED,
- "Setting fd ctx failed for fd(%p)", glfd->fd);
+ gf_smsg(subvol->name, GF_LOG_ERROR, ENOMEM,
+ API_MSG_FDCTX_SET_FAILED, "fd=%p", glfd->fd, NULL);
goto out;
}
glfd->cbk = fn;
@@ -5904,5 +6443,3 @@ out:
invalid_fs:
return ret;
}
-
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lease, 4.0.0);
diff --git a/api/src/glfs-handleops.c b/api/src/glfs-handleops.c
index 6a020ca7ab6..53c2ee896f9 100644
--- a/api/src/glfs-handleops.c
+++ b/api/src/glfs-handleops.c
@@ -10,7 +10,7 @@
#include "glfs-internal.h"
#include "glfs-mem-types.h"
-#include "syncop.h"
+#include <glusterfs/syncop.h>
#include "glfs.h"
#include "glfs-handles.h"
#include "gfapi-messages.h"
@@ -18,7 +18,7 @@
int
glfs_listxattr_process(void *value, size_t size, dict_t *xattr);
-static void
+void
glfs_iatt_from_stat(struct stat *stat, int valid, struct iatt *iatt,
int *glvalid)
{
@@ -60,6 +60,7 @@ glfs_iatt_from_stat(struct stat *stat, int valid, struct iatt *iatt,
return;
}
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_lookupat, 3.7.4)
struct glfs_object *
pub_glfs_h_lookupat(struct glfs *fs, struct glfs_object *parent,
const char *path, struct stat *stat, int follow)
@@ -126,8 +127,7 @@ invalid_fs:
return object;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_lookupat, 3.7.4);
-
+GFAPI_SYMVER_PUBLIC(glfs_h_lookupat34, glfs_h_lookupat, 3.4.2)
struct glfs_object *
pub_glfs_h_lookupat34(struct glfs *fs, struct glfs_object *parent,
const char *path, struct stat *stat)
@@ -135,8 +135,7 @@ pub_glfs_h_lookupat34(struct glfs *fs, struct glfs_object *parent,
return pub_glfs_h_lookupat(fs, parent, path, stat, 0);
}
-GFAPI_SYMVER_PUBLIC(glfs_h_lookupat34, glfs_h_lookupat, 3.4.2);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_statfs, 3.7.0)
int
pub_glfs_h_statfs(struct glfs *fs, struct glfs_object *object,
struct statvfs *statvfs)
@@ -194,8 +193,7 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_statfs, 3.7.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_stat, 3.4.2)
int
pub_glfs_h_stat(struct glfs *fs, struct glfs_object *object, struct stat *stat)
{
@@ -259,8 +257,7 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_stat, 3.4.2);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_getattrs, 3.4.2)
int
pub_glfs_h_getattrs(struct glfs *fs, struct glfs_object *object,
struct stat *stat)
@@ -317,8 +314,6 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_getattrs, 3.4.2);
-
int
glfs_h_getxattrs_common(struct glfs *fs, struct glfs_object *object,
dict_t **xattr, const char *name,
@@ -380,6 +375,7 @@ out:
return ret;
}
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_getxattrs, 3.5.1)
int
pub_glfs_h_getxattrs(struct glfs *fs, struct glfs_object *object,
const char *name, void *value, size_t size)
@@ -416,8 +412,7 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_getxattrs, 3.5.1);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_setattrs, 3.4.2)
int
pub_glfs_h_setattrs(struct glfs *fs, struct glfs_object *object,
struct stat *stat, int valid)
@@ -480,8 +475,7 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_setattrs, 3.4.2);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_setxattrs, 3.5.0)
int
pub_glfs_h_setxattrs(struct glfs *fs, struct glfs_object *object,
const char *name, const void *value, size_t size,
@@ -568,8 +562,7 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_setxattrs, 3.5.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_removexattrs, 3.5.1)
int
pub_glfs_h_removexattrs(struct glfs *fs, struct glfs_object *object,
const char *name)
@@ -626,8 +619,7 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_removexattrs, 3.5.1);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_open, 3.4.2)
struct glfs_fd *
pub_glfs_h_open(struct glfs *fs, struct glfs_object *object, int flags)
{
@@ -727,8 +719,7 @@ invalid_fs:
return glfd;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_open, 3.4.2);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_creat, 3.4.2)
struct glfs_object *
pub_glfs_h_creat(struct glfs *fs, struct glfs_object *parent, const char *path,
int flags, mode_t mode, struct stat *stat)
@@ -840,8 +831,141 @@ invalid_fs:
return object;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_creat, 3.4.2);
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_creat_open, 6.6)
+struct glfs_object *
+pub_glfs_h_creat_open(struct glfs *fs, struct glfs_object *parent,
+ const char *path, int flags, mode_t mode,
+ struct stat *stat, struct glfs_fd **out_fd)
+{
+ int ret = -1;
+ struct glfs_fd *glfd = NULL;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ struct glfs_object *object = NULL;
+ dict_t *fop_attr = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (parent == NULL) || (path == NULL) ||
+ (out_fd == NULL)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, parent);
+ if (!inode) {
+ ret = -1;
+ goto out;
+ }
+
+ xattr_req = dict_new();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gf_uuid_generate(gfid);
+ ret = dict_set_gfuuid(xattr_req, "gfid-req", gfid, true);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_PINODE(inode, loc, ret, errno, out, path);
+
+ glfd = glfs_fd_new(fs);
+ if (!glfd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ glfd->fd = fd_create(loc.inode, getpid());
+ if (!glfd->fd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+ glfd->fd->flags = flags;
+
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ /* fop/op */
+ ret = syncop_create(subvol, &loc, flags, mode, glfd->fd, &iatt, xattr_req,
+ NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ /* populate out args */
+ if (ret == 0) {
+ glfd->fd->flags = flags;
+
+ ret = glfs_loc_link(&loc, &iatt);
+ if (ret != 0) {
+ goto out;
+ }
+
+ if (stat)
+ glfs_iatt_to_stat(fs, &iatt, stat);
+
+ ret = glfs_create_object(&loc, &object);
+ }
+
+out:
+ if (ret && object != NULL) {
+ /* Release the held reference */
+ glfs_h_close(object);
+ object = NULL;
+ }
+
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ if (xattr_req)
+ dict_unref(xattr_req);
+
+ if (ret && glfd) {
+ GF_REF_PUT(glfd);
+ } else if (glfd) {
+ glfd_set_state_bind(glfd);
+ *out_fd = glfd;
+ }
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return object;
+}
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_mkdir, 3.4.2)
struct glfs_object *
pub_glfs_h_mkdir(struct glfs *fs, struct glfs_object *parent, const char *path,
mode_t mode, struct stat *stat)
@@ -939,8 +1063,7 @@ invalid_fs:
return object;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_mkdir, 3.4.2);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_mknod, 3.4.2)
struct glfs_object *
pub_glfs_h_mknod(struct glfs *fs, struct glfs_object *parent, const char *path,
mode_t mode, dev_t dev, struct stat *stat)
@@ -1037,8 +1160,7 @@ invalid_fs:
return object;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_mknod, 3.4.2);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_unlink, 3.4.2)
int
pub_glfs_h_unlink(struct glfs *fs, struct glfs_object *parent, const char *path)
{
@@ -1109,8 +1231,7 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_unlink, 3.4.2);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_opendir, 3.4.2)
struct glfs_fd *
pub_glfs_h_opendir(struct glfs *fs, struct glfs_object *object)
{
@@ -1192,8 +1313,7 @@ invalid_fs:
return glfd;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_opendir, 3.4.2);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_access, 3.6.0)
int
pub_glfs_h_access(struct glfs *fs, struct glfs_object *object, int mask)
{
@@ -1250,8 +1370,7 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_access, 3.6.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_extract_handle, 3.4.2)
ssize_t
pub_glfs_h_extract_handle(struct glfs_object *object, unsigned char *handle,
int len)
@@ -1282,8 +1401,7 @@ out:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_extract_handle, 3.4.2);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_create_from_handle, 3.4.2)
struct glfs_object *
pub_glfs_h_create_from_handle(struct glfs *fs, unsigned char *handle, int len,
struct stat *stat)
@@ -1360,9 +1478,9 @@ pub_glfs_h_create_from_handle(struct glfs *fs, unsigned char *handle, int len,
ret = syncop_lookup(subvol, &loc, &iatt, 0, 0, 0);
DECODE_SYNCOP_ERR(ret);
if (ret) {
- gf_msg(subvol->name, GF_LOG_WARNING, errno,
- API_MSG_INODE_REFRESH_FAILED, "inode refresh of %s failed: %s",
- uuid_utoa(loc.gfid), strerror(errno));
+ gf_smsg(subvol->name, GF_LOG_WARNING, errno,
+ API_MSG_INODE_REFRESH_FAILED, "gfid=%s", uuid_utoa(loc.gfid),
+ "error=%s", strerror(errno), NULL);
goto out;
}
@@ -1373,8 +1491,8 @@ pub_glfs_h_create_from_handle(struct glfs *fs, unsigned char *handle, int len,
}
inode_lookup(newinode);
} else {
- gf_msg(subvol->name, GF_LOG_WARNING, errno, API_MSG_INODE_LINK_FAILED,
- "inode linking of %s failed", uuid_utoa(loc.gfid));
+ gf_smsg(subvol->name, GF_LOG_WARNING, errno, API_MSG_INODE_LINK_FAILED,
+ "gfid=%s", uuid_utoa(loc.gfid), NULL);
goto out;
}
@@ -1406,8 +1524,7 @@ invalid_fs:
return object;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_create_from_handle, 3.4.2);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_close, 3.4.2)
int
pub_glfs_h_close(struct glfs_object *object)
{
@@ -1420,8 +1537,7 @@ pub_glfs_h_close(struct glfs_object *object)
return 0;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_close, 3.4.2);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_truncate, 3.4.2)
int
pub_glfs_h_truncate(struct glfs *fs, struct glfs_object *object, off_t offset)
{
@@ -1481,8 +1597,7 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_truncate, 3.4.2);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_symlink, 3.4.2)
struct glfs_object *
pub_glfs_h_symlink(struct glfs *fs, struct glfs_object *parent,
const char *name, const char *data, struct stat *stat)
@@ -1581,8 +1696,7 @@ invalid_fs:
return object;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_symlink, 3.4.2);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_readlink, 3.4.2)
int
pub_glfs_h_readlink(struct glfs *fs, struct glfs_object *object, char *buf,
size_t bufsiz)
@@ -1647,8 +1761,7 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_readlink, 3.4.2);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_link, 3.4.2)
int
pub_glfs_h_link(struct glfs *fs, struct glfs_object *linksrc,
struct glfs_object *parent, const char *name)
@@ -1745,8 +1858,7 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_link, 3.4.2);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_rename, 3.4.2)
int
pub_glfs_h_rename(struct glfs *fs, struct glfs_object *olddir,
const char *oldname, struct glfs_object *newdir,
@@ -1856,8 +1968,6 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_rename, 3.4.2);
-
/*
* Given a handle/gfid, find if the corresponding inode is present in
* the inode table. If yes create and return the corresponding glfs_object.
@@ -1962,8 +2072,8 @@ glfs_h_poll_cache_invalidation(struct glfs *fs, struct glfs_upcall *up_arg,
* the handle and hence will no more be interested in
* the upcall for this particular gfid.
*/
- gf_msg(THIS->name, GF_LOG_DEBUG, errno, API_MSG_CREATE_HANDLE_FAILED,
- "handle creation of %s failed", uuid_utoa(upcall_data->gfid));
+ gf_smsg(THIS->name, GF_LOG_DEBUG, errno, API_MSG_CREATE_HANDLE_FAILED,
+ "gfid=%s", uuid_utoa(upcall_data->gfid), NULL);
errno = ESTALE;
goto out;
}
@@ -1986,9 +2096,9 @@ glfs_h_poll_cache_invalidation(struct glfs *fs, struct glfs_upcall *up_arg,
p_object = glfs_h_find_handle(fs, ca_data->p_stat.ia_gfid,
GFAPI_HANDLE_LENGTH);
if (!p_object) {
- gf_msg(THIS->name, GF_LOG_DEBUG, errno,
- API_MSG_CREATE_HANDLE_FAILED, "handle creation of %s failed",
- uuid_utoa(ca_data->p_stat.ia_gfid));
+ gf_smsg(THIS->name, GF_LOG_DEBUG, errno,
+ API_MSG_CREATE_HANDLE_FAILED, "gfid=%s",
+ uuid_utoa(ca_data->p_stat.ia_gfid), NULL);
errno = ESTALE;
goto out;
}
@@ -2002,9 +2112,9 @@ glfs_h_poll_cache_invalidation(struct glfs *fs, struct glfs_upcall *up_arg,
oldp_object = glfs_h_find_handle(fs, ca_data->oldp_stat.ia_gfid,
GFAPI_HANDLE_LENGTH);
if (!oldp_object) {
- gf_msg(THIS->name, GF_LOG_DEBUG, errno,
- API_MSG_CREATE_HANDLE_FAILED, "handle creation of %s failed",
- uuid_utoa(ca_data->oldp_stat.ia_gfid));
+ gf_smsg(THIS->name, GF_LOG_DEBUG, errno,
+ API_MSG_CREATE_HANDLE_FAILED, "gfid=%s",
+ uuid_utoa(ca_data->oldp_stat.ia_gfid), NULL);
errno = ESTALE;
/* By the time we receive upcall old parent_dir may
* have got removed. We still need to send upcall
@@ -2065,6 +2175,7 @@ glfs_release_upcall(void *ptr)
* calling glfs_fini(..). Hence making an assumption that 'fs' & ctx structures
* cannot be freed while in this routine.
*/
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_poll_upcall, 3.7.16)
int
pub_glfs_h_poll_upcall(struct glfs *fs, struct glfs_upcall **up_arg)
{
@@ -2182,8 +2293,6 @@ err:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_poll_upcall, 3.7.16);
-
static gf_boolean_t log_upcall370 = _gf_true; /* log once */
/* The old glfs_h_poll_upcall interface requires intimate knowledge of the
@@ -2197,6 +2306,7 @@ static gf_boolean_t log_upcall370 = _gf_true; /* log once */
*
* WARNING: this function will be removed in the future.
*/
+GFAPI_SYMVER_PUBLIC(glfs_h_poll_upcall370, glfs_h_poll_upcall, 3.7.0)
int
pub_glfs_h_poll_upcall370(struct glfs *fs, struct glfs_callback_arg *up_arg)
{
@@ -2216,7 +2326,7 @@ pub_glfs_h_poll_upcall370(struct glfs *fs, struct glfs_callback_arg *up_arg)
ret = pub_glfs_h_poll_upcall(fs, &upcall);
if (ret == 0) {
up_arg->fs = fs;
- if (errno == ENOENT || upcall->event == NULL) {
+ if ((errno == ENOENT) || !upcall || !upcall->event) {
up_arg->reason = GLFS_UPCALL_EVENT_NULL;
goto out;
}
@@ -2264,12 +2374,11 @@ out:
return ret;
}
-GFAPI_SYMVER_PUBLIC(glfs_h_poll_upcall370, glfs_h_poll_upcall, 3.7.0);
-
#ifdef HAVE_ACL_LIBACL_H
-#include "glusterfs-acl.h"
+#include <glusterfs/glusterfs-acl.h>
#include <acl/libacl.h>
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_acl_set, 3.7.0)
int
pub_glfs_h_acl_set(struct glfs *fs, struct glfs_object *object,
const acl_type_t type, const acl_t acl)
@@ -2318,6 +2427,7 @@ invalid_fs:
return ret;
}
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_acl_get, 3.7.0)
acl_t
pub_glfs_h_acl_get(struct glfs *fs, struct glfs_object *object,
const acl_type_t type)
@@ -2372,6 +2482,7 @@ invalid_fs:
return acl;
}
#else /* !HAVE_ACL_LIBACL_H */
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_acl_get, 3.7.0)
acl_t
pub_glfs_h_acl_get(struct glfs *fs, struct glfs_object *object,
const acl_type_t type)
@@ -2380,6 +2491,7 @@ pub_glfs_h_acl_get(struct glfs *fs, struct glfs_object *object,
return NULL;
}
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_acl_set, 3.7.0)
int
pub_glfs_h_acl_set(struct glfs *fs, struct glfs_object *object,
const acl_type_t type, const acl_t acl)
@@ -2388,10 +2500,9 @@ pub_glfs_h_acl_set(struct glfs *fs, struct glfs_object *object,
return -1;
}
#endif
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_acl_set, 3.7.0);
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_acl_get, 3.7.0);
/* The API to perform read using anonymous fd */
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_anonymous_read, 3.7.0)
ssize_t
pub_glfs_h_anonymous_read(struct glfs *fs, struct glfs_object *object,
const void *buf, size_t count, off_t offset)
@@ -2415,9 +2526,8 @@ pub_glfs_h_anonymous_read(struct glfs *fs, struct glfs_object *object,
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_anonymous_read, 3.7.0);
-
/* The API to perform write using anonymous fd */
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_anonymous_write, 3.7.0)
ssize_t
pub_glfs_h_anonymous_write(struct glfs *fs, struct glfs_object *object,
const void *buf, size_t count, off_t offset)
@@ -2441,8 +2551,7 @@ pub_glfs_h_anonymous_write(struct glfs *fs, struct glfs_object *object,
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_anonymous_write, 3.7.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_object_copy, 3.11.0)
struct glfs_object *
pub_glfs_object_copy(struct glfs_object *src)
{
@@ -2453,9 +2562,8 @@ pub_glfs_object_copy(struct glfs_object *src)
object = GF_CALLOC(1, sizeof(struct glfs_object), glfs_mt_glfs_object_t);
if (object == NULL) {
errno = ENOMEM;
- gf_msg(THIS->name, GF_LOG_WARNING, errno, API_MSG_CREATE_HANDLE_FAILED,
- "glfs_dup_object for gfid-%s failed",
- uuid_utoa(src->inode->gfid));
+ gf_smsg(THIS->name, GF_LOG_WARNING, errno, API_MSG_CREATE_HANDLE_FAILED,
+ "glfs_dup_object gfid=%s", uuid_utoa(src->inode->gfid), NULL);
return NULL;
}
@@ -2465,26 +2573,25 @@ pub_glfs_object_copy(struct glfs_object *src)
out:
return object;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_object_copy, 3.11.0);
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_xreaddirplus_get_object, 3.11.0)
struct glfs_object *
pub_glfs_xreaddirplus_get_object(struct glfs_xreaddirp_stat *xstat)
{
GF_VALIDATE_OR_GOTO("glfs_xreaddirplus_get_object", xstat, out);
if (!(xstat->flags_handled & GFAPI_XREADDIRP_HANDLE))
- gf_msg(THIS->name, GF_LOG_ERROR, errno, LG_MSG_INVALID_ARG,
- "GFAPI_XREADDIRP_HANDLE is not set. Flags"
- "handled for xstat(%p) are (%x)",
- xstat, xstat->flags_handled);
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_HANDLE_NOT_SET,
+ "GFAPI_XREADDIRP_HANDLE xstat=%p", xstat, "handle=%x",
+ xstat->flags_handled, NULL);
return xstat->object;
out:
return NULL;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_xreaddirplus_get_object, 3.11.0);
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_lease, 4.0.0)
int
pub_glfs_h_lease(struct glfs *fs, struct glfs_object *object,
struct glfs_lease *lease)
@@ -2546,5 +2653,3 @@ out:
invalid_fs:
return ret;
}
-
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_lease, 4.0.0);
diff --git a/api/src/glfs-handles.h b/api/src/glfs-handles.h
index 09b3b2ed493..4d039b9c76b 100644
--- a/api/src/glfs-handles.h
+++ b/api/src/glfs-handles.h
@@ -12,7 +12,6 @@
#define _GLFS_HANDLES_H
#include "glfs.h"
-#include <inttypes.h>
/* GLFS OBJECT BASED OPERATIONS
*
@@ -47,16 +46,6 @@
*
*/
-/* Values for valid flags to be used when using XXXsetattr, to set multiple
- attribute values passed via the related stat structure.
- */
-#define GFAPI_SET_ATTR_MODE 0x1
-#define GFAPI_SET_ATTR_UID 0x2
-#define GFAPI_SET_ATTR_GID 0x4
-#define GFAPI_SET_ATTR_SIZE 0x8
-#define GFAPI_SET_ATTR_ATIME 0x10
-#define GFAPI_SET_ATTR_MTIME 0x20
-
/* Handle length for object handles returned from glfs_h_extract_handle or
* glfs_h_create_from_handle */
#define GFAPI_HANDLE_LENGTH 16
@@ -261,6 +250,11 @@ int
glfs_h_access(glfs_t *fs, glfs_object_t *object, int mask) __THROW
GFAPI_PUBLIC(glfs_h_access, 3.6.0);
+struct glfs_object *
+glfs_h_creat_open(struct glfs *fs, struct glfs_object *parent, const char *path,
+ int flags, mode_t mode, struct stat *stat,
+ struct glfs_fd **out_fd) __THROW
+ GFAPI_PUBLIC(glfs_h_creat_open, 6.6);
/*
SYNOPSIS
diff --git a/api/src/glfs-internal.h b/api/src/glfs-internal.h
index a8d5c59e209..7cc3b18a104 100644
--- a/api/src/glfs-internal.h
+++ b/api/src/glfs-internal.h
@@ -11,11 +11,12 @@
#ifndef _GLFS_INTERNAL_H
#define _GLFS_INTERNAL_H
-#include "xlator.h"
-#include "glusterfs.h"
-#include "upcall-utils.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/upcall-utils.h>
#include "glfs-handles.h"
-#include "refcount.h"
+#include <glusterfs/refcount.h>
+#include <glusterfs/syncop.h>
#define GLFS_SYMLINK_MAX_FOLLOW 2048
@@ -80,25 +81,40 @@
#ifndef GFAPI_PRIVATE
#define GFAPI_PRIVATE(sym, ver) /**/
#endif
+#if __GNUC__ >= 10
#define GFAPI_SYMVER_PUBLIC_DEFAULT(fn, ver) \
- asm(".symver pub_" STR(fn) ", " STR(fn) "@@GFAPI_" STR(ver))
+ __attribute__((__symver__(STR(fn) "@@GFAPI_" STR(ver))))
#define GFAPI_SYMVER_PRIVATE_DEFAULT(fn, ver) \
- asm(".symver priv_" STR(fn) ", " STR(fn) "@@GFAPI_PRIVATE_" STR(ver))
+ __attribute__((__symver__(STR(fn) "@@GFAPI_PRIVATE_" STR(ver))))
#define GFAPI_SYMVER_PUBLIC(fn1, fn2, ver) \
- asm(".symver pub_" STR(fn1) ", " STR(fn2) "@GFAPI_" STR(ver))
+ __attribute__((__symver__(STR(fn2) "@GFAPI_" STR(ver))))
#define GFAPI_SYMVER_PRIVATE(fn1, fn2, ver) \
- asm(".symver priv_" STR(fn1) ", " STR(fn2) "@GFAPI_PRIVATE_" STR(ver))
+ __attribute__((__symver__(STR(fn2) "@GFAPI_PRIVATE_" STR(ver))))
+
+#else
+#define GFAPI_SYMVER_PUBLIC_DEFAULT(fn, ver) \
+ asm(".symver pub_" STR(fn) ", " STR(fn) "@@GFAPI_" STR(ver));
+
+#define GFAPI_SYMVER_PRIVATE_DEFAULT(fn, ver) \
+ asm(".symver priv_" STR(fn) ", " STR(fn) "@@GFAPI_PRIVATE_" STR(ver));
+
+#define GFAPI_SYMVER_PUBLIC(fn1, fn2, ver) \
+ asm(".symver pub_" STR(fn1) ", " STR(fn2) "@GFAPI_" STR(ver));
+
+#define GFAPI_SYMVER_PRIVATE(fn1, fn2, ver) \
+ asm(".symver priv_" STR(fn1) ", " STR(fn2) "@GFAPI_PRIVATE_" STR(ver));
+#endif
#define STR(str) #str
#else
#ifndef GFAPI_PUBLIC
-#define GFAPI_PUBLIC(sym, ver) __asm("_" __STRING(sym) "$GFAPI_" __STRING(ver))
+#define GFAPI_PUBLIC(sym, ver) __asm("_" __STRING(sym) "$GFAPI_" __STRING(ver));
#endif
#ifndef GFAPI_PRIVATE
#define GFAPI_PRIVATE(sym, ver) \
- __asm("_" __STRING(sym) "$GFAPI_PRIVATE_" __STRING(ver))
+ __asm("_" __STRING(sym) "$GFAPI_PRIVATE_" __STRING(ver));
#endif
#define GFAPI_SYMVER_PUBLIC_DEFAULT(fn, dotver) /**/
#define GFAPI_SYMVER_PRIVATE_DEFAULT(fn, dotver) /**/
@@ -207,6 +223,7 @@ struct glfs {
glfs_upcall_cbk up_cbk; /* upcall cbk function to be registered */
void *up_data; /* Opaque data provided by application
* during upcall registration */
+ struct list_head waitq; /* waiting synctasks */
};
/* This enum is used to maintain the state of glfd. In case of async fops
@@ -442,6 +459,34 @@ glfs_process_upcall_event(struct glfs *fs, void *data)
THIS = glfd->fd->inode->table->xl->ctx->master; \
} while (0)
+#define __GLFS_LOCK_WAIT(fs) \
+ do { \
+ struct synctask *task = NULL; \
+ \
+ task = synctask_get(); \
+ \
+ if (task) { \
+ list_add_tail(&task->waitq, &fs->waitq); \
+ pthread_mutex_unlock(&fs->mutex); \
+ synctask_yield(task, NULL); \
+ pthread_mutex_lock(&fs->mutex); \
+ } else { \
+ /* non-synctask */ \
+ pthread_cond_wait(&fs->cond, &fs->mutex); \
+ } \
+ } while (0)
+
+#define __GLFS_SYNCTASK_WAKE(fs) \
+ do { \
+ struct synctask *waittask = NULL; \
+ \
+ while (!list_empty(&fs->waitq)) { \
+ waittask = list_entry(fs->waitq.next, struct synctask, waitq); \
+ list_del_init(&waittask->waitq); \
+ synctask_wake(waittask); \
+ } \
+ } while (0)
+
/*
By default all lock attempts from user context must
use glfs_lock() and glfs_unlock(). This allows
@@ -466,10 +511,10 @@ glfs_lock(struct glfs *fs, gf_boolean_t wait_for_migration)
pthread_mutex_lock(&fs->mutex);
while (!fs->init)
- pthread_cond_wait(&fs->cond, &fs->mutex);
+ __GLFS_LOCK_WAIT(fs);
while (wait_for_migration && fs->migration_in_progress)
- pthread_cond_wait(&fs->cond, &fs->mutex);
+ __GLFS_LOCK_WAIT(fs);
return 0;
}
@@ -522,6 +567,9 @@ int
glfs_loc_touchup(loc_t *loc) GFAPI_PRIVATE(glfs_loc_touchup, 3.4.0);
void
glfs_iatt_to_stat(struct glfs *fs, struct iatt *iatt, struct stat *stat);
+void
+glfs_iatt_from_stat(struct stat *stat, int valid, struct iatt *iatt,
+ int *gvalid);
int
glfs_loc_link(loc_t *loc, struct iatt *iatt);
int
@@ -667,4 +715,42 @@ get_fop_attr_thrd_key(dict_t **fop_attr);
void
unset_fop_attr(dict_t **fop_attr);
+
+/*
+ SYNOPSIS
+ glfs_statx: Fetch extended file attributes for the given path.
+
+ DESCRIPTION
+ This function fetches extended file attributes for the given path.
+
+ PARAMETERS
+ @fs: The 'virtual mount' object referencing a volume, under which file exists.
+ @path: Path of the file within the virtual mount.
+ @mask: Requested extended file attributes mask, (See mask defines above)
+
+ RETURN VALUES
+ -1 : Failure. @errno will be set with the type of failure.
+ 0 : Filled in statxbuf with appropriate masks for valid items in the
+ structure.
+
+ ERRNO VALUES
+ EINVAL: fs is invalid
+ EINVAL: mask has unsupported bits set
+ Other errors as returned by stat(2)
+ */
+
+int
+glfs_statx(struct glfs *fs, const char *path, unsigned int mask,
+ struct glfs_stat *statxbuf) GFAPI_PRIVATE(glfs_statx, 6.0);
+
+void
+glfs_iatt_from_statx(struct iatt *, const struct glfs_stat *)
+ GFAPI_PRIVATE(glfs_iatt_from_statx, 6.0);
+
+/*
+ * This API is a per thread setting, similar to glfs_setfs{u/g}id, because of
+ * the call to syncopctx_setfspid.
+ */
+int
+glfs_setfspid(struct glfs *, pid_t) GFAPI_PRIVATE(glfs_setfspid, 6.1);
#endif /* !_GLFS_INTERNAL_H */
diff --git a/api/src/glfs-master.c b/api/src/glfs-master.c
index 969095a8224..100dcc16cc0 100644
--- a/api/src/glfs-master.c
+++ b/api/src/glfs-master.c
@@ -8,15 +8,9 @@
cases as published by the Free Software Foundation.
*/
-#include <unistd.h>
-#include <string.h>
-#include <stdlib.h>
#include <stdio.h>
-#include <inttypes.h>
-#include <limits.h>
-#include "xlator.h"
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "glfs-internal.h"
#include "glfs-mem-types.h"
@@ -81,9 +75,10 @@ notify(xlator_t *this, int event, void *data, ...)
switch (event) {
case GF_EVENT_GRAPH_NEW:
- gf_msg(this->name, GF_LOG_INFO, 0, API_MSG_NEW_GRAPH,
- "New graph %s (%d) coming up",
- uuid_utoa((unsigned char *)graph->graph_uuid), graph->id);
+ gf_smsg(this->name, GF_LOG_INFO, 0, API_MSG_NEW_GRAPH,
+ "graph-uuid=%s",
+ uuid_utoa((unsigned char *)graph->graph_uuid), "id=%d",
+ graph->id, NULL);
break;
case GF_EVENT_CHILD_UP:
pthread_mutex_lock(&fs->mutex);
@@ -126,9 +121,8 @@ mem_acct_init(xlator_t *this)
ret = xlator_mem_acct_init(this, glfs_mt_end + 1);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, API_MSG_MEM_ACCT_INIT_FAILED,
- "Failed to initialise "
- "memory accounting");
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, API_MSG_MEM_ACCT_INIT_FAILED,
+ NULL);
return ret;
}
@@ -169,6 +163,21 @@ struct xlator_dumpops dumpops;
struct xlator_fops fops;
-struct xlator_cbks cbks = {.forget = glfs_forget,
- .release = glfs_release,
- .releasedir = glfs_releasedir};
+struct xlator_cbks cbks = {
+ .forget = glfs_forget,
+ .release = glfs_release,
+ .releasedir = glfs_releasedir,
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1},
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .identifier = "glfs-api",
+ .category = GF_MAINTAINED,
+};
diff --git a/api/src/glfs-mem-types.h b/api/src/glfs-mem-types.h
index e1316d128af..bfa325a3ad9 100644
--- a/api/src/glfs-mem-types.h
+++ b/api/src/glfs-mem-types.h
@@ -11,7 +11,7 @@
#ifndef _GLFS_MEM_TYPES_H
#define _GLFS_MEM_TYPES_H
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
#define GF_MEM_TYPE_START (gf_common_mt_end + 1)
diff --git a/api/src/glfs-mgmt.c b/api/src/glfs-mgmt.c
index 87b62173689..7c82b8cd162 100644
--- a/api/src/glfs-mgmt.c
+++ b/api/src/glfs-mgmt.c
@@ -15,27 +15,20 @@
#include <signal.h>
#include <pthread.h>
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "glfs.h"
-#include "dict.h"
-#include "gf-event.h"
-#include "defaults.h"
+#include <glusterfs/dict.h>
#include "rpc-clnt.h"
#include "protocol-common.h"
-#include "glusterfs3.h"
-#include "portmap-xdr.h"
-#include "xdr-common.h"
#include "xdr-generic.h"
#include "rpc-common-xdr.h"
-#include "syncop.h"
-#include "xlator.h"
+#include <glusterfs/syncop.h>
#include "glfs-internal.h"
-#include "glfs-mem-types.h"
#include "gfapi-messages.h"
-#include "syscall.h"
+#include <glusterfs/syscall.h>
int
glfs_volfile_fetch(struct glfs *fs);
@@ -53,16 +46,15 @@ glfs_process_volfp(struct glfs *fs, FILE *fp)
ctx = fs->ctx;
graph = glusterfs_graph_construct(fp);
if (!graph) {
- gf_msg("glfs", GF_LOG_ERROR, errno, API_MSG_GRAPH_CONSTRUCT_FAILED,
- "failed to construct the graph");
+ gf_smsg("glfs", GF_LOG_ERROR, errno, API_MSG_GRAPH_CONSTRUCT_FAILED,
+ NULL);
goto out;
}
for (trav = graph->first; trav; trav = trav->next) {
if (strcmp(trav->type, "mount/api") == 0) {
- gf_msg("glfs", GF_LOG_ERROR, EINVAL, API_MSG_API_XLATOR_ERROR,
- "api master xlator cannot be specified "
- "in volume file");
+ gf_smsg("glfs", GF_LOG_ERROR, EINVAL, API_MSG_API_XLATOR_ERROR,
+ NULL);
goto out;
}
}
@@ -127,32 +119,28 @@ mgmt_cbk_statedump(struct rpc_clnt *rpc, void *mydata, void *data)
this = mydata;
if (!this) {
- gf_msg("glfs", GF_LOG_ERROR, EINVAL, API_MSG_STATEDUMP_FAILED,
- "NULL mydata");
+ gf_smsg("glfs", GF_LOG_ERROR, EINVAL, API_MSG_NULL, "mydata", NULL);
errno = EINVAL;
goto out;
}
fs = this->private;
if (!fs) {
- gf_msg("glfs", GF_LOG_ERROR, EINVAL, API_MSG_STATEDUMP_FAILED,
- "NULL glfs");
+ gf_smsg("glfs", GF_LOG_ERROR, EINVAL, API_MSG_NULL, "glfs", NULL);
errno = EINVAL;
goto out;
}
iov = (struct iovec *)data;
if (!iov) {
- gf_msg("glfs", GF_LOG_ERROR, EINVAL, API_MSG_STATEDUMP_FAILED,
- "NULL iovec data");
+ gf_smsg("glfs", GF_LOG_ERROR, EINVAL, API_MSG_NULL, "iovec data", NULL);
errno = EINVAL;
goto out;
}
ret = xdr_to_generic(*iov, &target_pid, (xdrproc_t)xdr_gf_statedump);
if (ret < 0) {
- gf_msg("glfs", GF_LOG_ERROR, EINVAL, API_MSG_STATEDUMP_FAILED,
- "Failed to decode xdr response for GF_CBK_STATEDUMP");
+ gf_smsg("glfs", GF_LOG_ERROR, EINVAL, API_MSG_DECODE_XDR_FAILED, NULL);
goto out;
}
@@ -163,22 +151,21 @@ mgmt_cbk_statedump(struct rpc_clnt *rpc, void *mydata, void *data)
ret = glfs_sysrq(fs, GLFS_SYSRQ_STATEDUMP);
if (ret < 0) {
- gf_msg("glfs", GF_LOG_INFO, 0, API_MSG_STATEDUMP_FAILED,
- "statedump failed");
+ gf_smsg("glfs", GF_LOG_INFO, 0, API_MSG_STATEDUMP_FAILED, NULL);
}
}
out:
return ret;
}
-rpcclnt_cb_actor_t mgmt_cbk_actors[GF_CBK_MAXVALUE] = {
- [GF_CBK_FETCHSPEC] = {"FETCHSPEC", GF_CBK_FETCHSPEC, mgmt_cbk_spec},
- [GF_CBK_EVENT_NOTIFY] = {"EVENTNOTIFY", GF_CBK_EVENT_NOTIFY,
- mgmt_cbk_event},
- [GF_CBK_STATEDUMP] = {"STATEDUMP", GF_CBK_STATEDUMP, mgmt_cbk_statedump},
+static rpcclnt_cb_actor_t mgmt_cbk_actors[GF_CBK_MAXVALUE] = {
+ [GF_CBK_FETCHSPEC] = {"FETCHSPEC", mgmt_cbk_spec, GF_CBK_FETCHSPEC},
+ [GF_CBK_EVENT_NOTIFY] = {"EVENTNOTIFY", mgmt_cbk_event,
+ GF_CBK_EVENT_NOTIFY},
+ [GF_CBK_STATEDUMP] = {"STATEDUMP", mgmt_cbk_statedump, GF_CBK_STATEDUMP},
};
-struct rpcclnt_cb_program mgmt_cbk_prog = {
+static struct rpcclnt_cb_program mgmt_cbk_prog = {
.progname = "GlusterFS Callback",
.prognum = GLUSTER_CBK_PROGRAM,
.progver = GLUSTER_CBK_VERSION,
@@ -186,7 +173,7 @@ struct rpcclnt_cb_program mgmt_cbk_prog = {
.numactors = GF_CBK_MAXVALUE,
};
-char *clnt_handshake_procs[GF_HNDSK_MAXVALUE] = {
+static char *clnt_handshake_procs[GF_HNDSK_MAXVALUE] = {
[GF_HNDSK_NULL] = "NULL",
[GF_HNDSK_SETVOLUME] = "SETVOLUME",
[GF_HNDSK_GETSPEC] = "GETSPEC",
@@ -195,7 +182,7 @@ char *clnt_handshake_procs[GF_HNDSK_MAXVALUE] = {
[GF_HNDSK_GET_VOLUME_INFO] = "GETVOLUMEINFO",
};
-rpc_clnt_prog_t clnt_handshake_prog = {
+static rpc_clnt_prog_t clnt_handshake_prog = {
.progname = "GlusterFS Handshake",
.prognum = GLUSTER_HNDSK_PROGRAM,
.progver = GLUSTER_HNDSK_VERSION,
@@ -237,8 +224,8 @@ mgmt_submit_request(void *req, call_frame_t *frame, glusterfs_ctx_t *ctx,
/* Create the xdr payload */
ret = xdr_serialize_generic(iov, req, xdrproc);
if (ret == -1) {
- gf_msg(THIS->name, GF_LOG_WARNING, 0, API_MSG_XDR_PAYLOAD_FAILED,
- "failed to create XDR payload");
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, API_MSG_XDR_PAYLOAD_FAILED,
+ NULL);
goto out;
}
iov.iov_len = ret;
@@ -268,7 +255,6 @@ mgmt_get_volinfo_cbk(struct rpc_req *req, struct iovec *iov, int count,
int ret = 0;
char *volume_id_str = NULL;
dict_t *dict = NULL;
- char key[1024] = {0};
gf_get_volume_info_rsp rsp = {
0,
};
@@ -282,8 +268,8 @@ mgmt_get_volinfo_cbk(struct rpc_req *req, struct iovec *iov, int count,
args = frame->local;
if (!ctx) {
- gf_msg(frame->this->name, GF_LOG_ERROR, EINVAL, API_MSG_INVALID_ENTRY,
- "NULL context");
+ gf_smsg(frame->this->name, GF_LOG_ERROR, EINVAL, API_MSG_NULL,
+ "context", NULL);
errno = EINVAL;
ret = -1;
goto out;
@@ -292,8 +278,8 @@ mgmt_get_volinfo_cbk(struct rpc_req *req, struct iovec *iov, int count,
fs = ((xlator_t *)ctx->master)->private;
if (-1 == req->rpc_status) {
- gf_msg(frame->this->name, GF_LOG_ERROR, EINVAL, API_MSG_INVALID_ENTRY,
- "GET_VOLUME_INFO RPC call is not successful");
+ gf_smsg(frame->this->name, GF_LOG_ERROR, EINVAL,
+ API_MSG_CALL_NOT_SUCCESSFUL, NULL);
errno = EINVAL;
ret = -1;
goto out;
@@ -302,9 +288,8 @@ mgmt_get_volinfo_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_get_volume_info_rsp);
if (ret < 0) {
- gf_msg(frame->this->name, GF_LOG_ERROR, 0,
- API_MSG_XDR_RESPONSE_DECODE_FAILED,
- "Failed to decode xdr response for GET_VOLUME_INFO");
+ gf_smsg(frame->this->name, GF_LOG_ERROR, 0,
+ API_MSG_XDR_RESPONSE_DECODE_FAILED, NULL);
goto out;
}
@@ -320,9 +305,8 @@ mgmt_get_volinfo_cbk(struct rpc_req *req, struct iovec *iov, int count,
}
if (!rsp.dict.dict_len) {
- gf_msg(frame->this->name, GF_LOG_ERROR, EINVAL, API_MSG_INVALID_ENTRY,
- "Response received for "
- "GET_VOLUME_INFO RPC call is not valid");
+ gf_smsg(frame->this->name, GF_LOG_ERROR, EINVAL, API_MSG_CALL_NOT_VALID,
+ NULL);
ret = -1;
errno = EINVAL;
goto out;
@@ -343,8 +327,7 @@ mgmt_get_volinfo_cbk(struct rpc_req *req, struct iovec *iov, int count,
goto out;
}
- snprintf(key, sizeof(key), "volume_id");
- ret = dict_get_str(dict, key, &volume_id_str);
+ ret = dict_get_str_sizen(dict, "volume_id", &volume_id_str);
if (ret) {
errno = EINVAL;
goto out;
@@ -360,11 +343,9 @@ out:
}
if (ret) {
- gf_msg(frame->this->name, GF_LOG_ERROR, errno,
- API_MSG_GET_VOLINFO_CBK_FAILED,
- "In GET_VOLUME_INFO "
- "cbk, received error: %s",
- strerror(errno));
+ gf_smsg(frame->this->name, GF_LOG_ERROR, errno,
+ API_MSG_GET_VOLINFO_CBK_FAILED, "error=%s", strerror(errno),
+ NULL);
}
if (dict)
@@ -383,6 +364,7 @@ out:
return ret;
}
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_get_volumeid, 3.5.0)
int
pub_glfs_get_volumeid(struct glfs *fs, char *volid, size_t size)
{
@@ -406,9 +388,8 @@ pub_glfs_get_volumeid(struct glfs *fs, char *volid, size_t size)
glfs_get_volume_info(fs);
if (gf_uuid_is_null(fs->vol_uuid)) {
- gf_msg(THIS->name, GF_LOG_ERROR, EINVAL, API_MSG_FETCH_VOLUUID_FAILED,
- "Unable to fetch "
- "volume UUID");
+ gf_smsg(THIS->name, GF_LOG_ERROR, EINVAL, API_MSG_FETCH_VOLUUID_FAILED,
+ NULL);
goto out;
}
@@ -420,8 +401,7 @@ done:
}
if (size < uuid_size) {
- gf_msg(THIS->name, GF_LOG_ERROR, ERANGE, API_MSG_INSUFF_SIZE,
- "Insufficient size passed");
+ gf_smsg(THIS->name, GF_LOG_ERROR, ERANGE, API_MSG_INSUFF_SIZE, NULL);
errno = ERANGE;
goto out;
}
@@ -439,8 +419,6 @@ invalid_fs:
return -1;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_get_volumeid, 3.5.0);
-
int
glfs_get_volume_info(struct glfs *fs)
{
@@ -454,8 +432,7 @@ glfs_get_volume_info(struct glfs *fs)
ctx = fs->ctx;
frame = create_frame(THIS, ctx->pool);
if (!frame) {
- gf_msg("glfs", GF_LOG_ERROR, ENOMEM, API_MSG_FRAME_CREAT_FAILED,
- "failed to create the frame");
+ gf_smsg("glfs", GF_LOG_ERROR, ENOMEM, API_MSG_FRAME_CREAT_FAILED, NULL);
ret = -1;
goto out;
}
@@ -511,8 +488,8 @@ glfs_get_volume_info_rpc(call_frame_t *frame, xlator_t *this, struct glfs *fs)
flags = (int32_t)GF_GET_VOLUME_UUID; // ctx->flags;
ret = dict_set_int32(dict, "flags", flags);
if (ret) {
- gf_msg(frame->this->name, GF_LOG_ERROR, EINVAL, API_MSG_DICT_SET_FAILED,
- "failed to set flags");
+ gf_smsg(frame->this->name, GF_LOG_ERROR, EINVAL,
+ API_MSG_DICT_SET_FAILED, "flags", NULL);
goto out;
}
@@ -582,8 +559,8 @@ glfs_mgmt_getspec_cbk(struct rpc_req *req, struct iovec *iov, int count,
ctx = frame->this->ctx;
if (!ctx) {
- gf_msg(frame->this->name, GF_LOG_ERROR, EINVAL, API_MSG_INVALID_ENTRY,
- "NULL context");
+ gf_smsg(frame->this->name, GF_LOG_ERROR, EINVAL, API_MSG_NULL,
+ "context", NULL);
errno = EINVAL;
ret = -1;
goto out;
@@ -599,16 +576,15 @@ glfs_mgmt_getspec_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp);
if (ret < 0) {
- gf_msg(frame->this->name, GF_LOG_ERROR, 0, API_MSG_XDR_DECODE_FAILED,
- "XDR decoding error");
+ gf_smsg(frame->this->name, GF_LOG_ERROR, 0, API_MSG_XDR_DECODE_FAILED,
+ NULL);
ret = -1;
goto out;
}
if (-1 == rsp.op_ret) {
- gf_msg(frame->this->name, GF_LOG_ERROR, rsp.op_errno,
- API_MSG_GET_VOLFILE_FAILED,
- "failed to get the 'volume file' from server");
+ gf_smsg(frame->this->name, GF_LOG_ERROR, rsp.op_errno,
+ API_MSG_GET_VOLFILE_FAILED, "from server", NULL);
ret = -1;
errno = rsp.op_errno;
goto out;
@@ -653,12 +629,14 @@ volfile:
ret = 0;
size = rsp.op_ret;
+ pthread_mutex_lock(&fs->mutex);
if ((size == fs->oldvollen) &&
(memcmp(fs->oldvolfile, rsp.spec, size) == 0)) {
- gf_msg(frame->this->name, GF_LOG_INFO, 0, API_MSG_VOLFILE_INFO,
- "No change in volfile, continuing");
+ pthread_mutex_unlock(&fs->mutex);
+ gf_smsg(frame->this->name, GF_LOG_INFO, 0, API_MSG_VOLFILE_INFO, NULL);
goto out;
}
+ pthread_mutex_unlock(&fs->mutex);
/* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */
tmp_fd = mkstemp(template);
@@ -672,8 +650,8 @@ volfile:
*/
ret = sys_unlink(template);
if (ret < 0) {
- gf_msg(frame->this->name, GF_LOG_INFO, 0, API_MSG_VOLFILE_INFO,
- "Unable to delete file: %s", template);
+ gf_smsg(frame->this->name, GF_LOG_INFO, 0, API_MSG_UNABLE_TO_DEL,
+ "template=%s", template, NULL);
ret = 0;
}
@@ -694,12 +672,15 @@ volfile:
* volfile if topology hasn't changed.
* glusterfs_volfile_reconfigure returns 3 possible return states
* return 0 =======> reconfiguration of options has succeeded
- * return 1 =======> the graph has to be reconstructed and all the
- * xlators should be inited return -1(or -ve) =======> Some Internal Error
- * occurred during the operation
+ * return 1 =======> the graph has to be reconstructed and all
+ * the xlators should be inited return -1(or -ve) =======> Some Internal
+ * Error occurred during the operation
*/
+ pthread_mutex_lock(&fs->mutex);
ret = gf_volfile_reconfigure(fs->oldvollen, tmpfp, fs->ctx, fs->oldvolfile);
+ pthread_mutex_unlock(&fs->mutex);
+
if (ret == 0) {
gf_msg_debug("glusterfsd-mgmt", 0,
"No need to re-load "
@@ -732,9 +713,7 @@ out:
// Stop if server is running at an unsupported op-version
if (ENOTSUP == ret) {
- gf_msg("mgmt", GF_LOG_ERROR, ENOTSUP, API_MSG_WRONG_OPVERSION,
- "Server is operating at an op-version which is not "
- "supported");
+ gf_smsg("mgmt", GF_LOG_ERROR, ENOTSUP, API_MSG_WRONG_OPVERSION, NULL);
errno = ENOTSUP;
glfs_init_done(fs, -1);
}
@@ -743,9 +722,8 @@ out:
/* Do it only for the first time */
/* Failed to get the volume file, something wrong,
restart the process */
- gf_msg("glfs-mgmt", GF_LOG_ERROR, EINVAL, API_MSG_INVALID_ENTRY,
- "failed to fetch volume file (key:%s)",
- ctx->cmd_args.volfile_id);
+ gf_smsg("glfs-mgmt", GF_LOG_ERROR, EINVAL, API_MSG_GET_VOLFILE_FAILED,
+ "key=%s", ctx->cmd_args.volfile_id, NULL);
if (!need_retry) {
if (!errno)
errno = EINVAL;
@@ -768,7 +746,7 @@ glfs_volfile_fetch(struct glfs *fs)
gf_getspec_req req = {
0,
};
- int ret = 0;
+ int ret = -1;
call_frame_t *frame = NULL;
glusterfs_ctx_t *ctx = NULL;
dict_t *dict = NULL;
@@ -776,14 +754,11 @@ glfs_volfile_fetch(struct glfs *fs)
ctx = fs->ctx;
cmd_args = &ctx->cmd_args;
- frame = create_frame(THIS, ctx->pool);
-
req.key = cmd_args->volfile_id;
req.flags = 0;
dict = dict_new();
if (!dict) {
- ret = -1;
goto out;
}
@@ -791,15 +766,15 @@ glfs_volfile_fetch(struct glfs *fs)
// decision
ret = dict_set_int32(dict, "min-op-version", GD_OP_VERSION_MIN);
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, EINVAL, API_MSG_DICT_SET_FAILED,
- "Failed to set min-op-version in request dict");
+ gf_smsg(THIS->name, GF_LOG_ERROR, EINVAL, API_MSG_DICT_SET_FAILED,
+ "min-op-version", NULL);
goto out;
}
ret = dict_set_int32(dict, "max-op-version", GD_OP_VERSION_MAX);
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, EINVAL, API_MSG_DICT_SET_FAILED,
- "Failed to set max-op-version in request dict");
+ gf_smsg(THIS->name, GF_LOG_ERROR, EINVAL, API_MSG_DICT_SET_FAILED,
+ "max-op-version", NULL);
goto out;
}
@@ -811,8 +786,14 @@ glfs_volfile_fetch(struct glfs *fs)
ret = dict_allocate_and_serialize(dict, &req.xdata.xdata_val,
&req.xdata.xdata_len);
if (ret < 0) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0, API_MSG_DICT_SERIALIZE_FAILED,
- "Failed to serialize dictionary");
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0, API_MSG_DICT_SERIALIZE_FAILED,
+ NULL);
+ goto out;
+ }
+
+ frame = create_frame(THIS, ctx->pool);
+ if (!frame) {
+ ret = -1;
goto out;
}
@@ -853,15 +834,13 @@ mgmt_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
case RPC_CLNT_DISCONNECT:
if (!ctx->active) {
if (rpc_trans->connect_failed)
- gf_msg("glfs-mgmt", GF_LOG_ERROR, 0,
- API_MSG_REMOTE_HOST_CONN_FAILED,
- "failed to connect to remote-host: %s",
- ctx->cmd_args.volfile_server);
+ gf_smsg("glfs-mgmt", GF_LOG_ERROR, 0,
+ API_MSG_REMOTE_HOST_CONN_FAILED, "server=%s",
+ ctx->cmd_args.volfile_server, NULL);
else
- gf_msg("glfs-mgmt", GF_LOG_INFO, 0,
- API_MSG_REMOTE_HOST_CONN_FAILED,
- "disconnected from remote-host: %s",
- ctx->cmd_args.volfile_server);
+ gf_smsg("glfs-mgmt", GF_LOG_INFO, 0,
+ API_MSG_REMOTE_HOST_CONN_FAILED, "server=%s",
+ ctx->cmd_args.volfile_server, NULL);
if (!rpc->disabled) {
/*
@@ -876,9 +855,8 @@ mgmt_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
server = ctx->cmd_args.curr_server;
if (server->list.next == &ctx->cmd_args.volfile_servers) {
errno = ENOTCONN;
- gf_msg("glfs-mgmt", GF_LOG_INFO, ENOTCONN,
- API_MSG_VOLFILE_SERVER_EXHAUST,
- "Exhausted all volfile servers");
+ gf_smsg("glfs-mgmt", GF_LOG_INFO, ENOTCONN,
+ API_MSG_VOLFILE_SERVER_EXHAUST, NULL);
glfs_init_done(fs, -1);
break;
}
@@ -891,10 +869,9 @@ mgmt_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
ret = dict_set_str(rpc_trans->options, "transport-type",
server->transport);
if (ret != 0) {
- gf_msg("glfs-mgmt", GF_LOG_ERROR, ENOTCONN,
- API_MSG_DICT_SET_FAILED,
- "failed to set transport-type: %s",
- server->transport);
+ gf_smsg("glfs-mgmt", GF_LOG_ERROR, ENOTCONN,
+ API_MSG_DICT_SET_FAILED, "transport-type=%s",
+ server->transport, NULL);
errno = ENOTCONN;
glfs_init_done(fs, -1);
break;
@@ -905,10 +882,10 @@ mgmt_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
"transport.socket.connect-path",
server->volfile_server);
if (ret != 0) {
- gf_msg("glfs-mgmt", GF_LOG_ERROR, ENOTCONN,
- API_MSG_DICT_SET_FAILED,
- "failed to set socket.connect-path: %s",
- server->volfile_server);
+ gf_smsg("glfs-mgmt", GF_LOG_ERROR, ENOTCONN,
+ API_MSG_DICT_SET_FAILED,
+ "socket.connect-path=%s",
+ server->volfile_server, NULL);
errno = ENOTCONN;
glfs_init_done(fs, -1);
break;
@@ -923,9 +900,9 @@ mgmt_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
ret = dict_set_int32(rpc_trans->options, "remote-port",
server->port);
if (ret != 0) {
- gf_msg("glfs-mgmt", GF_LOG_ERROR, ENOTCONN,
- API_MSG_DICT_SET_FAILED,
- "failed to set remote-port: %d", server->port);
+ gf_smsg("glfs-mgmt", GF_LOG_ERROR, ENOTCONN,
+ API_MSG_DICT_SET_FAILED, "remote-port=%d",
+ server->port, NULL);
errno = ENOTCONN;
glfs_init_done(fs, -1);
break;
@@ -934,10 +911,9 @@ mgmt_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
ret = dict_set_str(rpc_trans->options, "remote-host",
server->volfile_server);
if (ret != 0) {
- gf_msg("glfs-mgmt", GF_LOG_ERROR, ENOTCONN,
- API_MSG_DICT_SET_FAILED,
- "failed to set remote-host: %s",
- server->volfile_server);
+ gf_smsg("glfs-mgmt", GF_LOG_ERROR, ENOTCONN,
+ API_MSG_DICT_SET_FAILED, "remote-host=%s",
+ server->volfile_server, NULL);
errno = ENOTCONN;
glfs_init_done(fs, -1);
break;
@@ -950,10 +926,9 @@ mgmt_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
"transport.socket.connect-path");
}
- gf_msg("glfs-mgmt", GF_LOG_INFO, 0, API_MSG_VOLFILE_CONNECTING,
- "connecting to next volfile server %s"
- " at port %d with transport: %s",
- server->volfile_server, server->port, server->transport);
+ gf_smsg("glfs-mgmt", GF_LOG_INFO, 0, API_MSG_VOLFILE_CONNECTING,
+ "server=%s", server->volfile_server, "port=%d",
+ server->port, "transport=%s", server->transport, NULL);
}
break;
case RPC_CLNT_CONNECT:
@@ -961,9 +936,9 @@ mgmt_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
if (ret && (ctx->active == NULL)) {
/* Do it only for the first time */
/* Exit the process.. there are some wrong options */
- gf_msg("glfs-mgmt", GF_LOG_ERROR, EINVAL, API_MSG_INVALID_ENTRY,
- "failed to fetch volume file (key:%s)",
- ctx->cmd_args.volfile_id);
+ gf_smsg("glfs-mgmt", GF_LOG_ERROR, EINVAL,
+ API_MSG_GET_VOLFILE_FAILED, "key=%s",
+ ctx->cmd_args.volfile_id, NULL);
errno = EINVAL;
glfs_init_done(fs, -1);
}
@@ -1009,6 +984,10 @@ glfs_mgmt_init(struct glfs *fs)
if (ctx->mgmt)
return 0;
+ options = dict_new();
+ if (!options)
+ goto out;
+
if (cmd_args->volfile_server_port)
port = cmd_args->volfile_server_port;
@@ -1023,38 +1002,36 @@ glfs_mgmt_init(struct glfs *fs)
if (cmd_args->volfile_server_transport &&
!strcmp(cmd_args->volfile_server_transport, "unix")) {
- ret = rpc_transport_unix_options_build(&options, host, 0);
+ ret = rpc_transport_unix_options_build(options, host, 0);
} else {
- ret = rpc_transport_inet_options_build(&options, host, port);
+ xlator_cmdline_option_t *opt = find_xlator_option_in_cmd_args_t(
+ "address-family", cmd_args);
+ ret = rpc_transport_inet_options_build(options, host, port,
+ (opt ? opt->value : NULL));
}
if (ret)
goto out;
- if (sys_access(SECURE_ACCESS_FILE, F_OK) == 0) {
- ctx->secure_mgmt = 1;
- ctx->ssl_cert_depth = glusterfs_read_secure_access_file();
- }
-
rpc = rpc_clnt_new(options, THIS, THIS->name, 8);
if (!rpc) {
ret = -1;
- gf_msg(THIS->name, GF_LOG_WARNING, 0, API_MSG_CREATE_RPC_CLIENT_FAILED,
- "failed to create rpc clnt");
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, API_MSG_CREATE_RPC_CLIENT_FAILED,
+ NULL);
goto out;
}
ret = rpc_clnt_register_notify(rpc, mgmt_rpc_notify, THIS);
if (ret) {
- gf_msg(THIS->name, GF_LOG_WARNING, 0, API_MSG_REG_NOTIFY_FUNC_FAILED,
- "failed to register notify function");
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, API_MSG_REG_NOTIFY_FUNC_FAILED,
+ NULL);
goto out;
}
ret = rpcclnt_cbk_program_register(rpc, &mgmt_cbk_prog, THIS);
if (ret) {
- gf_msg(THIS->name, GF_LOG_WARNING, 0, API_MSG_REG_CBK_FUNC_FAILED,
- "failed to register callback function");
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, API_MSG_REG_CBK_FUNC_FAILED,
+ NULL);
goto out;
}
@@ -1066,5 +1043,7 @@ glfs_mgmt_init(struct glfs *fs)
ret = rpc_clnt_start(rpc);
out:
+ if (options)
+ dict_unref(options);
return ret;
}
diff --git a/api/src/glfs-resolve.c b/api/src/glfs-resolve.c
index 155d3dc538f..8a393ecb464 100644
--- a/api/src/glfs-resolve.c
+++ b/api/src/glfs-resolve.c
@@ -15,16 +15,16 @@
#include <inttypes.h>
#include <limits.h>
-#include "glusterfs.h"
-#include "logging.h"
-#include "stack.h"
-#include "gf-event.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/stack.h>
+#include <glusterfs/gf-event.h>
#include "glfs-mem-types.h"
-#include "common-utils.h"
-#include "syncop.h"
-#include "call-stub.h"
+#include <glusterfs/common-utils.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/call-stub.h>
#include "gfapi-messages.h"
-#include "inode.h"
+#include <glusterfs/inode.h>
#include "glfs-internal.h"
#define graphid_str(subvol) \
@@ -65,6 +65,9 @@ __glfs_first_lookup(struct glfs *fs, xlator_t *subvol)
fs->migration_in_progress = 0;
pthread_cond_broadcast(&fs->cond);
+ /* wake up other waiting tasks */
+ __GLFS_SYNCTASK_WAKE(fs);
+
return ret;
}
@@ -116,9 +119,9 @@ glfs_refresh_inode_safe(xlator_t *subvol, inode_t *oldinode,
DECODE_SYNCOP_ERR(ret);
if (ret) {
- gf_msg(subvol->name, GF_LOG_WARNING, errno,
- API_MSG_INODE_REFRESH_FAILED, "inode refresh of %s failed: %s",
- uuid_utoa(oldinode->gfid), strerror(errno));
+ gf_smsg(subvol->name, GF_LOG_WARNING, errno,
+ API_MSG_INODE_REFRESH_FAILED, "gfid=%s",
+ uuid_utoa(oldinode->gfid), "err=%s", strerror(errno), NULL);
loc_wipe(&loc);
return NULL;
}
@@ -129,9 +132,8 @@ glfs_refresh_inode_safe(xlator_t *subvol, inode_t *oldinode,
inode_ctx_set(newinode, THIS, &ctx_value);
inode_lookup(newinode);
} else {
- gf_msg(subvol->name, GF_LOG_WARNING, errno, API_MSG_INODE_LINK_FAILED,
- "inode linking of %s failed",
- uuid_utoa((unsigned char *)&iatt.ia_gfid));
+ gf_smsg(subvol->name, GF_LOG_WARNING, errno, API_MSG_INODE_LINK_FAILED,
+ "gfid=%s", uuid_utoa((unsigned char *)&iatt.ia_gfid), NULL);
}
loc_wipe(&loc);
@@ -154,9 +156,13 @@ __glfs_refresh_inode(struct glfs *fs, xlator_t *subvol, inode_t *inode,
fs->migration_in_progress = 0;
pthread_cond_broadcast(&fs->cond);
+ /* wake up other waiting tasks */
+ __GLFS_SYNCTASK_WAKE(fs);
+
return newinode;
}
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_loc_touchup, 3.4.0)
int
priv_glfs_loc_touchup(loc_t *loc)
{
@@ -171,8 +177,6 @@ priv_glfs_loc_touchup(loc_t *loc)
return ret;
}
-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_loc_touchup, 3.4.0);
-
int
glfs_resolve_symlink(struct glfs *fs, xlator_t *subvol, inode_t *inode,
char **lpath)
@@ -335,6 +339,7 @@ glfs_resolve_component(struct glfs *fs, xlator_t *subvol, inode_t *parent,
if (temp_parent) {
inode_unref(loc.parent);
loc.parent = temp_parent;
+ gf_uuid_copy(loc.pargfid, temp_parent->gfid);
inode_find_directory_name(loc.inode, &loc.name);
}
@@ -345,10 +350,12 @@ glfs_resolve_component(struct glfs *fs, xlator_t *subvol, inode_t *parent,
if (temp_parent) {
inode_unref(loc.parent);
loc.parent = temp_parent;
+ gf_uuid_copy(loc.pargfid, temp_parent->gfid);
inode_find_directory_name(loc.inode, &loc.name);
} else if (__is_root_gfid(loc.inode->gfid)) {
inode_unref(loc.parent);
loc.parent = inode_ref(loc.inode);
+ gf_uuid_copy(loc.pargfid, loc.inode->gfid);
loc.name = ".";
} else {
inode_unref(loc.inode);
@@ -439,9 +446,8 @@ glfs_resolve_component(struct glfs *fs, xlator_t *subvol, inode_t *parent,
inode = inode_link(loc.inode, loc.parent, component, &ciatt);
if (!inode) {
- gf_msg(subvol->name, GF_LOG_WARNING, errno, API_MSG_INODE_LINK_FAILED,
- "inode linking of %s failed",
- uuid_utoa((unsigned char *)&ciatt.ia_gfid));
+ gf_smsg(subvol->name, GF_LOG_WARNING, errno, API_MSG_INODE_LINK_FAILED,
+ "gfid=%s", uuid_utoa((unsigned char *)&ciatt.ia_gfid), NULL);
goto out;
} else if (inode == loc.inode)
inode_ctx_set(inode, THIS, &ctx_value);
@@ -460,6 +466,7 @@ out:
return inode;
}
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_resolve_at, 3.4.0)
int
priv_glfs_resolve_at(struct glfs *fs, xlator_t *subvol, inode_t *at,
const char *origpath, loc_t *loc, struct iatt *iatt,
@@ -610,8 +617,6 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_resolve_at, 3.4.0);
-
int
glfs_resolve_path(struct glfs *fs, xlator_t *subvol, const char *origpath,
loc_t *loc, struct iatt *iatt, int follow, int reval)
@@ -625,8 +630,8 @@ glfs_resolve_path(struct glfs *fs, xlator_t *subvol, const char *origpath,
cwd = glfs_cwd_get(fs);
if (NULL == cwd) {
- gf_msg(subvol->name, GF_LOG_WARNING, EIO, API_MSG_GET_CWD_FAILED,
- "Failed to get cwd");
+ gf_smsg(subvol->name, GF_LOG_WARNING, EIO, API_MSG_GET_CWD_FAILED,
+ NULL);
errno = EIO;
goto out;
}
@@ -640,6 +645,7 @@ out:
return ret;
}
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_resolve, 3.7.0)
int
priv_glfs_resolve(struct glfs *fs, xlator_t *subvol, const char *origpath,
loc_t *loc, struct iatt *iatt, int reval)
@@ -650,7 +656,6 @@ priv_glfs_resolve(struct glfs *fs, xlator_t *subvol, const char *origpath,
return ret;
}
-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_resolve, 3.7.0);
int
glfs_lresolve(struct glfs *fs, xlator_t *subvol, const char *origpath,
@@ -680,28 +685,27 @@ glfs_migrate_fd_locks_safe(struct glfs *fs, xlator_t *oldsubvol, fd_t *oldfd,
NULL, NULL);
DECODE_SYNCOP_ERR(ret);
if (ret < 0) {
- gf_msg(fs->volname, GF_LOG_WARNING, errno, API_MSG_FGETXATTR_FAILED,
- "fgetxattr (%s) failed (%s) on graph %s (%d)",
- uuid_utoa_r(oldfd->inode->gfid, uuid1), strerror(errno),
- graphid_str(oldsubvol), oldsubvol->graph->id);
+ gf_smsg(fs->volname, GF_LOG_WARNING, errno, API_MSG_FGETXATTR_FAILED,
+ "gfid=%s", uuid_utoa_r(oldfd->inode->gfid, uuid1), "err=%s",
+ strerror(errno), "subvol=%s", graphid_str(oldsubvol), "id=%d",
+ oldsubvol->graph->id, NULL);
goto out;
}
if (!dict_get(lockinfo, GF_XATTR_LOCKINFO_KEY)) {
- gf_msg(fs->volname, GF_LOG_WARNING, 0, API_MSG_LOCKINFO_KEY_MISSING,
- "missing lockinfo key (%s) on graph %s (%d)",
- uuid_utoa_r(oldfd->inode->gfid, uuid1), graphid_str(oldsubvol),
- oldsubvol->graph->id);
+ gf_smsg(fs->volname, GF_LOG_WARNING, 0, API_MSG_LOCKINFO_KEY_MISSING,
+ "gfid=%s", uuid_utoa_r(oldfd->inode->gfid, uuid1), "subvol=%s",
+ graphid_str(oldsubvol), "id=%d", oldsubvol->graph->id, NULL);
goto out;
}
ret = syncop_fsetxattr(newsubvol, newfd, lockinfo, 0, NULL, NULL);
DECODE_SYNCOP_ERR(ret);
if (ret < 0) {
- gf_msg(fs->volname, GF_LOG_WARNING, 0, API_MSG_FSETXATTR_FAILED,
- "fsetxattr (%s) failed (%s) on graph %s (%d)",
- uuid_utoa_r(newfd->inode->gfid, uuid1), strerror(errno),
- graphid_str(newsubvol), newsubvol->graph->id);
+ gf_smsg(fs->volname, GF_LOG_WARNING, 0, API_MSG_FSETXATTR_FAILED,
+ "gfid=%s", uuid_utoa_r(newfd->inode->gfid, uuid1), "err=%s",
+ strerror(errno), "subvol=%s", graphid_str(newsubvol), "id=%d",
+ newsubvol->graph->id, NULL);
goto out;
}
out:
@@ -722,6 +726,7 @@ glfs_migrate_fd_safe(struct glfs *fs, xlator_t *newsubvol, fd_t *oldfd)
0,
};
char uuid1[64];
+ dict_t *xdata = NULL;
oldinode = oldfd->inode;
oldsubvol = oldinode->table->xl;
@@ -730,32 +735,43 @@ glfs_migrate_fd_safe(struct glfs *fs, xlator_t *newsubvol, fd_t *oldfd)
return fd_ref(oldfd);
if (!oldsubvol->switched) {
- ret = syncop_fsync(oldsubvol, oldfd, 0, NULL, NULL, NULL, NULL);
+ xdata = dict_new();
+ if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) {
+ gf_smsg(fs->volname, GF_LOG_WARNING, ENOMEM, API_MSG_FSYNC_FAILED,
+ "err=%s", "last-fsync set failed", "gfid=%s",
+ uuid_utoa_r(oldfd->inode->gfid, uuid1), "subvol=%s",
+ graphid_str(oldsubvol), "id=%d", oldsubvol->graph->id,
+ NULL);
+ }
+
+ ret = syncop_fsync(oldsubvol, oldfd, 0, NULL, NULL, xdata, NULL);
DECODE_SYNCOP_ERR(ret);
if (ret) {
- gf_msg(fs->volname, GF_LOG_WARNING, errno, API_MSG_FSYNC_FAILED,
- "fsync() failed "
- "(%s) on %s graph %s (%d)",
- strerror(errno), uuid_utoa_r(oldfd->inode->gfid, uuid1),
- graphid_str(oldsubvol), oldsubvol->graph->id);
+ gf_smsg(fs->volname, GF_LOG_WARNING, errno, API_MSG_FSYNC_FAILED,
+ "err=%s", strerror(errno), "gfid=%s",
+ uuid_utoa_r(oldfd->inode->gfid, uuid1), "subvol=%s",
+ graphid_str(oldsubvol), "id=%d", oldsubvol->graph->id,
+ NULL);
}
}
newinode = glfs_refresh_inode_safe(newsubvol, oldinode, _gf_false);
if (!newinode) {
- gf_msg(fs->volname, GF_LOG_WARNING, errno, API_MSG_INODE_REFRESH_FAILED,
- "inode (%s) refresh failed (%s) on graph %s (%d)",
- uuid_utoa_r(oldinode->gfid, uuid1), strerror(errno),
- graphid_str(newsubvol), newsubvol->graph->id);
+ gf_smsg(fs->volname, GF_LOG_WARNING, errno,
+ API_MSG_INODE_REFRESH_FAILED, "gfid=%s",
+ uuid_utoa_r(oldinode->gfid, uuid1), "err=%s", strerror(errno),
+ "subvol=%s", graphid_str(newsubvol), "id=%d",
+ newsubvol->graph->id, NULL);
goto out;
}
newfd = fd_create(newinode, getpid());
if (!newfd) {
- gf_msg(fs->volname, GF_LOG_WARNING, errno, API_MSG_FDCREATE_FAILED,
- "fd_create (%s) failed (%s) on graph %s (%d)",
- uuid_utoa_r(newinode->gfid, uuid1), strerror(errno),
- graphid_str(newsubvol), newsubvol->graph->id);
+ gf_smsg(fs->volname, GF_LOG_WARNING, errno,
+ API_MSG_FDCREATE_FAILED_ON_GRAPH, "gfid=%s",
+ uuid_utoa_r(newinode->gfid, uuid1), "err=%s", strerror(errno),
+ "subvol=%s", graphid_str(newsubvol), "id=%d",
+ newsubvol->graph->id, NULL);
goto out;
}
@@ -763,8 +779,7 @@ glfs_migrate_fd_safe(struct glfs *fs, xlator_t *newsubvol, fd_t *oldfd)
ret = inode_path(oldfd->inode, NULL, (char **)&loc.path);
if (ret < 0) {
- gf_msg(fs->volname, GF_LOG_INFO, 0, API_MSG_INODE_PATH_FAILED,
- "inode_path failed");
+ gf_smsg(fs->volname, GF_LOG_INFO, 0, API_MSG_INODE_PATH_FAILED, NULL);
goto out;
}
@@ -780,21 +795,21 @@ glfs_migrate_fd_safe(struct glfs *fs, xlator_t *newsubvol, fd_t *oldfd)
loc_wipe(&loc);
if (ret) {
- gf_msg(fs->volname, GF_LOG_WARNING, errno, API_MSG_SYNCOP_OPEN_FAILED,
- "syncop_open%s (%s) failed (%s) on graph %s (%d)",
- IA_ISDIR(oldinode->ia_type) ? "dir" : "",
- uuid_utoa_r(newinode->gfid, uuid1), strerror(errno),
- graphid_str(newsubvol), newsubvol->graph->id);
+ gf_smsg(fs->volname, GF_LOG_WARNING, errno, API_MSG_SYNCOP_OPEN_FAILED,
+ "type=%s", IA_ISDIR(oldinode->ia_type) ? "dir" : "", "gfid=%s",
+ uuid_utoa_r(newinode->gfid, uuid1), "err=%s", strerror(errno),
+ "subvol=%s", graphid_str(newsubvol), "id=%d",
+ newsubvol->graph->id, NULL);
goto out;
}
ret = glfs_migrate_fd_locks_safe(fs, oldsubvol, oldfd, newsubvol, newfd);
if (ret) {
- gf_msg(fs->volname, GF_LOG_WARNING, errno, API_MSG_LOCK_MIGRATE_FAILED,
- "lock migration (%s) failed (%s) on graph %s (%d)",
- uuid_utoa_r(newinode->gfid, uuid1), strerror(errno),
- graphid_str(newsubvol), newsubvol->graph->id);
+ gf_smsg(fs->volname, GF_LOG_WARNING, errno, API_MSG_LOCK_MIGRATE_FAILED,
+ "gfid=%s", uuid_utoa_r(newinode->gfid, uuid1), "err=%s",
+ strerror(errno), "subvol=%s", graphid_str(newsubvol), "id=%d",
+ newsubvol->graph->id, NULL);
goto out;
}
@@ -809,6 +824,9 @@ out:
newfd = NULL;
}
+ if (xdata)
+ dict_unref(xdata);
+
return newfd;
}
@@ -829,6 +847,9 @@ __glfs_migrate_fd(struct glfs *fs, xlator_t *newsubvol, struct glfs_fd *glfd)
fs->migration_in_progress = 0;
pthread_cond_broadcast(&fs->cond);
+ /* wake up other waiting tasks */
+ __GLFS_SYNCTASK_WAKE(fs);
+
return newfd;
}
@@ -875,9 +896,9 @@ __glfs_migrate_openfds(struct glfs *fs, xlator_t *subvol)
list_for_each_entry(glfd, &fs->openfds, openfds)
{
if (gf_uuid_is_null(glfd->fd->inode->gfid)) {
- gf_msg(fs->volname, GF_LOG_INFO, 0, API_MSG_OPENFD_SKIPPED,
- "skipping openfd %p/%p in graph %s (%d)", glfd, glfd->fd,
- graphid_str(subvol), subvol->graph->id);
+ gf_smsg(fs->volname, GF_LOG_INFO, 0, API_MSG_OPENFD_SKIPPED,
+ "glfd=%p", glfd, "glfd->fd=%p", glfd->fd, "subvol=%s",
+ graphid_str(subvol), "id=%d", subvol->graph->id, NULL);
/* create in progress, defer */
continue;
}
@@ -912,10 +933,10 @@ __glfs_active_subvol(struct glfs *fs)
ret = __glfs_first_lookup(fs, new_subvol);
if (ret) {
- gf_msg(fs->volname, GF_LOG_INFO, errno,
- API_MSG_FIRST_LOOKUP_GRAPH_FAILED,
- "first lookup on graph %s (%d) failed (%s)",
- graphid_str(new_subvol), new_subvol->graph->id, strerror(errno));
+ gf_smsg(fs->volname, GF_LOG_INFO, errno,
+ API_MSG_FIRST_LOOKUP_GRAPH_FAILED, "subvol=%s",
+ graphid_str(new_subvol), "id=%d", new_subvol->graph->id,
+ "err=%s", strerror(errno), NULL);
return NULL;
}
@@ -924,11 +945,11 @@ __glfs_active_subvol(struct glfs *fs)
if (!new_cwd) {
char buf1[64];
- gf_msg(fs->volname, GF_LOG_INFO, errno,
- API_MSG_CWD_GRAPH_REF_FAILED,
- "cwd refresh of %s graph %s (%d) failed (%s)",
- uuid_utoa_r(fs->cwd->gfid, buf1), graphid_str(new_subvol),
- new_subvol->graph->id, strerror(errno));
+ gf_smsg(fs->volname, GF_LOG_INFO, errno,
+ API_MSG_CWD_GRAPH_REF_FAILED, "buf=%s",
+ uuid_utoa_r(fs->cwd->gfid, buf1), "subvol=%s",
+ graphid_str(new_subvol), "id=%d", new_subvol->graph->id,
+ "err=%s", strerror(errno), NULL);
return NULL;
}
}
@@ -949,13 +970,13 @@ __glfs_active_subvol(struct glfs *fs)
inode_unref(new_cwd);
}
- gf_msg(fs->volname, GF_LOG_INFO, 0, API_MSG_SWITCHED_GRAPH,
- "switched to graph %s (%d)", graphid_str(new_subvol),
- new_subvol->graph->id);
+ gf_smsg(fs->volname, GF_LOG_INFO, 0, API_MSG_SWITCHED_GRAPH, "subvol=%s",
+ graphid_str(new_subvol), "id=%d", new_subvol->graph->id, NULL);
return new_subvol;
}
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_subvol_done, 3.4.0)
void
priv_glfs_subvol_done(struct glfs *fs, xlator_t *subvol)
{
@@ -983,8 +1004,7 @@ priv_glfs_subvol_done(struct glfs *fs, xlator_t *subvol)
}
}
-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_subvol_done, 3.4.0);
-
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_active_subvol, 3.4.0)
xlator_t *
priv_glfs_active_subvol(struct glfs *fs)
{
@@ -1012,8 +1032,6 @@ priv_glfs_active_subvol(struct glfs *fs)
return subvol;
}
-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_active_subvol, 3.4.0);
-
int
__glfs_cwd_set(struct glfs *fs, inode_t *inode)
{
diff --git a/api/src/glfs.c b/api/src/glfs.c
index 4c951d7b6f1..b4bf1423f6d 100644
--- a/api/src/glfs.c
+++ b/api/src/glfs.c
@@ -34,17 +34,18 @@
#include <sys/prctl.h>
#endif
-#include "glusterfs.h"
-#include "logging.h"
-#include "stack.h"
-#include "gf-event.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/stack.h>
+#include <glusterfs/gf-event.h>
#include "glfs-mem-types.h"
-#include "common-utils.h"
-#include "syncop.h"
-#include "call-stub.h"
-#include "hashfn.h"
+#include <glusterfs/common-utils.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/hashfn.h>
#include "rpc-clnt.h"
-#include "statedump.h"
+#include <glusterfs/statedump.h>
+#include <glusterfs/syscall.h>
#include "gfapi-messages.h"
#include "glfs.h"
@@ -68,8 +69,8 @@ glusterfs_ctx_defaults_init(glusterfs_ctx_t *ctx)
ret = xlator_mem_acct_init(THIS, glfs_mt_end + 1);
if (ret != 0) {
- gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, API_MSG_MEM_ACCT_INIT_FAILED,
- "Memory accounting init failed");
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM, API_MSG_MEM_ACCT_INIT_FAILED,
+ NULL);
return ret;
}
@@ -91,8 +92,8 @@ glusterfs_ctx_defaults_init(glusterfs_ctx_t *ctx)
goto err;
}
- ctx->event_pool = event_pool_new(DEFAULT_EVENT_POOL_SIZE,
- STARTING_EVENT_THREADS);
+ ctx->event_pool = gf_event_pool_new(DEFAULT_EVENT_POOL_SIZE,
+ STARTING_EVENT_THREADS);
if (!ctx->event_pool) {
goto err;
}
@@ -188,23 +189,21 @@ create_master(struct glfs *fs)
goto err;
if (xlator_set_type(master, "mount/api") == -1) {
- gf_msg("glfs", GF_LOG_ERROR, 0, API_MSG_MASTER_XLATOR_INIT_FAILED,
- "master xlator "
- "for %s initialization failed",
- fs->volname);
+ gf_smsg("glfs", GF_LOG_ERROR, 0, API_MSG_MASTER_XLATOR_INIT_FAILED,
+ "name=%s", fs->volname, NULL);
goto err;
}
master->ctx = fs->ctx;
master->private = fs;
- master->options = get_new_dict();
+ master->options = dict_new();
if (!master->options)
goto err;
ret = xlator_init(master);
if (ret) {
- gf_msg("glfs", GF_LOG_ERROR, 0, API_MSG_GFAPI_XLATOR_INIT_FAILED,
- "failed to initialize gfapi translator");
+ gf_smsg("glfs", GF_LOG_ERROR, 0, API_MSG_GFAPI_XLATOR_INIT_FAILED,
+ NULL);
goto err;
}
@@ -230,9 +229,8 @@ get_volfp(struct glfs *fs)
cmd_args = &fs->ctx->cmd_args;
if ((specfp = fopen(cmd_args->volfile, "r")) == NULL) {
- gf_msg("glfs", GF_LOG_ERROR, errno, API_MSG_VOLFILE_OPEN_FAILED,
- "volume file %s open failed: %s", cmd_args->volfile,
- strerror(errno));
+ gf_smsg("glfs", GF_LOG_ERROR, errno, API_MSG_VOLFILE_OPEN_FAILED,
+ "file=%s", cmd_args->volfile, "err=%s", strerror(errno), NULL);
return NULL;
}
@@ -253,6 +251,11 @@ glfs_volumes_init(struct glfs *fs)
if (!vol_assigned(cmd_args))
return -1;
+ if (sys_access(SECURE_ACCESS_FILE, F_OK) == 0) {
+ fs->ctx->secure_mgmt = 1;
+ fs->ctx->ssl_cert_depth = glusterfs_read_secure_access_file();
+ }
+
if (cmd_args->volfile_server) {
ret = glfs_mgmt_init(fs);
goto out;
@@ -261,8 +264,8 @@ glfs_volumes_init(struct glfs *fs)
fp = get_volfp(fs);
if (!fp) {
- gf_msg("glfs", GF_LOG_ERROR, ENOENT, API_MSG_VOL_SPEC_FILE_ERROR,
- "Cannot reach volume specification file");
+ gf_smsg("glfs", GF_LOG_ERROR, ENOENT, API_MSG_VOL_SPEC_FILE_ERROR,
+ NULL);
ret = -1;
goto out;
}
@@ -277,6 +280,7 @@ out:
///////////////////////////////////////////////////////////////////////////////
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_xlator_option, 3.4.0)
int
pub_glfs_set_xlator_option(struct glfs *fs, const char *xlator, const char *key,
const char *value)
@@ -326,8 +330,7 @@ invalid_fs:
return -1;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_xlator_option, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_unset_volfile_server, 3.5.1)
int
pub_glfs_unset_volfile_server(struct glfs *fs, const char *transport,
const char *host, const int port)
@@ -368,6 +371,8 @@ pub_glfs_unset_volfile_server(struct glfs *fs, const char *transport,
list_for_each_entry_safe(server, tmp, &cmd_args->curr_server->list, list)
{
+ if (!server->volfile_server || !server->transport)
+ continue;
if ((!strcmp(server->volfile_server, host) &&
!strcmp(server->transport, transport_val) &&
(server->port == port_val))) {
@@ -385,8 +390,7 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_unset_volfile_server, 3.5.1);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_volfile_server, 3.4.0)
int
pub_glfs_set_volfile_server(struct glfs *fs, const char *transport,
const char *host, int port)
@@ -419,14 +423,11 @@ pub_glfs_set_volfile_server(struct glfs *fs, const char *transport,
server_transport = gf_strdup(transport);
} else if (!strcmp(transport, "rdma")) {
server_transport = gf_strdup(GF_DEFAULT_VOLFILE_TRANSPORT);
- gf_msg("glfs", GF_LOG_WARNING, EINVAL, API_MSG_INVALID_ENTRY,
- "transport RDMA is deprecated, "
- "falling back to tcp");
+ gf_smsg("glfs", GF_LOG_WARNING, EINVAL, API_MSG_TRANS_RDMA_DEP,
+ NULL);
} else {
- gf_msg("glfs", GF_LOG_TRACE, EINVAL, API_MSG_INVALID_ENTRY,
- "transport %s is not supported, "
- "possible values tcp|unix",
- transport);
+ gf_smsg("glfs", GF_LOG_TRACE, EINVAL, API_MSG_TRANS_NOT_SUPPORTED,
+ "transport=%s", transport, NULL);
goto out;
}
} else {
@@ -468,8 +469,6 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_volfile_server, 3.4.0);
-
/* *
* Used to free the arguments allocated by glfs_set_volfile_server()
*/
@@ -512,6 +511,7 @@ glfs_free_xlator_options(cmd_args_t *cmd_args)
}
}
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setfsuid, 3.4.2)
int
pub_glfs_setfsuid(uid_t fsuid)
{
@@ -521,8 +521,7 @@ pub_glfs_setfsuid(uid_t fsuid)
return syncopctx_setfsuid(&fsuid);
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setfsuid, 3.4.2);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setfsgid, 3.4.2)
int
pub_glfs_setfsgid(gid_t fsgid)
{
@@ -532,8 +531,7 @@ pub_glfs_setfsgid(gid_t fsgid)
return syncopctx_setfsgid(&fsgid);
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setfsgid, 3.4.2);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setfsgroups, 3.4.2)
int
pub_glfs_setfsgroups(size_t size, const gid_t *list)
{
@@ -543,8 +541,7 @@ pub_glfs_setfsgroups(size_t size, const gid_t *list)
return syncopctx_setfsgroups(size, list);
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setfsgroups, 3.4.2);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setfsleaseid, 4.0.0)
int
pub_glfs_setfsleaseid(glfs_leaseid_t leaseid)
{
@@ -566,8 +563,6 @@ pub_glfs_setfsleaseid(glfs_leaseid_t leaseid)
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setfsleaseid, 4.0.0);
-
int
get_fop_attr_glfd(dict_t **fop_attr, struct glfs_fd *glfd)
{
@@ -655,14 +650,19 @@ unset_fop_attr(dict_t **fop_attr)
*fop_attr = NULL;
}
}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_from_glfd, 3.4.0)
struct glfs *
pub_glfs_from_glfd(struct glfs_fd *glfd)
{
+ if (glfd == NULL) {
+ errno = EBADF;
+ return NULL;
+ }
+
return glfd->fs;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_from_glfd, 3.4.0);
-
static void
glfs_fd_destroy(struct glfs_fd *glfd)
{
@@ -724,7 +724,7 @@ glfs_poller(void *data)
fs = data;
- event_dispatch(fs->ctx->event_pool);
+ gf_event_dispatch(fs->ctx->event_pool);
return NULL;
}
@@ -740,6 +740,7 @@ glfs_new_fs(const char *volname)
INIT_LIST_HEAD(&fs->openfds);
INIT_LIST_HEAD(&fs->upcall_list);
+ INIT_LIST_HEAD(&fs->waitq);
PTHREAD_MUTEX_INIT(&fs->mutex, NULL, fs->pthread_flags, GLFS_INIT_MUTEX,
err);
@@ -810,27 +811,45 @@ unlock:
return ret;
}
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_new, 3.4.0)
struct glfs *
pub_glfs_new(const char *volname)
{
+ if (!volname) {
+ errno = EINVAL;
+ return NULL;
+ }
+
struct glfs *fs = NULL;
+ int i = 0;
int ret = -1;
glusterfs_ctx_t *ctx = NULL;
xlator_t *old_THIS = NULL;
char pname[16] = "";
char msg[32] = "";
- if (!volname) {
+ if (volname[0] == '/' || volname[0] == '-') {
+ if (strncmp(volname, "/snaps/", 7) == 0) {
+ goto label;
+ }
errno = EINVAL;
return NULL;
}
+ for (i = 0; i < strlen(volname); i++) {
+ if (!isalnum(volname[i]) && (volname[i] != '_') &&
+ (volname[i] != '-')) {
+ errno = EINVAL;
+ return NULL;
+ }
+ }
+
+label:
/*
* Do this as soon as possible in case something else depends on
* pool allocations.
*/
- mem_pools_init_early();
- mem_pools_init_late();
+ mem_pools_init();
fs = glfs_new_fs(volname);
if (!fs)
@@ -899,8 +918,7 @@ out:
return fs;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_new, 3.4.0);
-
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_new_from_ctx, 3.7.0)
struct glfs *
priv_glfs_new_from_ctx(glusterfs_ctx_t *ctx)
{
@@ -919,8 +937,7 @@ out:
return fs;
}
-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_new_from_ctx, 3.7.0);
-
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_free_from_ctx, 3.7.0)
void
priv_glfs_free_from_ctx(struct glfs *fs)
{
@@ -956,8 +973,7 @@ priv_glfs_free_from_ctx(struct glfs *fs)
FREE(fs);
}
-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_free_from_ctx, 3.7.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_volfile, 3.4.0)
int
pub_glfs_set_volfile(struct glfs *fs, const char *volfile)
{
@@ -974,8 +990,7 @@ pub_glfs_set_volfile(struct glfs *fs, const char *volfile)
return 0;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_volfile, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_logging, 3.4.0)
int
pub_glfs_set_logging(struct glfs *fs, const char *logfile, int loglevel)
{
@@ -1013,8 +1028,6 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_logging, 3.4.0);
-
int
glfs_init_wait(struct glfs *fs)
{
@@ -1033,14 +1046,14 @@ glfs_init_wait(struct glfs *fs)
return ret;
}
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_init_done, 3.4.0)
void
priv_glfs_init_done(struct glfs *fs, int ret)
{
glfs_init_cbk init_cbk;
if (!fs) {
- gf_msg("glfs", GF_LOG_ERROR, EINVAL, API_MSG_GLFS_FSOBJ_NULL,
- "fs is NULL");
+ gf_smsg("glfs", GF_LOG_ERROR, EINVAL, API_MSG_GLFS_FSOBJ_NULL, NULL);
goto out;
}
@@ -1064,8 +1077,6 @@ out:
return;
}
-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_init_done, 3.4.0);
-
int
glfs_init_common(struct glfs *fs)
{
@@ -1093,8 +1104,7 @@ glfs_init_async(struct glfs *fs, glfs_init_cbk cbk)
int ret = -1;
if (!fs || !fs->ctx) {
- gf_msg("glfs", GF_LOG_ERROR, EINVAL, API_MSG_INVALID_ENTRY,
- "fs is not properly initialized.");
+ gf_smsg("glfs", GF_LOG_ERROR, EINVAL, API_MSG_FS_NOT_INIT, NULL);
errno = EINVAL;
return ret;
}
@@ -1106,6 +1116,7 @@ glfs_init_async(struct glfs *fs, glfs_init_cbk cbk)
return ret;
}
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_init, 3.4.0)
int
pub_glfs_init(struct glfs *fs)
{
@@ -1114,8 +1125,7 @@ pub_glfs_init(struct glfs *fs)
DECLARE_OLD_THIS;
if (!fs || !fs->ctx) {
- gf_msg("glfs", GF_LOG_ERROR, EINVAL, API_MSG_INVALID_ENTRY,
- "fs is not properly initialized.");
+ gf_smsg("glfs", GF_LOG_ERROR, EINVAL, API_MSG_FS_NOT_INIT, NULL);
errno = EINVAL;
return ret;
}
@@ -1139,8 +1149,6 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_init, 3.4.0);
-
static int
glusterfs_ctx_destroy(glusterfs_ctx_t *ctx)
{
@@ -1189,7 +1197,7 @@ glusterfs_ctx_destroy(glusterfs_ctx_t *ctx)
}
/* Free the event pool */
- ret = event_pool_destroy(ctx->event_pool);
+ ret = gf_event_pool_destroy(ctx->event_pool);
/* Free the iobuf pool */
iobuf_pool_destroy(ctx->iobuf_pool);
@@ -1212,11 +1220,13 @@ glusterfs_ctx_destroy(glusterfs_ctx_t *ctx)
glusterfs_graph_destroy_residual(trav_graph);
}
+ GF_FREE(ctx->statedump_path);
FREE(ctx);
return ret;
}
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fini, 3.4.0)
int
pub_glfs_fini(struct glfs *fs)
{
@@ -1228,6 +1238,7 @@ pub_glfs_fini(struct glfs *fs)
call_pool_t *call_pool = NULL;
int fs_init = 0;
int err = -1;
+ struct synctask *waittask = NULL;
DECLARE_OLD_THIS;
@@ -1249,6 +1260,13 @@ pub_glfs_fini(struct glfs *fs)
call_pool = fs->ctx->pool;
+ /* Wake up any suspended synctasks */
+ while (!list_empty(&fs->waitq)) {
+ waittask = list_entry(fs->waitq.next, struct synctask, waitq);
+ list_del_init(&waittask->waitq);
+ synctask_wake(waittask);
+ }
+
while (countdown--) {
/* give some time for background frames to finish */
pthread_mutex_lock(&fs->mutex);
@@ -1267,7 +1285,7 @@ pub_glfs_fini(struct glfs *fs)
}
}
pthread_mutex_unlock(&fs->mutex);
- usleep(100000);
+ gf_nanosleep(100000 * GF_US_IN_NS);
}
/* leaked frames may exist, we ignore */
@@ -1300,10 +1318,8 @@ pub_glfs_fini(struct glfs *fs)
graph = subvol->graph;
err = pthread_mutex_lock(&fs->mutex);
if (err != 0) {
- gf_msg("glfs", GF_LOG_ERROR, err, API_MSG_FSMUTEX_LOCK_FAILED,
- "pthread lock on glfs mutex, "
- "returned error: (%s)",
- strerror(err));
+ gf_smsg("glfs", GF_LOG_ERROR, err, API_MSG_FSMUTEX_LOCK_FAILED,
+ "error=%s", strerror(err), NULL);
goto fail;
}
/* check and wait for CHILD_DOWN for active subvol*/
@@ -1311,19 +1327,17 @@ pub_glfs_fini(struct glfs *fs)
while (graph->used) {
err = pthread_cond_wait(&fs->child_down_cond, &fs->mutex);
if (err != 0)
- gf_msg("glfs", GF_LOG_INFO, err,
- API_MSG_COND_WAIT_FAILED,
- "%s cond wait failed %s", subvol->name,
- strerror(err));
+ gf_smsg("glfs", GF_LOG_INFO, err,
+ API_MSG_COND_WAIT_FAILED, "name=%s",
+ subvol->name, "err=%s", strerror(err), NULL);
}
}
err = pthread_mutex_unlock(&fs->mutex);
if (err != 0) {
- gf_msg("glfs", GF_LOG_ERROR, err, API_MSG_FSMUTEX_UNLOCK_FAILED,
- "pthread unlock on glfs mutex, "
- "returned error: (%s)",
- strerror(err));
+ gf_smsg("glfs", GF_LOG_ERROR, err,
+ API_MSG_FSMUTEX_UNLOCK_FAILED, "error=%s",
+ strerror(err), NULL);
goto fail;
}
}
@@ -1358,7 +1372,7 @@ pub_glfs_fini(struct glfs *fs)
syncenv_destroy(ctx->env);
/* Join the poller thread */
- if (event_dispatch_destroy(ctx->event_pool) < 0)
+ if (gf_event_dispatch_destroy(ctx->event_pool) < 0)
ret = -1;
}
@@ -1403,8 +1417,7 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fini, 3.4.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_get_volfile, 3.6.0)
ssize_t
pub_glfs_get_volfile(struct glfs *fs, void *buf, size_t len)
{
@@ -1430,8 +1443,7 @@ invalid_fs:
return res;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_get_volfile, 3.6.0);
-
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_ipc, 3.12.0)
int
priv_glfs_ipc(struct glfs *fs, int opcode, void *xd_in, void **xd_out)
{
@@ -1459,107 +1471,118 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_ipc, 3.12.0);
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_setfspid, 6.1)
+int
+priv_glfs_setfspid(struct glfs *fs, pid_t pid)
+{
+ cmd_args_t *cmd_args = NULL;
+ int ret = 0;
+
+ cmd_args = &fs->ctx->cmd_args;
+ cmd_args->client_pid = pid;
+ cmd_args->client_pid_set = 1;
+ ret = syncopctx_setfspid(&pid);
+
+ return ret;
+}
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_free, 3.7.16)
void
pub_glfs_free(void *ptr)
{
GLFS_FREE(ptr);
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_free, 3.7.16);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_get_fs, 3.7.16)
struct glfs *
pub_glfs_upcall_get_fs(struct glfs_upcall *arg)
{
return arg->fs;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_get_fs, 3.7.16);
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_get_reason, 3.7.16)
enum glfs_upcall_reason
pub_glfs_upcall_get_reason(struct glfs_upcall *arg)
{
return arg->reason;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_get_reason, 3.7.16);
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_get_event, 3.7.16)
void *
pub_glfs_upcall_get_event(struct glfs_upcall *arg)
{
return arg->event;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_get_event, 3.7.16);
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_object, 3.7.16)
struct glfs_object *
pub_glfs_upcall_inode_get_object(struct glfs_upcall_inode *arg)
{
return arg->object;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_object, 3.7.16);
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_flags, 3.7.16)
uint64_t
pub_glfs_upcall_inode_get_flags(struct glfs_upcall_inode *arg)
{
return arg->flags;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_flags, 3.7.16);
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_stat, 3.7.16)
struct stat *
pub_glfs_upcall_inode_get_stat(struct glfs_upcall_inode *arg)
{
return &arg->buf;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_stat, 3.7.16);
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_expire, 3.7.16)
uint64_t
pub_glfs_upcall_inode_get_expire(struct glfs_upcall_inode *arg)
{
return arg->expire_time_attr;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_expire, 3.7.16);
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_pobject, 3.7.16)
struct glfs_object *
pub_glfs_upcall_inode_get_pobject(struct glfs_upcall_inode *arg)
{
return arg->p_object;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_pobject, 3.7.16);
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_pstat, 3.7.16)
struct stat *
pub_glfs_upcall_inode_get_pstat(struct glfs_upcall_inode *arg)
{
return &arg->p_buf;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_pstat, 3.7.16);
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_oldpobject, 3.7.16)
struct glfs_object *
pub_glfs_upcall_inode_get_oldpobject(struct glfs_upcall_inode *arg)
{
return arg->oldp_object;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_oldpobject, 3.7.16);
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_oldpstat, 3.7.16)
struct stat *
pub_glfs_upcall_inode_get_oldpstat(struct glfs_upcall_inode *arg)
{
return &arg->oldp_buf;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_oldpstat, 3.7.16);
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_lease_get_object, 4.1.6)
struct glfs_object *
pub_glfs_upcall_lease_get_object(struct glfs_upcall_lease *arg)
{
- return arg->object;
+ return arg->object;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_lease_get_object, 4.1.6);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_lease_get_lease_type, 4.1.6)
uint32_t
pub_glfs_upcall_lease_get_lease_type(struct glfs_upcall_lease *arg)
{
return arg->lease_type;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_lease_get_lease_type, 4.1.6);
/* definitions of the GLFS_SYSRQ_* chars are in glfs.h */
static struct glfs_sysrq_help {
@@ -1569,6 +1592,7 @@ static struct glfs_sysrq_help {
{GLFS_SYSRQ_STATEDUMP, "(S)tatedump"},
{0, NULL}};
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_sysrq, 3.10.0)
int
pub_glfs_sysrq(struct glfs *fs, char sysrq)
{
@@ -1608,8 +1632,8 @@ pub_glfs_sysrq(struct glfs *fs, char sysrq)
gf_proc_dump_info(SIGUSR1, ctx);
break;
default:
- gf_msg("glfs", GF_LOG_ERROR, ENOTSUP, API_MSG_INVALID_ENTRY,
- "'%c' is not a valid sysrq", sysrq);
+ gf_smsg("glfs", GF_LOG_ERROR, ENOTSUP, API_MSG_INVALID_SYSRQ,
+ "sysrq=%c", sysrq, NULL);
errno = ENOTSUP;
ret = -1;
}
@@ -1617,8 +1641,7 @@ out:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_sysrq, 3.10.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_register, 3.13.0)
int
pub_glfs_upcall_register(struct glfs *fs, uint32_t event_list,
glfs_upcall_cbk cbk, void *data)
@@ -1640,8 +1663,8 @@ pub_glfs_upcall_register(struct glfs *fs, uint32_t event_list,
if ((event_list != GLFS_EVENT_ANY) && (event_list & ~up_events)) {
errno = EINVAL;
ret = -1;
- gf_msg(THIS->name, GF_LOG_ERROR, errno, LG_MSG_INVALID_ARG,
- "invalid event_list (0x%08x)", event_list);
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_INVALID_ARG,
+ "event_list=(0x%08x)", event_list, NULL);
goto out;
}
@@ -1674,8 +1697,7 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_register, 3.13.0);
-
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_unregister, 3.13.0)
int
pub_glfs_upcall_unregister(struct glfs *fs, uint32_t event_list)
{
@@ -1693,8 +1715,8 @@ pub_glfs_upcall_unregister(struct glfs *fs, uint32_t event_list)
if ((event_list != GLFS_EVENT_ANY) && (event_list & ~up_events)) {
errno = EINVAL;
ret = -1;
- gf_msg(THIS->name, GF_LOG_ERROR, errno, LG_MSG_INVALID_ARG,
- "invalid event_list (0x%08x)", event_list);
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_INVALID_ARG,
+ "event_list=(0x%08x)", event_list, NULL);
goto out;
}
@@ -1722,4 +1744,63 @@ invalid_fs:
return ret;
}
-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_unregister, 3.13.0);
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_statedump_path, 7.0)
+int
+pub_glfs_set_statedump_path(struct glfs *fs, const char *path)
+{
+ struct stat st;
+ int ret;
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ if (!path) {
+ gf_log("glfs", GF_LOG_ERROR, "path is NULL");
+ errno = EINVAL;
+ goto err;
+ }
+
+ /* If path is not present OR, if it is directory AND has enough permission
+ * to create files, then proceed */
+ ret = sys_stat(path, &st);
+ if (ret && errno != ENOENT) {
+ gf_log("glfs", GF_LOG_ERROR, "%s: not a valid path (%s)", path,
+ strerror(errno));
+ errno = EINVAL;
+ goto err;
+ }
+
+ if (!ret) {
+ /* file is present, now check other things */
+ if (!S_ISDIR(st.st_mode)) {
+ gf_log("glfs", GF_LOG_ERROR, "%s: path is not directory", path);
+ errno = EINVAL;
+ goto err;
+ }
+ if (sys_access(path, W_OK | X_OK) < 0) {
+ gf_log("glfs", GF_LOG_ERROR,
+ "%s: path doesn't have write permission", path);
+ errno = EPERM;
+ goto err;
+ }
+ }
+
+ /* If set, it needs to be freed, so we don't have leak */
+ GF_FREE(fs->ctx->statedump_path);
+
+ fs->ctx->statedump_path = gf_strdup(path);
+ if (!fs->ctx->statedump_path) {
+ gf_log("glfs", GF_LOG_ERROR,
+ "%s: failed to set statedump path, no memory", path);
+ errno = ENOMEM;
+ goto err;
+ }
+
+ __GLFS_EXIT_FS;
+
+ return 0;
+err:
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return -1;
+}
diff --git a/api/src/glfs.h b/api/src/glfs.h
index cd642a5ea20..279d11d58ee 100644
--- a/api/src/glfs.h
+++ b/api/src/glfs.h
@@ -20,6 +20,17 @@
both the library and the application.
*/
+/* Values for valid flags to be used when using XXXsetattr, to set multiple
+ attribute values passed via the related stat structure.
+ */
+
+#define GFAPI_SET_ATTR_MODE 0x1
+#define GFAPI_SET_ATTR_UID 0x2
+#define GFAPI_SET_ATTR_GID 0x4
+#define GFAPI_SET_ATTR_SIZE 0x8
+#define GFAPI_SET_ATTR_ATIME 0x10
+#define GFAPI_SET_ATTR_MTIME 0x20
+
#ifndef _FILE_OFFSET_BITS
#define _FILE_OFFSET_BITS 64
#endif
@@ -40,7 +51,40 @@
#include <sys/cdefs.h>
#include <dirent.h>
#include <sys/statvfs.h>
-#include <inttypes.h>
+#include <stdint.h>
+#include <sys/time.h>
+
+/*
+ * For off64_t to be defined, we need both
+ * __USE_LARGEFILE64 to be true and __off64_t_defnined to be
+ * false. But, making __USE_LARGEFILE64 true causes other issues
+ * such as redinition of stat and fstat to stat64 and fstat64
+ * respectively which again causes compilation issues.
+ * Without off64_t being defined, this will not compile as
+ * copy_file_range uses off64_t. Hence define it here. First
+ * check whether __off64_t_defined is true or not. <unistd.h>
+ * sets that flag when it defines off64_t. If __off64_t_defined
+ * is false and __USE_FILE_OFFSET64 is true, then go on to define
+ * off64_t using __off64_t.
+ */
+#ifndef GF_BSD_HOST_OS
+#if defined(__USE_FILE_OFFSET64) && !defined(__off64_t_defined)
+typedef __off64_t off64_t;
+#endif /* defined(__USE_FILE_OFFSET64) && !defined(__off64_t_defined) */
+#else
+#include <stdio.h>
+#ifndef _OFF64_T_DECLARED
+/*
+ * Including <stdio.h> (done above) should actually define
+ * _OFF64_T_DECLARED with off64_t data type being available
+ * for consumption. But, off64_t data type is not recognizable
+ * for FreeBSD versions less than 11. Hence, int64_t is typedefed
+ * to off64_t.
+ */
+#define _OFF64_T_DECLARED
+typedef int64_t off64_t;
+#endif /* _OFF64_T_DECLARED */
+#endif /* GF_BSD_HOST_OS */
#if defined(HAVE_SYS_ACL_H) || (defined(USE_POSIX_ACLS) && USE_POSIX_ACLS)
#include <sys/acl.h>
@@ -371,6 +415,116 @@ glfs_get_volumeid(glfs_t *fs, char *volid, size_t size) __THROW
struct glfs_fd;
typedef struct glfs_fd glfs_fd_t;
+/*
+ * Mask for request/result items in the struct glfs_stat.
+ *
+ * Query request/result mask for glfs_stat() (family of functions) and
+ * struct glfs_stat::glfs_st_mask.
+ *
+ * These bits should be set in the mask argument of glfs_stat() (family of
+ * functions) to request particular items when calling glfs_stat().
+ *
+ * NOTE: Lower order 32 bits are used to reflect statx(2) bits. For Gluster
+ * specific attrs/extensions, use higher order 32 bits.
+ *
+ */
+#define GLFS_STAT_TYPE 0x0000000000000001U /* Want/got stx_mode & S_IFMT */
+#define GLFS_STAT_MODE 0x0000000000000002U /* Want/got stx_mode & ~S_IFMT */
+#define GLFS_STAT_NLINK 0x0000000000000004U /* Want/got stx_nlink */
+#define GLFS_STAT_UID 0x0000000000000008U /* Want/got stx_uid */
+#define GLFS_STAT_GID 0x0000000000000010U /* Want/got stx_gid */
+#define GLFS_STAT_ATIME 0x0000000000000020U /* Want/got stx_atime */
+#define GLFS_STAT_MTIME 0x0000000000000040U /* Want/got stx_mtime */
+#define GLFS_STAT_CTIME 0x0000000000000080U /* Want/got stx_ctime */
+#define GLFS_STAT_INO 0x0000000000000100U /* Want/got stx_ino */
+#define GLFS_STAT_SIZE 0x0000000000000200U /* Want/got stx_size */
+#define GLFS_STAT_BLOCKS 0x0000000000000400U /* Want/got stx_blocks */
+#define GLFS_STAT_BASIC_STATS \
+ 0x00000000000007ffU /* Items in the normal stat struct */
+#define GLFS_STAT_BTIME 0x0000000000000800U /* Want/got stx_btime */
+#define GLFS_STAT_ALL 0x0000000000000fffU /* All currently supported flags */
+#define GLFS_STAT_RESERVED \
+ 0x8000000000000000U /* Reserved to denote future expansion */
+
+/* Macros for checking validity of struct glfs_stat members.*/
+#define GLFS_STAT_TYPE_VALID(stmask) (stmask & GLFS_STAT_TYPE)
+#define GLFS_STAT_MODE_VALID(stmask) (stmask & GLFS_STAT_MODE)
+#define GLFS_STAT_NLINK_VALID(stmask) (stmask & GLFS_STAT_NLINK)
+#define GLFS_STAT_UID_VALID(stmask) (stmask & GLFS_STAT_UID)
+#define GLFS_STAT_GID_VALID(stmask) (stmask & GLFS_STAT_GID)
+#define GLFS_STAT_ATIME_VALID(stmask) (stmask & GLFS_STAT_ATIME)
+#define GLFS_STAT_MTIME_VALID(stmask) (stmask & GLFS_STAT_MTIME)
+#define GLFS_STAT_CTIME_VALID(stmask) (stmask & GLFS_STAT_CTIME)
+#define GLFS_STAT_INO_VALID(stmask) (stmask & GLFS_STAT_INO)
+#define GLFS_STAT_SIZE_VALID(stmask) (stmask & GLFS_STAT_SIZE)
+#define GLFS_STAT_BLOCKS_VALID(stmask) (stmask & GLFS_STAT_BLOCKS)
+#define GLFS_STAT_BTIME_VALID(stmask) (stmask & GLFS_STAT_BTIME)
+#define GLFS_STAT_GFID_VALID(stmask) (stmask & GLFS_STAT_GFID)
+
+/*
+ * Attributes to be found in glfs_st_attributes and masked in
+ * glfs_st_attributes_mask.
+ *
+ * These give information about the features or the state of a file that might
+ * be of use to programs.
+ *
+ * NOTE: Lower order 32 bits are used to reflect statx(2) attribute bits. For
+ * Gluster specific attrs, use higher order 32 bits.
+ *
+ * NOTE: We do not support any file attributes or state as yet!
+ */
+#define GLFS_STAT_ATTR_RESERVED \
+ 0x8000000000000000U /* Reserved to denote future expansion */
+
+/* Extended file attribute structure.
+ *
+ * The caller passes a mask of what they're specifically interested in as a
+ * parameter to glfs_stat(). What glfs_stat() actually got will be indicated
+ * in glfs_st_mask upon return.
+ *
+ * For each bit in the mask argument:
+ *
+ * - if the datum is not supported:
+ *
+ * - the bit will be cleared, and
+ *
+ * - the datum value is undefined
+ *
+ * - otherwise, if explicitly requested:
+ *
+ * - the field will be filled in and the bit will be set;
+ *
+ * - otherwise, if not requested, but available in, it will be filled in
+ * anyway, and the bit will be set upon return;
+ *
+ * - otherwise the field and the bit will be cleared before returning.
+ *
+ */
+
+struct glfs_stat {
+ uint64_t glfs_st_mask; /* What results were written [uncond] */
+ uint64_t glfs_st_attributes; /* Flags conveying information about the file
+ [uncond] */
+ uint64_t glfs_st_attributes_mask; /* Mask to show what's supported in
+ st_attributes [ucond] */
+ struct timespec glfs_st_atime; /* Last access time */
+ struct timespec glfs_st_btime; /* File creation time */
+ struct timespec glfs_st_ctime; /* Last attribute change time */
+ struct timespec glfs_st_mtime; /* Last data modification time */
+ ino_t glfs_st_ino; /* Inode number */
+ off_t glfs_st_size; /* File size */
+ blkcnt_t glfs_st_blocks; /* Number of 512-byte blocks allocated */
+ uint32_t glfs_st_rdev_major; /* Device ID of special file [if bdev/cdev] */
+ uint32_t glfs_st_rdev_minor;
+ uint32_t glfs_st_dev_major; /* ID of device containing file [uncond] */
+ uint32_t glfs_st_dev_minor;
+ blksize_t glfs_st_blksize; /* Preferred general I/O size [uncond] */
+ nlink_t glfs_st_nlink; /* Number of hard links */
+ uid_t glfs_st_uid; /* User ID of owner */
+ gid_t glfs_st_gid; /* Group ID of owner */
+ mode_t glfs_st_mode; /* File mode */
+};
+
#define GLFS_LEASE_ID_SIZE 16 /* 128bits */
typedef char glfs_leaseid_t[GLFS_LEASE_ID_SIZE];
@@ -504,10 +658,14 @@ glfs_set_xlator_option(glfs_t *fs, const char *xlator, const char *key,
time of issuing the async IO call. This can be used by the
caller to differentiate different instances of the async requests
in a common callback function.
+
+ @prestat and @poststat are allocated on the stack, that are auto destroyed
+ post the callback function returns.
*/
-typedef void (*glfs_io_cbk)(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
- struct stat *poststat, void *data);
+typedef void (*glfs_io_cbk)(glfs_fd_t *fd, ssize_t ret,
+ struct glfs_stat *prestat,
+ struct glfs_stat *poststat, void *data);
// glfs_{read,write}[_async]
@@ -522,12 +680,12 @@ glfs_write(glfs_fd_t *fd, const void *buf, size_t count, int flags) __THROW
int
glfs_read_async(glfs_fd_t *fd, void *buf, size_t count, int flags,
glfs_io_cbk fn, void *data) __THROW
- GFAPI_PUBLIC(glfs_read_async, future);
+ GFAPI_PUBLIC(glfs_read_async, 6.0);
int
glfs_write_async(glfs_fd_t *fd, const void *buf, size_t count, int flags,
glfs_io_cbk fn, void *data) __THROW
- GFAPI_PUBLIC(glfs_write_async, future);
+ GFAPI_PUBLIC(glfs_write_async, 6.0);
// glfs_{read,write}v[_async]
@@ -542,33 +700,33 @@ glfs_writev(glfs_fd_t *fd, const struct iovec *iov, int iovcnt,
int
glfs_readv_async(glfs_fd_t *fd, const struct iovec *iov, int count, int flags,
glfs_io_cbk fn, void *data) __THROW
- GFAPI_PUBLIC(glfs_readv_async, future);
+ GFAPI_PUBLIC(glfs_readv_async, 6.0);
int
glfs_writev_async(glfs_fd_t *fd, const struct iovec *iov, int count, int flags,
glfs_io_cbk fn, void *data) __THROW
- GFAPI_PUBLIC(glfs_writev_async, future);
+ GFAPI_PUBLIC(glfs_writev_async, 6.0);
// glfs_p{read,write}[_async]
ssize_t
glfs_pread(glfs_fd_t *fd, void *buf, size_t count, off_t offset, int flags,
- struct stat *poststat) __THROW GFAPI_PUBLIC(glfs_pread, future);
+ struct glfs_stat *poststat) __THROW GFAPI_PUBLIC(glfs_pread, 6.0);
ssize_t
glfs_pwrite(glfs_fd_t *fd, const void *buf, size_t count, off_t offset,
- int flags, struct stat *prestat, struct stat *poststat) __THROW
- GFAPI_PUBLIC(glfs_pwrite, future);
+ int flags, struct glfs_stat *prestat,
+ struct glfs_stat *poststat) __THROW GFAPI_PUBLIC(glfs_pwrite, 6.0);
int
glfs_pread_async(glfs_fd_t *fd, void *buf, size_t count, off_t offset,
int flags, glfs_io_cbk fn, void *data) __THROW
- GFAPI_PUBLIC(glfs_pread_async, future);
+ GFAPI_PUBLIC(glfs_pread_async, 6.0);
int
glfs_pwrite_async(glfs_fd_t *fd, const void *buf, int count, off_t offset,
int flags, glfs_io_cbk fn, void *data) __THROW
- GFAPI_PUBLIC(glfs_pwrite_async, future);
+ GFAPI_PUBLIC(glfs_pwrite_async, 6.0);
// glfs_p{read,write}v[_async]
@@ -583,30 +741,38 @@ glfs_pwritev(glfs_fd_t *fd, const struct iovec *iov, int iovcnt, off_t offset,
int
glfs_preadv_async(glfs_fd_t *fd, const struct iovec *iov, int count,
off_t offset, int flags, glfs_io_cbk fn, void *data) __THROW
- GFAPI_PUBLIC(glfs_preadv_async, future);
+ GFAPI_PUBLIC(glfs_preadv_async, 6.0);
int
glfs_pwritev_async(glfs_fd_t *fd, const struct iovec *iov, int count,
off_t offset, int flags, glfs_io_cbk fn, void *data) __THROW
- GFAPI_PUBLIC(glfs_pwritev_async, future);
+ GFAPI_PUBLIC(glfs_pwritev_async, 6.0);
off_t
glfs_lseek(glfs_fd_t *fd, off_t offset, int whence) __THROW
GFAPI_PUBLIC(glfs_lseek, 3.4.0);
+ssize_t
+glfs_copy_file_range(struct glfs_fd *glfd_in, off64_t *off_in,
+ struct glfs_fd *glfd_out, off64_t *off_out, size_t len,
+ unsigned int flags, struct glfs_stat *statbuf,
+ struct glfs_stat *prestat,
+ struct glfs_stat *poststat) __THROW
+ GFAPI_PUBLIC(glfs_copy_file_range, 6.0);
+
int
glfs_truncate(glfs_t *fs, const char *path, off_t length) __THROW
GFAPI_PUBLIC(glfs_truncate, 3.7.15);
int
-glfs_ftruncate(glfs_fd_t *fd, off_t length, struct stat *prestat,
- struct stat *poststat) __THROW
- GFAPI_PUBLIC(glfs_ftruncate, future);
+glfs_ftruncate(glfs_fd_t *fd, off_t length, struct glfs_stat *prestat,
+ struct glfs_stat *poststat) __THROW
+ GFAPI_PUBLIC(glfs_ftruncate, 6.0);
int
glfs_ftruncate_async(glfs_fd_t *fd, off_t length, glfs_io_cbk fn,
void *data) __THROW
- GFAPI_PUBLIC(glfs_ftruncate_async, future);
+ GFAPI_PUBLIC(glfs_ftruncate_async, 6.0);
int
glfs_lstat(glfs_t *fs, const char *path, struct stat *buf) __THROW
@@ -621,21 +787,21 @@ glfs_fstat(glfs_fd_t *fd, struct stat *buf) __THROW
GFAPI_PUBLIC(glfs_fstat, 3.4.0);
int
-glfs_fsync(glfs_fd_t *fd, struct stat *prestat, struct stat *poststat) __THROW
- GFAPI_PUBLIC(glfs_fsync, future);
+glfs_fsync(glfs_fd_t *fd, struct glfs_stat *prestat,
+ struct glfs_stat *poststat) __THROW GFAPI_PUBLIC(glfs_fsync, 6.0);
int
glfs_fsync_async(glfs_fd_t *fd, glfs_io_cbk fn, void *data) __THROW
- GFAPI_PUBLIC(glfs_fsync_async, future);
+ GFAPI_PUBLIC(glfs_fsync_async, 6.0);
int
-glfs_fdatasync(glfs_fd_t *fd, struct stat *prestat,
- struct stat *poststat) __THROW
- GFAPI_PUBLIC(glfs_fdatasync, future);
+glfs_fdatasync(glfs_fd_t *fd, struct glfs_stat *prestat,
+ struct glfs_stat *poststat) __THROW
+ GFAPI_PUBLIC(glfs_fdatasync, 6.0);
int
glfs_fdatasync_async(glfs_fd_t *fd, glfs_io_cbk fn, void *data) __THROW
- GFAPI_PUBLIC(glfs_fdatasync_async, future);
+ GFAPI_PUBLIC(glfs_fdatasync_async, 6.0);
int
glfs_access(glfs_t *fs, const char *path, int mode) __THROW
@@ -817,7 +983,7 @@ glfs_discard(glfs_fd_t *fd, off_t offset, size_t len) __THROW
int
glfs_discard_async(glfs_fd_t *fd, off_t length, size_t lent, glfs_io_cbk fn,
- void *data) __THROW GFAPI_PUBLIC(glfs_discard_async, future);
+ void *data) __THROW GFAPI_PUBLIC(glfs_discard_async, 6.0);
int
glfs_zerofill(glfs_fd_t *fd, off_t offset, off_t len) __THROW
@@ -825,8 +991,7 @@ glfs_zerofill(glfs_fd_t *fd, off_t offset, off_t len) __THROW
int
glfs_zerofill_async(glfs_fd_t *fd, off_t length, off_t len, glfs_io_cbk fn,
- void *data) __THROW
- GFAPI_PUBLIC(glfs_zerofill_async, future);
+ void *data) __THROW GFAPI_PUBLIC(glfs_zerofill_async, 6.0);
char *
glfs_getcwd(glfs_t *fs, char *buf, size_t size) __THROW
@@ -1239,5 +1404,82 @@ int
glfs_lease(glfs_fd_t *glfd, glfs_lease_t *lease, glfs_recall_cbk fn,
void *data) __THROW GFAPI_PUBLIC(glfs_lease, 4.0.0);
+/*
+ SYNOPSIS
+
+ glfs_fsetattr: Function to set attributes.
+ glfs_setattr: Function to set attributes
+
+ DESCRIPTION
+
+ The functions are used to set attributes on the file.
+
+ PARAMETERS
+
+ @glfs_fsetattr
+
+ @glfd: The fd of the file for which the attributes are to be set,
+ this fd is returned by glfs_open/glfs_create.
+
+ @glfs_setattr
+
+ @fs: File object.
+
+ @path: The path of the file that is being operated on.
+
+ @follow: Flag used to resolve symlink.
+
+
+ @stat: Struct that has information about the file.
+
+ @valid: This is the mask bit, that accepts GFAPI_SET_ATTR* masks.
+ Refer glfs.h to see the mask definitions.
+
+ Both functions are similar in functionality, just that the
+ func setattr() uses file path whereas the func fsetattr()
+ uses the fd.
+
+ RETURN VALUES
+ 0: Successful completion
+ <0: Failure. @errno will be set with the type of failure
+
+ */
+
+int
+glfs_fsetattr(struct glfs_fd *glfd, struct glfs_stat *stat) __THROW
+ GFAPI_PUBLIC(glfs_fsetattr, 6.0);
+
+int
+glfs_setattr(struct glfs *fs, const char *path, struct glfs_stat *stat,
+ int follow) __THROW GFAPI_PUBLIC(glfs_setattr, 6.0);
+
+/*
+ SYNOPSIS
+
+ glfs_set_statedump_path: Function to set statedump path.
+
+ DESCRIPTION
+
+ This function is used to set statedump directory
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be configured with the volume
+ specification file.
+
+ @path: statedump path. Should be a directory. But the API won't fail if the
+ directory doesn't exist yet, as one may create it later.
+
+ RETURN VALUES
+
+ 0 : Success.
+ -1 : Failure. @errno will be set with the type of failure.
+
+ */
+
+int
+glfs_set_statedump_path(struct glfs *fs, const char *path) __THROW
+ GFAPI_PUBLIC(glfs_set_statedump_path, 7.0);
+
__END_DECLS
#endif /* !_GLFS_H */
diff --git a/autogen.sh b/autogen.sh
index a4affc277bf..c8cdc3f89fa 100755
--- a/autogen.sh
+++ b/autogen.sh
@@ -85,7 +85,7 @@ $TOOL --automake --copy --force
echo Running ${AUTOCONF}...
$AUTOCONF
echo Running ${AUTOMAKE}...
-$AUTOMAKE --add-missing --force-missing --copy --foreign
+$AUTOMAKE --add-missing --force-missing --copy
# Instruct user on next steps
echo
diff --git a/build-aux/pkg-version b/build-aux/pkg-version
index 83d4a5f9136..17ceab70c03 100755
--- a/build-aux/pkg-version
+++ b/build-aux/pkg-version
@@ -1,9 +1,13 @@
-#!/bin/sh
+#!/bin/bash
# To override version/release from git,
# create VERSION file containing text with version/release
# eg. v3.4.0-1
-PKG_VERSION=`cat VERSION 2> /dev/null || git describe --tags --match "v[0-9]*"`
+
+# One thing to note, If one does 'git clone --depth N glusterfs.git',
+# the git describe command doesn't work. Hence you notice below that
+# we have added timestamp as version (YYYY.MM.DD) and release (HH.mmss)
+PKG_VERSION=`cat VERSION 2> /dev/null || git describe --tags --match "v[0-9]*" 2>/dev/null`
get_version()
{
@@ -18,7 +22,11 @@ get_version()
sub(/^v/,"") ; print $1
}'
- echo $PKG_VERSION | awk "$AWK_VERSION" | tr -cd '[:alnum:].'
+ version=$(echo $PKG_VERSION | awk "$AWK_VERSION" | tr -cd '[:alnum:].')
+ if [ "x${version}" == "x" ] ; then
+ version=$(date +%Y.%m.%d | tr -d '\n')
+ fi
+ echo $version | tr -d '\n'
}
get_release()
@@ -37,7 +45,11 @@ get_release()
else if (NF == 4) print $2, $3, "git" substr($4, 2)
}'
- echo $PKG_VERSION | awk "$AWK_RELEASE" | tr -cd '[:alnum:].'
+ release=$(echo $PKG_VERSION | awk "$AWK_RELEASE" | tr -cd '[:alnum:].')
+ if [ "x${release}" == "x" ] ; then
+ release=$(date +%H.%M%S | tr -d '\n')
+ fi
+ echo $release | tr -d '\n'
}
if test "x$1" = "x--full"; then
diff --git a/cli/src/Makefile.am b/cli/src/Makefile.am
index 6be070f6ff7..16063f27c7f 100644
--- a/cli/src/Makefile.am
+++ b/cli/src/Makefile.am
@@ -1,12 +1,11 @@
-if WITH_SERVER
sbin_PROGRAMS = gluster
-endif
gluster_SOURCES = cli.c registry.c input.c cli-cmd.c cli-rl.c cli-cmd-global.c \
cli-cmd-volume.c cli-cmd-peer.c cli-rpc-ops.c cli-cmd-parser.c\
cli-cmd-system.c cli-cmd-misc.c cli-xml-output.c cli-quotad-client.c cli-cmd-snapshot.c
gluster_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(GF_LDADD) \
+ $(top_builddir)/libglusterd/src/libglusterd.la \
$(RLLIBS) $(top_builddir)/rpc/xdr/src/libgfxdr.la \
$(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
$(XML_LIBS)
@@ -15,13 +14,14 @@ gluster_LDFLAGS = $(GF_LDFLAGS)
noinst_HEADERS = cli.h cli-mem-types.h cli-cmd.h cli-quotad-client.h
AM_CPPFLAGS = $(GF_CPPFLAGS) \
- -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/rpc-lib/src\
- -I$(top_srcdir)/rpc/xdr/src\
- -I$(top_builddir)/rpc/xdr/src\
+ -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_srcdir)/libglusterd/src \
+ -I$(top_builddir)/rpc/xdr/src \
-DDATADIR=\"$(localstatedir)\" \
-DCONFDIR=\"$(sysconfdir)/glusterfs\" \
-DGSYNCD_PREFIX=\"$(GLUSTERFS_LIBEXECDIR)\"\
- -DSYNCDAEMON_COMPILE=$(SYNCDAEMON_COMPILE) -DSBIN_DIR=\"$(sbindir)\"
+ -DGLFSHEAL_PREFIX=\"$(GLUSTERFS_LIBEXECDIR)\"\
+ -DSYNCDAEMON_COMPILE=$(SYNCDAEMON_COMPILE)
AM_CFLAGS = -Wall $(GF_CFLAGS) $(XML_CFLAGS)
@@ -30,5 +30,8 @@ CLEANFILES =
$(top_builddir)/libglusterfs/src/libglusterfs.la:
$(MAKE) -C $(top_builddir)/libglusterfs/src/ all
+$(top_builddir)/libglusterd/src/libglusterd.la:
+ $(MAKE) -C $(top_builddir)/libglusterd/src/ all
+
install-data-hook:
$(mkdir_p) $(DESTDIR)$(localstatedir)/run/gluster
diff --git a/cli/src/cli-cmd-global.c b/cli/src/cli-cmd-global.c
index e1bedf13542..2c9a5f01bb1 100644
--- a/cli/src/cli-cmd-global.c
+++ b/cli/src/cli-cmd-global.c
@@ -23,11 +23,9 @@
#include "cli-cmd.h"
#include "cli-mem-types.h"
#include "cli1-xdr.h"
-#include "run.h"
-#include "syscall.h"
-#include "common-utils.h"
-
-extern rpc_clnt_prog_t *cli_rpc_prog;
+#include <glusterfs/run.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/common-utils.h>
int
cli_cmd_global_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
@@ -36,6 +34,10 @@ int
cli_cmd_get_state_cbk(struct cli_state *state, struct cli_cmd_word *word,
const char **words, int wordcount);
+int
+cli_cmd_ganesha_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount);
+
struct cli_cmd global_cmds[] = {
{
"global help",
@@ -48,6 +50,11 @@ struct cli_cmd global_cmds[] = {
cli_cmd_get_state_cbk,
"Get local state representation of mentioned daemon",
},
+ {
+ "nfs-ganesha {enable| disable} ",
+ cli_cmd_ganesha_cbk,
+ "Enable/disable NFS-Ganesha support",
+ },
{NULL, NULL, NULL}};
int
@@ -89,8 +96,9 @@ out:
}
int
-cli_cmd_get_state_cbk(struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_ganesha_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+
{
int sent = 0;
int parse_error = 0;
@@ -101,10 +109,53 @@ cli_cmd_get_state_cbk(struct cli_state *state, struct cli_cmd_word *word,
cli_local_t *local = NULL;
char *op_errstr = NULL;
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GANESHA];
+
frame = create_frame(THIS, THIS->ctx->pool);
if (!frame)
goto out;
+ ret = cli_cmd_ganesha_parse(state, words, wordcount, &options, &op_errstr);
+ if (ret) {
+ if (op_errstr) {
+ cli_err("%s", op_errstr);
+ GF_FREE(op_errstr);
+ } else
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ CLI_LOCAL_INIT(local, words, frame, options);
+
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, options);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Setting global option failed");
+ }
+
+ CLI_STACK_DESTROY(frame);
+ return ret;
+}
+
+int
+cli_cmd_get_state_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int sent = 0;
+ int parse_error = 0;
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ cli_local_t *local = NULL;
+ char *op_errstr = NULL;
+
ret = cli_cmd_get_state_parse(state, words, wordcount, &options,
&op_errstr);
@@ -120,6 +171,12 @@ cli_cmd_get_state_cbk(struct cli_state *state, struct cli_cmd_word *word,
goto out;
}
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
CLI_LOCAL_INIT(local, words, frame, options);
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GET_STATE];
diff --git a/cli/src/cli-cmd-misc.c b/cli/src/cli-cmd-misc.c
index 04dd2efc220..e961d88da86 100644
--- a/cli/src/cli-cmd-misc.c
+++ b/cli/src/cli-cmd-misc.c
@@ -18,14 +18,9 @@
#include "cli-mem-types.h"
#include "protocol-common.h"
-extern struct rpc_clnt *global_rpc;
-
-extern rpc_clnt_prog_t *cli_rpc_prog;
-
extern struct cli_cmd volume_cmds[];
extern struct cli_cmd bitrot_cmds[];
extern struct cli_cmd quota_cmds[];
-extern struct cli_cmd tier_cmds[];
extern struct cli_cmd cli_probe_cmds[];
extern struct cli_cmd cli_log_cmds[];
extern struct cli_cmd cli_system_cmds[];
@@ -58,19 +53,9 @@ int
cli_cmd_display_help(struct cli_state *state, struct cli_cmd_word *in_word,
const char **words, int wordcount)
{
- struct cli_cmd *cmd[] = {
- cli_misc_cmds,
- cli_probe_cmds,
- volume_cmds,
- bitrot_cmds,
- quota_cmds,
-#if !defined(__NetBSD__)
- tier_cmds,
-#endif
- snapshot_cmds,
- global_cmds,
- NULL
- };
+ static struct cli_cmd *cmd[] = {
+ cli_misc_cmds, cli_probe_cmds, volume_cmds, bitrot_cmds,
+ quota_cmds, snapshot_cmds, global_cmds, NULL};
struct cli_cmd *cmd_ind = NULL;
int i = 0;
gf_boolean_t list_all = _gf_false;
diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
index 94deb9125a2..34620b4a31b 100644
--- a/cli/src/cli-cmd-parser.c
+++ b/cli/src/cli-cmd-parser.c
@@ -18,15 +18,15 @@
#include "cli.h"
#include "cli-cmd.h"
#include "cli-mem-types.h"
-#include "dict.h"
-#include "list.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/list.h>
#include "protocol-common.h"
#include "cli1-xdr.h"
#define MAX_SNAP_DESCRIPTION_LEN 1024
-struct snap_config_opt_vals_ snap_confopt_vals[] = {
+static struct snap_config_opt_vals_ snap_confopt_vals[] = {
{.op_name = "snap-max-hard-limit",
.question = "Changing snapshot-max-hard-limit "
"will limit the creation of new snapshots "
@@ -80,6 +80,95 @@ str_getunamb(const char *tok, char **opwords)
}
int32_t
+cli_cmd_ta_brick_parse(const char **words, int wordcount, char **ta_brick)
+{
+ char *host_name = NULL;
+ char *tmp_host = NULL;
+ char *delimiter = NULL;
+ cli_brick_t *brick = NULL;
+ int ret = 0;
+
+ GF_ASSERT(words);
+ GF_ASSERT(wordcount);
+
+ if (validate_brick_name((char *)words[wordcount - 1])) {
+ cli_err(
+ "Wrong brick type: %s, use <HOSTNAME>:"
+ "<export-dir-abs-path>",
+ words[wordcount - 1]);
+ ret = -1;
+ goto out;
+ } else {
+ delimiter = strrchr(words[wordcount - 1], ':');
+ ret = gf_canonicalize_path(delimiter + 1);
+ if (ret)
+ goto out;
+ }
+
+ tmp_host = gf_strdup((char *)words[wordcount - 1]);
+ if (!tmp_host) {
+ gf_log("cli", GF_LOG_ERROR, "Out of memory");
+ ret = -1;
+ goto out;
+ }
+ get_host_name(tmp_host, &host_name);
+ if (!host_name) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR,
+ "Unable to retrieve "
+ "hostname");
+ goto out;
+ }
+
+ if (!(strcmp(host_name, "localhost") && strcmp(host_name, "127.0.0.1") &&
+ strncmp(host_name, "0.", 2))) {
+ cli_err(
+ "Please provide a valid hostname/ip other "
+ "than localhost, 127.0.0.1 or loopback "
+ "address (0.0.0.0 to 0.255.255.255).");
+ ret = -1;
+ goto out;
+ }
+ if (!valid_internet_address(host_name, _gf_false, _gf_false)) {
+ cli_err(
+ "internet address '%s' does not conform to "
+ "standards",
+ host_name);
+ }
+
+ brick = GF_MALLOC(sizeof(cli_brick_t), gf_common_list_node);
+ if (brick == NULL) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Out of memory");
+ goto out;
+ }
+
+ brick->name = words[wordcount - 1];
+ brick->len = strlen(words[wordcount - 1]);
+ *ta_brick = GF_MALLOC(brick->len + 3, gf_common_mt_char);
+ if (*ta_brick == NULL) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Out of memory");
+ goto out;
+ }
+
+ strcat(*ta_brick, " ");
+ strcat(*ta_brick, brick->name);
+ strcat(*ta_brick, " ");
+out:
+ if (tmp_host) {
+ GF_FREE(tmp_host);
+ tmp_host = NULL;
+ }
+ if (brick) {
+ GF_FREE(brick);
+ brick = NULL;
+ }
+
+ return ret;
+}
+
+int32_t
cli_cmd_bricks_parse(const char **words, int wordcount, int brick_index,
char **bricks, int *brick_count)
{
@@ -144,7 +233,7 @@ cli_cmd_bricks_parse(const char **words, int wordcount, int brick_index,
GF_FREE(tmp_host);
goto out;
}
- if (!valid_internet_address(host_name, _gf_false)) {
+ if (!valid_internet_address(host_name, _gf_false, _gf_false)) {
cli_err(
"internet address '%s' does not conform to "
"standards",
@@ -392,11 +481,6 @@ cli_validate_disperse_volume(char *word, gf1_cluster_type type,
ret = 2;
}
break;
- case GF_CLUSTER_TYPE_TIER:
- cli_err(
- "tier-dispersed volume is not "
- "supported");
- goto out;
case GF_CLUSTER_TYPE_REPLICATE:
cli_err(
"replicated-dispersed volume is not "
@@ -481,14 +565,17 @@ cli_cmd_volume_create_parse(struct cli_state *state, const char **words,
char *trans_type = NULL;
int32_t index = 0;
char *bricks = NULL;
+ char *ta_brick = NULL;
int32_t brick_count = 0;
- char *opwords[] = {"replica", "stripe", "transport", "disperse",
- "redundancy", "disperse-data", "arbiter", NULL};
+ static char *opwords[] = {"replica", "stripe", "transport",
+ "disperse", "redundancy", "disperse-data",
+ "arbiter", "thin-arbiter", NULL};
char *w = NULL;
int op_count = 0;
int32_t replica_count = 1;
int32_t arbiter_count = 0;
+ int32_t thin_arbiter_count = 0;
int32_t stripe_count = 1;
int32_t disperse_count = -1;
int32_t redundancy_count = -1;
@@ -542,12 +629,6 @@ cli_cmd_volume_create_parse(struct cli_state *state, const char **words,
case GF_CLUSTER_TYPE_STRIPE:
cli_err("stripe option not supported");
goto out;
- case GF_CLUSTER_TYPE_TIER:
- cli_err(
- "replicated-tiered volume is not "
- "supported");
- goto out;
- break;
case GF_CLUSTER_TYPE_DISPERSE:
cli_err(
"replicated-dispersed volume is not "
@@ -592,6 +673,26 @@ cli_cmd_volume_create_parse(struct cli_state *state, const char **words,
if (ret)
goto out;
index += 2;
+ } else if (!strcmp(words[index], "thin-arbiter")) {
+ ret = gf_string2int(words[index + 1], &thin_arbiter_count);
+ if ((ret == -1) || (thin_arbiter_count != 1) ||
+ (replica_count != 2)) {
+ cli_err(
+ "For thin-arbiter "
+ "configuration, "
+ "replica count must be"
+ " 2 and thin-arbiter count "
+ "must be 1. The 3rd "
+ "brick of the replica "
+ "will be the thin-arbiter brick");
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_int32(dict, "thin-arbiter-count",
+ thin_arbiter_count);
+ if (ret)
+ goto out;
+ index += 2;
}
}
@@ -600,7 +701,7 @@ cli_cmd_volume_create_parse(struct cli_state *state, const char **words,
if ((arbiter_count == 1) && (replica_count == 2))
replica_count += arbiter_count;
- if (replica_count == 2) {
+ if (replica_count == 2 && thin_arbiter_count == 0) {
if (strcmp(words[wordcount - 1], "force")) {
question =
"Replica 2 volumes are prone"
@@ -668,6 +769,12 @@ cli_cmd_volume_create_parse(struct cli_state *state, const char **words,
"option.");
ret = -1;
goto out;
+ } else if ((strcmp(w, "thin-arbiter") == 0)) {
+ cli_err(
+ "thin-arbiter option must be preceded by replica "
+ "option.");
+ ret = -1;
+ goto out;
} else {
GF_ASSERT(!"opword mismatch");
ret = -1;
@@ -691,7 +798,20 @@ cli_cmd_volume_create_parse(struct cli_state *state, const char **words,
wc = wordcount - 1;
}
- ret = cli_cmd_bricks_parse(words, wc, brick_index, &bricks, &brick_count);
+ // Exclude the thin-arbiter-brick i.e. last brick in the bricks list
+ if (thin_arbiter_count == 1) {
+ ret = cli_cmd_bricks_parse(words, wc - 1, brick_index, &bricks,
+ &brick_count);
+ if (ret)
+ goto out;
+
+ ret = cli_cmd_ta_brick_parse(words, wc, &ta_brick);
+
+ } else {
+ ret = cli_cmd_bricks_parse(words, wc, brick_index, &bricks,
+ &brick_count);
+ }
+
if (ret)
goto out;
@@ -750,6 +870,12 @@ cli_cmd_volume_create_parse(struct cli_state *state, const char **words,
if (ret)
goto out;
+ if (thin_arbiter_count == 1) {
+ ret = dict_set_dynstr(dict, "ta-brick", ta_brick);
+ if (ret)
+ goto out;
+ }
+
ret = dict_set_int32(dict, "count", brick_count);
if (ret)
goto out;
@@ -763,6 +889,7 @@ cli_cmd_volume_create_parse(struct cli_state *state, const char **words,
out:
if (ret) {
GF_FREE(bricks);
+ GF_FREE(ta_brick);
gf_log("cli", GF_LOG_ERROR, "Unable to parse create volume CLI");
if (dict)
dict_unref(dict);
@@ -1068,19 +1195,19 @@ cli_cmd_quota_parse(const char **words, int wordcount, dict_t **options)
};
int64_t value = 0;
gf_quota_type type = GF_QUOTA_OPTION_TYPE_NONE;
- char *opwords[] = {"enable",
- "disable",
- "limit-usage",
- "remove",
- "list",
- "alert-time",
- "soft-timeout",
- "hard-timeout",
- "default-soft-limit",
- "limit-objects",
- "list-objects",
- "remove-objects",
- NULL};
+ static char *opwords[] = {"enable",
+ "disable",
+ "limit-usage",
+ "remove",
+ "list",
+ "alert-time",
+ "soft-timeout",
+ "hard-timeout",
+ "default-soft-limit",
+ "limit-objects",
+ "list-objects",
+ "remove-objects",
+ NULL};
char *w = NULL;
uint32_t time = 0;
double percent = 0;
@@ -1496,6 +1623,11 @@ cli_add_key_group(dict_t *dict, char *key, char *value, char **op_errstr)
}
goto out;
}
+
+ /* Treat line that start with "#" as comments */
+ if ('#' == line[0])
+ continue;
+
opt_count++;
tok_key = strtok_r(line, "=", &saveptr);
tok_val = strtok_r(NULL, "\r\n", &saveptr);
@@ -1721,7 +1853,7 @@ cli_cmd_volume_add_brick_parse(struct cli_state *state, const char **words,
int ret = -1;
int brick_count = 0, brick_index = 0;
char *bricks = NULL;
- char *opwords_cl[] = {"replica", "stripe", NULL};
+ static char *opwords_cl[] = {"replica", "stripe", NULL};
gf1_cluster_type type = GF_CLUSTER_TYPE_NONE;
int count = 1;
int arbiter_count = 0;
@@ -1801,7 +1933,7 @@ cli_cmd_volume_add_brick_parse(struct cli_state *state, const char **words,
question =
"Replica 2 volumes are prone to "
"split-brain. Use Arbiter or "
- "Replica 3 to avaoid this. See: "
+ "Replica 3 to avoid this. See: "
"http://docs.gluster.org/en/latest/Administrator%20Guide/"
"Split%20brain%20and%20ways%20to%20deal%20with%20it/."
"\nDo you still want to continue?\n";
@@ -1866,146 +1998,6 @@ out:
}
int32_t
-cli_cmd_volume_tier_parse(const char **words, int wordcount, dict_t **options)
-{
- dict_t *dict = NULL;
- char *volname = NULL;
- int ret = -1;
- int32_t command = GF_DEFRAG_CMD_NONE;
- int32_t is_force = 0;
-
- GF_ASSERT(words);
- GF_ASSERT(options);
-
- dict = dict_new();
-
- if (!dict)
- goto out;
-
- if (!(wordcount == 4 || wordcount == 5)) {
- gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
- ret = -1;
- goto out;
- }
-
- volname = (char *)words[2];
-
- GF_ASSERT(volname);
-
- ret = cli_cmd_validate_volume(volname);
- if (ret) {
- gf_log("cli", GF_LOG_ERROR, "Failed to validate volume name");
- goto out;
- }
-
- ret = dict_set_str(dict, "volname", volname);
-
- if (ret)
- goto out;
-
- volname = (char *)words[2];
- if (wordcount == 4) {
- if (!strcmp(words[3], "status"))
- command = GF_DEFRAG_CMD_STATUS_TIER;
- else if (!strcmp(words[3], "start"))
- command = GF_DEFRAG_CMD_START_TIER;
- else if (!strcmp(words[3], "stop"))
- command = GF_DEFRAG_CMD_STOP_TIER;
- else {
- ret = -1;
- goto out;
- }
- } else if (wordcount == 5) {
- if ((!strcmp(words[3], "start")) && (!strcmp(words[4], "force"))) {
- command = GF_DEFRAG_CMD_START_TIER;
- is_force = 1;
- ret = dict_set_int32(dict, "force", is_force);
- if (ret)
- goto out;
- } else {
- ret = -1;
- goto out;
- }
- }
-
- ret = dict_set_int32(dict, "rebalance-command", command);
- if (ret)
- goto out;
-
- *options = dict;
-out:
-
- if (ret) {
- gf_log("cli", GF_LOG_ERROR, "Unable to parse tier CLI");
- if (dict)
- dict_unref(dict);
- }
-
- return ret;
-}
-
-int32_t
-cli_cmd_volume_detach_tier_parse(const char **words, int wordcount,
- dict_t **options, int *question)
-{
- int ret = -1;
- char *word = NULL;
- dict_t *dict = NULL;
- int32_t command = GF_DEFRAG_CMD_NONE;
-
- dict = dict_new();
- if (!dict)
- goto out;
-
- ret = dict_set_str(dict, "volname", (char *)words[2]);
- if (ret)
- goto out;
-
- if (wordcount == 3 && !strcmp((char *)words[2], "help")) {
- return -1;
- }
-
- if (wordcount != 4) {
- ret = -1;
- goto out;
- }
-
- word = (char *)words[3];
-
- ret = -1;
-
- if (!strcmp(word, "start")) {
- command = GF_DEFRAG_CMD_DETACH_START;
- } else if (!strcmp(word, "commit")) {
- *question = 1;
- command = GF_DEFRAG_CMD_DETACH_COMMIT;
- } else if (!strcmp(word, "force")) {
- *question = 1;
- command = GF_DEFRAG_CMD_DETACH_COMMIT_FORCE;
- } else if (!strcmp(word, "stop"))
- command = GF_DEFRAG_CMD_DETACH_STOP;
- else if (!strcmp(word, "status"))
- command = GF_DEFRAG_CMD_DETACH_STATUS;
- else
- goto out;
-
- ret = dict_set_int32(dict, "command", command);
- if (ret)
- goto out;
-
- *options = dict;
- ret = 0;
-out:
- if (ret) {
- gf_log("cli", GF_LOG_ERROR, "Unable to parse detach tier CLI");
- if (dict)
- dict_unref(dict);
- }
-
- return ret;
-}
-
-int32_t
cli_cmd_volume_remove_brick_parse(struct cli_state *state, const char **words,
int wordcount, dict_t **options,
int *question, int *brick_count,
@@ -2021,8 +2013,9 @@ cli_cmd_volume_remove_brick_parse(struct cli_state *state, const char **words,
int32_t j = 0;
char *tmp_brick = NULL;
char *tmp_brick1 = NULL;
- char *type_opword[] = {"replica", NULL};
- char *opwords[] = {"start", "commit", "stop", "status", "force", NULL};
+ static char *type_opword[] = {"replica", NULL};
+ static char *opwords[] = {"start", "commit", "stop",
+ "status", "force", NULL};
char *w = NULL;
int32_t command = GF_OP_CMD_NONE;
long count = 0;
@@ -2068,7 +2061,7 @@ cli_cmd_volume_remove_brick_parse(struct cli_state *state, const char **words,
ques =
"Replica 2 volumes are prone to "
"split-brain. Use Arbiter or Replica 3 "
- "to avaoid this. See: "
+ "to avoid this. See: "
"http://docs.gluster.org/en/latest/Administrator%20Guide/"
"Split%20brain%20and%20ways%20to%20deal%20with%20it/."
"\nDo you still want to continue?\n";
@@ -2593,8 +2586,6 @@ cli_cmd_log_rotate_parse(const char **words, int wordcount, dict_t **options)
if (strcmp("rotate", words[3]) == 0)
volname = (char *)words[2];
- else if (strcmp("rotate", words[2]) == 0)
- volname = (char *)words[3];
GF_ASSERT(volname);
ret = dict_set_str(dict, "volname", volname);
@@ -2637,6 +2628,17 @@ gsyncd_url_check(const char *w)
}
static gf_boolean_t
+valid_slave_gsyncd_url(const char *w)
+{
+ if (strstr(w, ":::"))
+ return _gf_false;
+ else if (strstr(w, "::"))
+ return _gf_true;
+ else
+ return _gf_false;
+}
+
+static gf_boolean_t
gsyncd_glob_check(const char *w)
{
return !!strpbrk(w, "*?[");
@@ -2859,7 +2861,8 @@ out:
}
int32_t
-cli_cmd_gsync_set_parse(const char **words, int wordcount, dict_t **options)
+cli_cmd_gsync_set_parse(struct cli_state *state, const char **words,
+ int wordcount, dict_t **options, char **errstr)
{
int32_t ret = -1;
dict_t *dict = NULL;
@@ -2869,13 +2872,16 @@ cli_cmd_gsync_set_parse(const char **words, int wordcount, dict_t **options)
unsigned slavei = 0;
unsigned glob = 0;
unsigned cmdi = 0;
- char *opwords[] = {"create", "status", "start", "stop", "config",
- "force", "delete", "ssh-port", "no-verify", "push-pem",
- "detail", "pause", "resume", NULL};
+ static char *opwords[] = {"create", "status", "start", "stop",
+ "config", "force", "delete", "ssh-port",
+ "no-verify", "push-pem", "detail", "pause",
+ "resume", NULL};
char *w = NULL;
char *save_ptr = NULL;
char *slave_temp = NULL;
char *token = NULL;
+ gf_answer_t answer = GF_ANSWER_NO;
+ const char *question = NULL;
GF_ASSERT(words);
GF_ASSERT(options);
@@ -2948,8 +2954,11 @@ cli_cmd_gsync_set_parse(const char **words, int wordcount, dict_t **options)
if (masteri && gsyncd_url_check(words[masteri]))
goto out;
- if (slavei && !glob && !gsyncd_url_check(words[slavei]))
+
+ if (slavei && !glob && !valid_slave_gsyncd_url(words[slavei])) {
+ gf_asprintf(errstr, "Invalid slave url: %s", words[slavei]);
goto out;
+ }
w = str_getunamb(words[cmdi], opwords);
if (!w)
@@ -3059,16 +3068,36 @@ cli_cmd_gsync_set_parse(const char **words, int wordcount, dict_t **options)
}
if (!ret)
ret = dict_set_int32(dict, "type", type);
- if (!ret && type == GF_GSYNC_OPTION_TYPE_CONFIG)
+ if (!ret && type == GF_GSYNC_OPTION_TYPE_CONFIG) {
+ if (!strcmp((char *)words[wordcount - 2], "ignore-deletes") &&
+ !strcmp((char *)words[wordcount - 1], "true")) {
+ question =
+ "There exists ~15 seconds delay for the option to take"
+ " effect from stime of the corresponding brick. Please"
+ " check the log for the time, the option is effective."
+ " Proceed";
+
+ answer = cli_cmd_get_confirmation(state, question);
+
+ if (GF_ANSWER_NO == answer) {
+ gf_log("cli", GF_LOG_INFO,
+ "Operation "
+ "cancelled, exiting");
+ *errstr = gf_strdup("Aborted by user.");
+ ret = -1;
+ goto out;
+ }
+ }
+
ret = config_parse(words, wordcount, dict, cmdi, glob);
+ }
out:
if (slave_temp)
GF_FREE(slave_temp);
- if (ret) {
- if (dict)
- dict_unref(dict);
- } else
+ if (ret && dict)
+ dict_unref(dict);
+ else
*options = dict;
return ret;
@@ -3085,7 +3114,7 @@ cli_cmd_volume_profile_parse(const char **words, int wordcount,
gf1_cli_info_op info_op = GF_CLI_INFO_NONE;
gf_boolean_t is_peek = _gf_false;
- char *opwords[] = {"start", "stop", "info", NULL};
+ static char *opwords[] = {"start", "stop", "info", NULL};
char *w = NULL;
GF_ASSERT(words);
@@ -3186,8 +3215,9 @@ cli_cmd_volume_top_parse(const char **words, int wordcount, dict_t **options)
int count = 0;
gf_boolean_t nfs = _gf_false;
char *delimiter = NULL;
- char *opwords[] = {"open", "read", "write", "opendir", "readdir",
- "read-perf", "write-perf", "clear", NULL};
+ static char *opwords[] = {"open", "read", "write",
+ "opendir", "readdir", "read-perf",
+ "write-perf", "clear", NULL};
char *w = NULL;
GF_ASSERT(words);
@@ -3366,9 +3396,9 @@ cli_cmd_get_statusop(const char *arg)
int i = 0;
uint32_t ret = GF_CLI_STATUS_NONE;
char *w = NULL;
- char *opwords[] = {"detail", "mem", "clients", "fd", "inode",
- "callpool", "tasks", "client-list", NULL};
- struct {
+ static char *opwords[] = {"detail", "mem", "clients", "fd", "inode",
+ "callpool", "tasks", "client-list", NULL};
+ static struct {
char *opname;
uint32_t opcode;
} optable[] = {{"detail", GF_CLI_STATUS_DETAIL},
@@ -3455,8 +3485,6 @@ cli_cmd_volume_status_parse(const char **words, int wordcount, dict_t **options)
cmd |= GF_CLI_STATUS_QUOTAD;
} else if (!strcmp(words[3], "snapd")) {
cmd |= GF_CLI_STATUS_SNAPD;
- } else if (!strcmp(words[3], "tierd")) {
- cmd |= GF_CLI_STATUS_TIERD;
} else if (!strcmp(words[3], "bitd")) {
cmd |= GF_CLI_STATUS_BITD;
} else if (!strcmp(words[3], "scrub")) {
@@ -3532,16 +3560,6 @@ cli_cmd_volume_status_parse(const char **words, int wordcount, dict_t **options)
goto out;
}
cmd |= GF_CLI_STATUS_SNAPD;
- } else if (!strcmp(words[3], "tierd")) {
- if (cmd == GF_CLI_STATUS_FD || cmd == GF_CLI_STATUS_CLIENTS ||
- cmd == GF_CLI_STATUS_DETAIL || cmd == GF_CLI_STATUS_INODE) {
- cli_err(
- "Detail/FD/Clients/Inode status not "
- "available for tier daemon");
- ret = -1;
- goto out;
- }
- cmd |= GF_CLI_STATUS_TIERD;
} else {
if (cmd == GF_CLI_STATUS_TASKS) {
cli_err(
@@ -3578,9 +3596,9 @@ out:
gf_boolean_t
cli_cmd_validate_dumpoption(const char *arg, char **option)
{
- char *opwords[] = {"all", "nfs", "mem", "iobuf", "callpool",
- "priv", "fd", "inode", "history", "inodectx",
- "fdctx", "quotad", NULL};
+ static char *opwords[] = {"all", "nfs", "mem", "iobuf", "callpool",
+ "priv", "fd", "inode", "history", "inodectx",
+ "fdctx", "quotad", NULL};
char *w = NULL;
w = str_getunamb(arg, opwords);
@@ -3615,7 +3633,7 @@ cli_cmd_volume_statedump_options_parse(const char **words, int wordcount,
}
ip_addr = strtok(tmp, ":");
pid = strtok(NULL, ":");
- if (valid_internet_address(ip_addr, _gf_true) && pid &&
+ if (valid_internet_address(ip_addr, _gf_true, _gf_false) && pid &&
gf_valid_pid(pid, strlen(pid))) {
ret = gf_asprintf(&option_str, "%s %s %s", words[3], ip_addr, pid);
if (ret < 0) {
@@ -3809,7 +3827,7 @@ extract_hostname_path_from_token(const char *tmp_words, char **hostname,
ret = -1;
goto out;
}
- if (!valid_internet_address(host_name, _gf_false)) {
+ if (!valid_internet_address(host_name, _gf_false, _gf_false)) {
cli_err(
"internet address '%s' does not conform to "
"standards",
@@ -3888,8 +3906,6 @@ heal_command_type_get(const char *command)
[GF_SHD_OP_HEAL_INDEX] = NULL,
[GF_SHD_OP_HEAL_FULL] = "full",
[GF_SHD_OP_INDEX_SUMMARY] = "info",
- [GF_SHD_OP_HEALED_FILES] = NULL,
- [GF_SHD_OP_HEAL_FAILED_FILES] = NULL,
[GF_SHD_OP_SPLIT_BRAIN_FILES] = NULL,
[GF_SHD_OP_STATISTICS] = "statistics",
[GF_SHD_OP_STATISTICS_HEAL_COUNT] = NULL,
@@ -4067,55 +4083,6 @@ out:
}
int
-cli_cmd_volume_old_tier_parse(const char **words, int wordcount,
- dict_t **options)
-{
- dict_t *dict = NULL;
- int ret = -1;
- char *volname = NULL;
- gf_cli_defrag_type cmd = 0;
-
- GF_ASSERT(words);
- GF_ASSERT(options);
-
- dict = dict_new();
- if (!dict)
- goto out;
-
- if (wordcount != 4)
- goto out;
-
- if ((strcmp(words[1], "tier") == 0) && (strcmp(words[3], "start") == 0)) {
- cmd = GF_DEFRAG_CMD_START_TIER;
- } else
- goto out;
-
- volname = (char *)words[2];
-
- ret = dict_set_str(dict, "volname", volname);
-
- if (ret) {
- gf_log(THIS->name, GF_LOG_ERROR, "failed to set dict");
- goto out;
- }
-
- ret = dict_set_int32(dict, "rebalance-command", (int32_t)cmd);
-
- if (ret) {
- gf_log(THIS->name, GF_LOG_ERROR, "failed to set dict");
- goto out;
- }
-
- *options = dict;
-
-out:
- if (ret && dict)
- dict_unref(dict);
-
- return ret;
-}
-
-int
cli_cmd_volume_defrag_parse(const char **words, int wordcount, dict_t **options)
{
dict_t *dict = NULL;
@@ -5280,24 +5247,25 @@ cli_cmd_snapshot_parse(const char **words, int wordcount, dict_t **options,
dict_t *dict = NULL;
gf1_cli_snapshot type = GF_SNAP_OPTION_TYPE_NONE;
char *w = NULL;
- char *opwords[] = {"create", "delete", "restore", "activate",
- "deactivate", "list", "status", "config",
- "info", "clone", NULL};
- char *invalid_snapnames[] = {"description", "force", "volume", "all", NULL};
- char *invalid_volnames[] = {"volume",
- "type",
- "subvolumes",
- "option",
- "end-volume",
- "all",
- "volume_not_in_ring",
- "description",
- "force",
- "snap-max-hard-limit",
- "snap-max-soft-limit",
- "auto-delete",
- "activate-on-create",
- NULL};
+ static char *opwords[] = {"create", "delete", "restore", "activate",
+ "deactivate", "list", "status", "config",
+ "info", "clone", NULL};
+ static char *invalid_snapnames[] = {"description", "force", "volume", "all",
+ NULL};
+ static char *invalid_volnames[] = {"volume",
+ "type",
+ "subvolumes",
+ "option",
+ "end-volume",
+ "all",
+ "volume_not_in_ring",
+ "description",
+ "force",
+ "snap-max-hard-limit",
+ "snap-max-soft-limit",
+ "auto-delete",
+ "activate-on-create",
+ NULL};
GF_ASSERT(words);
GF_ASSERT(options);
@@ -5617,16 +5585,18 @@ cli_cmd_bitrot_parse(const char **words, int wordcount, dict_t **options)
int32_t ret = -1;
char *w = NULL;
char *volname = NULL;
- char *opwords[] = {
- "enable", "disable", "scrub-throttle", "scrub-frequency", "scrub",
- "signing-time", NULL};
- char *scrub_throt_values[] = {"lazy", "normal", "aggressive", NULL};
- char *scrub_freq_values[] = {"hourly", "daily", "weekly", "biweekly",
- "monthly", "minute", NULL};
- char *scrub_values[] = {"pause", "resume", "status", "ondemand", NULL};
+ static char *opwords[] = {"enable", "disable", "scrub-throttle",
+ "scrub-frequency", "scrub", "signing-time",
+ "signer-threads", NULL};
+ static char *scrub_throt_values[] = {"lazy", "normal", "aggressive", NULL};
+ static char *scrub_freq_values[] = {
+ "hourly", "daily", "weekly", "biweekly", "monthly", "minute", NULL};
+ static char *scrub_values[] = {"pause", "resume", "status", "ondemand",
+ NULL};
dict_t *dict = NULL;
gf_bitrot_type type = GF_BITROT_OPTION_TYPE_NONE;
int32_t expiry_time = 0;
+ int32_t signer_th_count = 0;
GF_ASSERT(words);
GF_ASSERT(options);
@@ -5807,6 +5777,31 @@ cli_cmd_bitrot_parse(const char **words, int wordcount, dict_t **options)
}
goto set_type;
}
+ } else if (!strcmp(words[3], "signer-threads")) {
+ if (!words[4]) {
+ cli_err(
+ "Missing signer-thread value for bitrot "
+ "option");
+ ret = -1;
+ goto out;
+ } else {
+ type = GF_BITROT_OPTION_TYPE_SIGNER_THREADS;
+
+ signer_th_count = strtol(words[4], NULL, 0);
+ if (signer_th_count < 1) {
+ cli_err("signer-thread count should not be less than 1");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_uint32(dict, "signer-threads",
+ (unsigned int)signer_th_count);
+ if (ret) {
+ cli_out("Failed to set dict for bitrot");
+ goto out;
+ }
+ goto set_type;
+ }
} else {
cli_err(
"Invalid option %s for bitrot. Please enter valid "
@@ -5815,7 +5810,6 @@ cli_cmd_bitrot_parse(const char **words, int wordcount, dict_t **options)
ret = -1;
goto out;
}
-
set_type:
ret = dict_set_int32(dict, "type", type);
if (ret < 0)
@@ -5832,3 +5826,121 @@ out:
return ret;
}
+
+/* Parsing global option for NFS-Ganesha config
+ * gluster nfs-ganesha enable/disable */
+
+int32_t
+cli_cmd_ganesha_parse(struct cli_state *state, const char **words,
+ int wordcount, dict_t **options, char **op_errstr)
+{
+ dict_t *dict = NULL;
+ int ret = -1;
+ char *key = NULL;
+ char *value = NULL;
+ char *w = NULL;
+ static char *opwords[] = {"enable", "disable", NULL};
+ const char *question = NULL;
+ gf_answer_t answer = GF_ANSWER_NO;
+
+ GF_ASSERT(words);
+ GF_ASSERT(options);
+
+ dict = dict_new();
+
+ if (!dict)
+ goto out;
+
+ if (wordcount != 2)
+ goto out;
+
+ key = (char *)words[0];
+ value = (char *)words[1];
+
+ if (!key || !value) {
+ cli_out("Usage : nfs-ganesha <enable/disable>");
+ ret = -1;
+ goto out;
+ }
+
+ ret = gf_strip_whitespace(value, strlen(value));
+ if (ret == -1)
+ goto out;
+
+ if (strcmp(key, "nfs-ganesha")) {
+ gf_asprintf(op_errstr,
+ "Global option: error: ' %s '"
+ "is not a valid global option.",
+ key);
+ ret = -1;
+ goto out;
+ }
+
+ w = str_getunamb(value, opwords);
+ if (!w) {
+ cli_out(
+ "Invalid global option \n"
+ "Usage : nfs-ganesha <enable/disable>");
+ ret = -1;
+ goto out;
+ }
+
+ if (strcmp(value, "enable") == 0) {
+ question =
+ "Enabling NFS-Ganesha requires Gluster-NFS to be "
+ "disabled across the trusted pool. Do you "
+ "still want to continue?\n";
+ } else if (strcmp(value, "disable") == 0) {
+ question =
+ "Disabling NFS-Ganesha will tear down the entire "
+ "ganesha cluster across the trusted pool. Do you "
+ "still want to continue?\n";
+ } else {
+ ret = -1;
+ goto out;
+ }
+ answer = cli_cmd_get_confirmation(state, question);
+ if (GF_ANSWER_NO == answer) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Global operation "
+ "cancelled, exiting");
+ ret = -1;
+ goto out;
+ }
+ cli_out("This will take a few minutes to complete. Please wait ..");
+
+ ret = dict_set_str(dict, "key", key);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "dict set on key failed");
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "value", value);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "dict set on value failed");
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "globalname", "All");
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "dict set on global"
+ " key failed.");
+ goto out;
+ }
+
+ ret = dict_set_int32(dict, "hold_global_locks", _gf_true);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "dict set on global key "
+ "failed.");
+ goto out;
+ }
+
+ *options = dict;
+out:
+ if (ret)
+ dict_unref(dict);
+
+ return ret;
+}
diff --git a/cli/src/cli-cmd-peer.c b/cli/src/cli-cmd-peer.c
index ccbd2edde20..084998701d8 100644
--- a/cli/src/cli-cmd-peer.c
+++ b/cli/src/cli-cmd-peer.c
@@ -18,11 +18,7 @@
#include "cli-mem-types.h"
#include "cli1-xdr.h"
#include "protocol-common.h"
-#include "events.h"
-
-extern struct rpc_clnt *global_rpc;
-
-extern rpc_clnt_prog_t *cli_rpc_prog;
+#include <glusterfs/events.h>
int
cli_cmd_peer_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
@@ -48,10 +44,6 @@ cli_cmd_peer_probe_cbk(struct cli_state *state, struct cli_cmd_word *word,
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_PROBE];
- frame = create_frame(THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
-
dict = dict_new();
if (!dict)
goto out;
@@ -60,7 +52,7 @@ cli_cmd_peer_probe_cbk(struct cli_state *state, struct cli_cmd_word *word,
if (ret)
goto out;
- ret = valid_internet_address((char *)words[2], _gf_false);
+ ret = valid_internet_address((char *)words[2], _gf_false, _gf_false);
if (ret == 1) {
ret = 0;
} else {
@@ -77,6 +69,12 @@ cli_cmd_peer_probe_cbk(struct cli_state *state, struct cli_cmd_word *word,
}
*/
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
CLI_LOCAL_INIT(local, words, frame, dict);
if (proc->fn) {
@@ -127,10 +125,6 @@ cli_cmd_peer_deprobe_cbk(struct cli_state *state, struct cli_cmd_word *word,
"want to proceed?";
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_DEPROBE];
- frame = create_frame(THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
-
dict = dict_new();
ret = dict_set_str(dict, "hostname", (char *)words[2]);
@@ -162,6 +156,12 @@ cli_cmd_peer_deprobe_cbk(struct cli_state *state, struct cli_cmd_word *word,
goto out;
}
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
CLI_LOCAL_INIT(local, words, frame, dict);
if (proc->fn) {
diff --git a/cli/src/cli-cmd-snapshot.c b/cli/src/cli-cmd-snapshot.c
index 814ab82f6eb..859d6b2e40d 100644
--- a/cli/src/cli-cmd-snapshot.c
+++ b/cli/src/cli-cmd-snapshot.c
@@ -17,8 +17,6 @@
#include "cli-cmd.h"
#include "cli-mem-types.h"
-extern rpc_clnt_prog_t *cli_rpc_prog;
-
int
cli_cmd_snapshot_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
const char **words, int wordcount);
@@ -36,12 +34,6 @@ cli_cmd_snapshot_cbk(struct cli_state *state, struct cli_cmd_word *word,
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_SNAP];
- frame = create_frame(THIS, THIS->ctx->pool);
- if (frame == NULL) {
- ret = -1;
- goto out;
- }
-
/* Parses the command entered by the user */
ret = cli_cmd_snapshot_parse(words, wordcount, &options, state);
if (ret) {
@@ -55,6 +47,12 @@ cli_cmd_snapshot_cbk(struct cli_state *state, struct cli_cmd_word *word,
goto out;
}
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (frame == NULL) {
+ ret = -1;
+ goto out;
+ }
+
CLI_LOCAL_INIT(local, words, frame, options);
if (proc->fn)
diff --git a/cli/src/cli-cmd-system.c b/cli/src/cli-cmd-system.c
index 46ff926a2b8..801e8f4efed 100644
--- a/cli/src/cli-cmd-system.c
+++ b/cli/src/cli-cmd-system.c
@@ -18,10 +18,6 @@
#include "cli-mem-types.h"
#include "protocol-common.h"
-extern struct rpc_clnt *global_rpc;
-
-extern rpc_clnt_prog_t *cli_rpc_prog;
-
int
cli_cmd_system_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
const char **words, int wordcount);
@@ -43,36 +39,39 @@ cli_cmd_getspec_cbk(struct cli_state *state, struct cli_cmd_word *word,
call_frame_t *frame = NULL;
dict_t *dict = NULL;
- frame = create_frame(THIS, THIS->ctx->pool);
- if (!frame)
+ if (wordcount != 3) {
+ cli_usage_out(word->pattern);
goto out;
+ }
dict = dict_new();
if (!dict)
goto out;
- if (wordcount != 3) {
- cli_usage_out(word->pattern);
- goto out;
- }
-
ret = dict_set_str(dict, "volid", (char *)words[2]);
if (ret)
goto out;
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GETSPEC];
if (proc->fn) {
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
ret = proc->fn(frame, THIS, dict);
}
out:
if (!proc && ret) {
- if (dict)
- dict_unref(dict);
if (wordcount > 1)
cli_out("Fetching spec for volume %s failed", (char *)words[2]);
}
+ if (dict)
+ dict_unref(dict);
+
+ CLI_STACK_DESTROY(frame);
return ret;
}
@@ -85,36 +84,39 @@ cli_cmd_pmap_b2p_cbk(struct cli_state *state, struct cli_cmd_word *word,
call_frame_t *frame = NULL;
dict_t *dict = NULL;
- frame = create_frame(THIS, THIS->ctx->pool);
- if (!frame)
+ if (wordcount != 4) {
+ cli_usage_out(word->pattern);
goto out;
+ }
dict = dict_new();
if (!dict)
goto out;
- if (wordcount != 4) {
- cli_usage_out(word->pattern);
- goto out;
- }
-
ret = dict_set_str(dict, "brick", (char *)words[3]);
if (ret)
goto out;
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_PMAP_PORTBYBRICK];
if (proc->fn) {
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
ret = proc->fn(frame, THIS, dict);
}
out:
if (!proc && ret) {
- if (dict)
- dict_unref(dict);
if (wordcount > 1)
cli_out("Fetching spec for volume %s failed", (char *)words[3]);
}
+ if (dict)
+ dict_unref(dict);
+
+ CLI_STACK_DESTROY(frame);
return ret;
}
@@ -174,6 +176,7 @@ make_seq_dict(int argc, char **argv)
{
char index[] = "4294967296"; // 1<<32
int i = 0;
+ int len;
int ret = 0;
dict_t *dict = dict_new();
@@ -181,8 +184,8 @@ make_seq_dict(int argc, char **argv)
return NULL;
for (i = 0; i < argc; i++) {
- snprintf(index, sizeof(index), "%d", i);
- ret = dict_set_str(dict, index, argv[i]);
+ len = snprintf(index, sizeof(index), "%d", i);
+ ret = dict_set_strn(dict, index, len, argv[i]);
if (ret == -1)
break;
}
@@ -322,6 +325,9 @@ out:
cli_out("uuid get failed");
}
+ if (dict)
+ dict_unref(dict);
+
CLI_STACK_DESTROY(frame);
return ret;
}
@@ -383,12 +389,15 @@ out:
cli_out("uuid reset failed");
}
+ if (dict)
+ dict_unref(dict);
+
CLI_STACK_DESTROY(frame);
return ret;
}
-struct cli_cmd cli_system_cmds[] = {
+static struct cli_cmd cli_system_cmds[] = {
{"system:: getspec <VOLNAME>", cli_cmd_getspec_cbk,
"fetch the volume file for the volume <VOLNAME>"},
@@ -432,6 +441,7 @@ cli_cmd_sys_exec_cbk(struct cli_state *state, struct cli_cmd_word *word,
char *tmp = NULL;
int ret = -1;
int i = -1;
+ int len;
int cmd_args_count = 0;
int in_cmd_args_count = 0;
rpc_clnt_procedure_t *proc = NULL;
@@ -439,20 +449,21 @@ cli_cmd_sys_exec_cbk(struct cli_state *state, struct cli_cmd_word *word,
dict_t *dict = NULL;
cli_local_t *local = NULL;
- if (wordcount < 3) {
+ if ((wordcount < 3) || (words[2] == NULL)) {
cli_usage_out(word->pattern);
goto out;
}
- dict = dict_new();
- if (!dict)
- goto out;
-
command = strtok_r((char *)words[2], " ", &saveptr);
if (command == NULL) {
gf_log("cli", GF_LOG_ERROR, "Failed to parse command");
goto out;
}
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
+
do {
tmp = strtok_r(NULL, " ", &saveptr);
if (tmp) {
@@ -480,9 +491,9 @@ cli_cmd_sys_exec_cbk(struct cli_state *state, struct cli_cmd_word *word,
for (i = 1; i <= cmd_args_count; i++) {
in_cmd_args_count++;
- snprintf(cmd_arg_name, sizeof(cmd_arg_name), "cmd_arg_%d",
- in_cmd_args_count);
- ret = dict_set_str(dict, cmd_arg_name, (char *)words[2 + i]);
+ len = snprintf(cmd_arg_name, sizeof(cmd_arg_name), "cmd_arg_%d",
+ in_cmd_args_count);
+ ret = dict_set_strn(dict, cmd_arg_name, len, (char *)words[2 + i]);
if (ret) {
gf_log("", GF_LOG_ERROR, "Unable to set %s in dict", cmd_arg_name);
goto out;
diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
index 55917e8217c..f238851586e 100644
--- a/cli/src/cli-cmd-volume.c
+++ b/cli/src/cli-cmd-volume.c
@@ -23,17 +23,16 @@
#include "cli-cmd.h"
#include "cli-mem-types.h"
#include "cli1-xdr.h"
-#include "run.h"
-#include "syscall.h"
-#include "common-utils.h"
-#include "events.h"
+#include <glusterfs/run.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/events.h>
-extern struct rpc_clnt *global_rpc;
-extern struct rpc_clnt *global_quotad_rpc;
-
-extern rpc_clnt_prog_t *cli_rpc_prog;
extern rpc_clnt_prog_t cli_quotad_clnt;
+static int
+gf_asprintf_append(char **string_ptr, const char *format, ...);
+
int
cli_cmd_volume_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
const char **words, int wordcount);
@@ -47,10 +46,6 @@ cli_cmd_quota_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
const char **words, int wordcount);
int
-cli_cmd_tier_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
- const char **words, int wordcount);
-
-int
cli_cmd_volume_info_cbk(struct cli_state *state, struct cli_cmd_word *word,
const char **words, int wordcount)
{
@@ -66,10 +61,6 @@ cli_cmd_volume_info_cbk(struct cli_state *state, struct cli_cmd_word *word,
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GET_VOLUME];
- frame = create_frame(THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
-
if ((wordcount == 2) || (wordcount == 3 && !strcmp(words[2], "all"))) {
ctx.flags = GF_CLI_GET_NEXT_VOLUME;
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GET_NEXT_VOLUME];
@@ -92,6 +83,10 @@ cli_cmd_volume_info_cbk(struct cli_state *state, struct cli_cmd_word *word,
if (!local)
goto out;
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
local->get_vol.flags = ctx.flags;
if (ctx.volname)
local->get_vol.volname = gf_strdup(ctx.volname);
@@ -217,10 +212,6 @@ cli_cmd_volume_create_cbk(struct cli_state *state, struct cli_cmd_word *word,
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_CREATE_VOLUME];
- frame = create_frame(THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
-
ret = cli_cmd_volume_create_parse(state, words, wordcount, &options,
&bricks);
@@ -246,6 +237,12 @@ cli_cmd_volume_create_cbk(struct cli_state *state, struct cli_cmd_word *word,
}
}
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
CLI_LOCAL_INIT(local, words, frame, options);
if (proc->fn) {
@@ -288,14 +285,6 @@ cli_cmd_volume_delete_cbk(struct cli_state *state, struct cli_cmd_word *word,
"Do you want to continue?";
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_DELETE_VOLUME];
- frame = create_frame(THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
-
- dict = dict_new();
- if (!dict)
- goto out;
-
if (wordcount != 3) {
cli_usage_out(word->pattern);
parse_error = 1;
@@ -304,6 +293,10 @@ cli_cmd_volume_delete_cbk(struct cli_state *state, struct cli_cmd_word *word,
volname = (char *)words[2];
+ dict = dict_new();
+ if (!dict)
+ goto out;
+
ret = dict_set_str(dict, "volname", volname);
if (ret) {
gf_log(THIS->name, GF_LOG_WARNING, "dict set failed");
@@ -325,6 +318,12 @@ cli_cmd_volume_delete_cbk(struct cli_state *state, struct cli_cmd_word *word,
goto out;
}
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
CLI_LOCAL_INIT(local, words, frame, dict);
if (proc->fn) {
@@ -360,30 +359,15 @@ cli_cmd_volume_start_cbk(struct cli_state *state, struct cli_cmd_word *word,
int flags = 0;
cli_local_t *local = NULL;
- frame = create_frame(THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
-
if (wordcount < 3 || wordcount > 4) {
cli_usage_out(word->pattern);
parse_error = 1;
goto out;
}
- dict = dict_new();
- if (!dict) {
- goto out;
- }
-
if (!words[2])
goto out;
- ret = dict_set_str(dict, "volname", (char *)words[2]);
- if (ret) {
- gf_log(THIS->name, GF_LOG_ERROR, "dict set failed");
- goto out;
- }
-
if (wordcount == 4) {
if (!strcmp("force", words[3])) {
flags |= GF_CLI_FLAG_OP_FORCE;
@@ -394,6 +378,18 @@ cli_cmd_volume_start_cbk(struct cli_state *state, struct cli_cmd_word *word,
goto out;
}
}
+
+ dict = dict_new();
+ if (!dict) {
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "volname", (char *)words[2]);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "dict set failed");
+ goto out;
+ }
+
ret = dict_set_int32(dict, "flags", flags);
if (ret) {
gf_log(THIS->name, GF_LOG_ERROR, "dict set failed");
@@ -402,6 +398,12 @@ cli_cmd_volume_start_cbk(struct cli_state *state, struct cli_cmd_word *word,
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_START_VOLUME];
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
CLI_LOCAL_INIT(local, words, frame, dict);
if (proc->fn) {
@@ -488,10 +490,6 @@ cli_cmd_volume_stop_cbk(struct cli_state *state, struct cli_cmd_word *word,
"Stopping volume will make its data inaccessible. "
"Do you want to continue?";
- frame = create_frame(THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
-
if (wordcount < 3 || wordcount > 4) {
cli_usage_out(word->pattern);
parse_error = 1;
@@ -542,6 +540,12 @@ cli_cmd_volume_stop_cbk(struct cli_state *state, struct cli_cmd_word *word,
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_STOP_VOLUME];
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
CLI_LOCAL_INIT(local, words, frame, dict);
if (proc->fn) {
@@ -556,6 +560,8 @@ out:
}
CLI_STACK_DESTROY(frame);
+ if (dict)
+ dict_unref(dict);
if (ret == 0 && GF_ANSWER_YES == answer) {
gf_event(EVENT_VOLUME_STOP, "name=%s;force=%d", (char *)words[2],
@@ -576,20 +582,16 @@ cli_cmd_volume_rename_cbk(struct cli_state *state, struct cli_cmd_word *word,
int sent = 0;
int parse_error = 0;
- frame = create_frame(THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
-
- dict = dict_new();
- if (!dict)
- goto out;
-
if (wordcount != 4) {
cli_usage_out(word->pattern);
parse_error = 1;
goto out;
}
+ dict = dict_new();
+ if (!dict)
+ goto out;
+
ret = dict_set_str(dict, "old-volname", (char *)words[2]);
if (ret)
@@ -603,6 +605,11 @@ cli_cmd_volume_rename_cbk(struct cli_state *state, struct cli_cmd_word *word,
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_RENAME_VOLUME];
if (proc->fn) {
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
ret = proc->fn(frame, THIS, dict);
}
@@ -641,10 +648,6 @@ cli_cmd_volume_defrag_cbk(struct cli_state *state, struct cli_cmd_word *word,
goto out;
#endif
- frame = create_frame(THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
-
ret = cli_cmd_volume_defrag_parse(words, wordcount, &dict);
if (ret) {
@@ -654,6 +657,12 @@ cli_cmd_volume_defrag_cbk(struct cli_state *state, struct cli_cmd_word *word,
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_DEFRAG_VOLUME];
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
CLI_LOCAL_INIT(local, words, frame, dict);
if (proc->fn) {
@@ -702,10 +711,6 @@ cli_cmd_volume_reset_cbk(struct cli_state *state, struct cli_cmd_word *word,
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_RESET_VOLUME];
- frame = create_frame(THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
-
ret = cli_cmd_volume_reset_parse(words, wordcount, &options);
if (ret) {
cli_usage_out(word->pattern);
@@ -713,6 +718,12 @@ cli_cmd_volume_reset_cbk(struct cli_state *state, struct cli_cmd_word *word,
goto out;
}
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
CLI_LOCAL_INIT(local, words, frame, options);
if (proc->fn) {
@@ -817,10 +828,6 @@ cli_cmd_volume_set_cbk(struct cli_state *state, struct cli_cmd_word *word,
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_SET_VOLUME];
- frame = create_frame(THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
-
ret = cli_cmd_volume_set_parse(state, words, wordcount, &options,
&op_errstr);
if (ret) {
@@ -834,6 +841,12 @@ cli_cmd_volume_set_cbk(struct cli_state *state, struct cli_cmd_word *word,
goto out;
}
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
CLI_LOCAL_INIT(local, words, frame, options);
if (proc->fn) {
@@ -850,13 +863,21 @@ out:
#if (USE_EVENTS)
if (ret == 0 && strcmp(words[2], "help") != 0) {
ret1 = dict_get_int32(options, "count", &num_options);
- if (ret1)
+ if (ret1) {
num_options = 0;
- else
+ goto end;
+ } else {
num_options = num_options / 2;
+ }
+ char *free_list_key[num_options];
+ char *free_list_val[num_options];
+ for (i = 0; i < num_options; i++) {
+ free_list_key[i] = NULL;
+ free_list_val[i] = NULL;
+ }
/* Initialize opts_str */
- opts_str = gf_strdup("");
+ opts_str = "";
/* Prepare String in format options=KEY1,VALUE1,KEY2,VALUE2 */
for (i = 1; i <= num_options; i++) {
@@ -866,6 +887,7 @@ out:
tmp_opt = "";
gf_asprintf(&opts_str, "%s,%s", opts_str, tmp_opt);
+ free_list_key[i - 1] = opts_str;
sprintf(dict_key, "value%d", i);
ret1 = dict_get_str(options, dict_key, &tmp_opt);
@@ -873,16 +895,21 @@ out:
tmp_opt = "";
gf_asprintf(&opts_str, "%s,%s", opts_str, tmp_opt);
+ free_list_val[i - 1] = opts_str;
}
gf_event(EVENT_VOLUME_SET, "name=%s;options=%s", (char *)words[2],
opts_str);
/* Allocated by gf_strdup and gf_asprintf */
- GF_FREE(opts_str);
+ for (i = 0; i < num_options; i++) {
+ GF_FREE(free_list_key[i]);
+ GF_FREE(free_list_val[i]);
+ }
}
#endif
+end:
CLI_STACK_DESTROY(frame);
return ret;
@@ -1027,10 +1054,6 @@ cli_cmd_volume_add_brick_cbk(struct cli_state *state, struct cli_cmd_word *word,
"filesystem operations on the volume after the change. Do you "
"really want to continue with 'stripe' count option ? ";
- frame = create_frame(THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
-
ret = cli_cmd_volume_add_brick_parse(state, words, wordcount, &options, 0);
if (ret) {
cli_usage_out(word->pattern);
@@ -1075,6 +1098,12 @@ cli_cmd_volume_add_brick_cbk(struct cli_state *state, struct cli_cmd_word *word,
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_ADD_BRICK];
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
CLI_LOCAL_INIT(local, words, frame, options);
if (proc->fn) {
@@ -1100,335 +1129,6 @@ out:
}
int
-cli_tier_validate_replica_type(dict_t *dict, int type)
-{
- int brick_count = -1;
- int replica_count = 1;
- int ret = -1;
-
- ret = dict_get_int32(dict, "count", &brick_count);
- if (ret) {
- gf_log("cli", GF_LOG_ERROR, "Failed to get brick count");
- goto out;
- }
-
- ret = dict_get_int32(dict, "replica-count", &replica_count);
- if (ret) {
- gf_log("cli", GF_LOG_DEBUG,
- "Failed to get replica count. "
- "Defaulting to one");
- replica_count = 1;
- }
-
- /*
- * Change the calculation of sub_count once attach-tier support
- * disperse volume.
- * sub_count = disperse_count for disperse volume
- * */
-
- if (brick_count % replica_count) {
- if (type == GF_CLUSTER_TYPE_REPLICATE)
- cli_err(
- "number of bricks is not a multiple of "
- "replica count");
- else if (type == GF_CLUSTER_TYPE_DISPERSE)
- cli_err(
- "number of bricks is not a multiple of "
- "disperse count");
- else
- cli_err(
- "number of bricks given doesn't match "
- "required count");
-
- ret = -1;
- goto out;
- }
- ret = 0;
-out:
- return ret;
-}
-
-int
-do_cli_cmd_volume_attach_tier(struct cli_state *state,
- struct cli_cmd_word *word, const char **words,
- int wordcount)
-{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *options = NULL;
- int sent = 0;
- int parse_error = 0;
- cli_local_t *local = NULL;
- int type = 0;
-
- frame = create_frame(THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
-
- ret = cli_cmd_volume_add_brick_parse(state, words, wordcount, &options,
- &type);
- if (ret) {
- cli_usage_out(word->pattern);
- parse_error = 1;
- goto out;
- }
-
- /*
- * Merge this check when attach-tier has it's own cli parse function.
- */
- ret = cli_tier_validate_replica_type(options, type);
- if (ret) {
- cli_usage_out(word->pattern);
- parse_error = 1;
- goto out;
- }
-
- if (state->mode & GLUSTER_MODE_WIGNORE) {
- ret = dict_set_int32(options, "force", _gf_true);
- if (ret) {
- gf_log("cli", GF_LOG_ERROR,
- "Failed to set force "
- "option");
- goto out;
- }
- }
-
- ret = dict_set_int32(options, "attach-tier", 1);
- if (ret)
- goto out;
-
- ret = dict_set_int32(options, "hot-type", type);
- if (ret)
- goto out;
-
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_ADD_TIER_BRICK];
-
- CLI_LOCAL_INIT(local, words, frame, options);
-
- if (proc->fn) {
- ret = proc->fn(frame, THIS, options);
- }
-
-out:
- if (ret) {
- cli_cmd_sent_status_get(&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out("attach-tier failed");
- }
-
- CLI_STACK_DESTROY(frame);
-
- return ret;
-}
-
-int
-do_cli_cmd_volume_detach_tier(struct cli_state *state,
- struct cli_cmd_word *word, const char **words,
- int wordcount, gf_boolean_t *aborted)
-{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *options = NULL;
- int sent = 0;
- int parse_error = 0;
- gf_answer_t answer = GF_ANSWER_NO;
- cli_local_t *local = NULL;
- int need_question = 0;
-
- const char *question =
- "Removing tier can result in data loss. "
- "Do you want to Continue?";
-
- frame = create_frame(THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
-
- ret = cli_cmd_volume_detach_tier_parse(words, wordcount, &options,
- &need_question);
- if (ret) {
- cli_usage_out(word->pattern);
- parse_error = 1;
- goto out;
- }
-
- ret = dict_set_int32(options, "force", 1);
- if (ret)
- goto out;
-
- ret = dict_set_int32(options, "count", 0);
- if (ret)
- goto out;
-
- *aborted = _gf_false;
-
- if (!(state->mode & GLUSTER_MODE_SCRIPT) && need_question) {
- /* we need to ask question only in case of 'commit or force' */
- answer = cli_cmd_get_confirmation(state, question);
- if (GF_ANSWER_NO == answer) {
- ret = 0;
- *aborted = _gf_true;
- goto out;
- }
- }
-
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_REMOVE_TIER_BRICK];
-
- CLI_LOCAL_INIT(local, words, frame, options);
-
- if (proc->fn) {
- ret = proc->fn(frame, THIS, options);
- }
-
-out:
- if (ret) {
- cli_cmd_sent_status_get(&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out("Volume detach tier failed");
- }
-
- CLI_STACK_DESTROY(frame);
-
- return ret;
-}
-
-int
-cli_cmd_volume_tier_cbk(struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
-{
- int ret = -1;
- call_frame_t *frame = NULL;
- dict_t *options = NULL;
- rpc_clnt_procedure_t *proc = NULL;
- cli_local_t *local = NULL;
- int i = 0;
- eventtypes_t event = EVENT_LAST;
- gf_boolean_t aborted = _gf_false;
- gf_answer_t answer = GF_ANSWER_NO;
-
- const char *detach_question =
- "gluster volume detach-tier <VOLNAME> "
- "<start|stop|status|commit|force> is "
- "deprecated. Use the new command \'"
- "gluster volume tier <VOLNAME> detach <start|"
- "stop|status|commit|force>\'\n"
- "Do you want to Continue?";
-
- const char *attach_question =
- "gluster volume attach-tier <VOLNAME> "
- "[<replica COUNT>] <NEW-BRICK>... is "
- "deprecated. Use the new command \'"
- "gluster volume tier <VOLNAME> attach [<replica"
- " COUNT>] <NEW-BRICK>... [force]\'\n"
- "Do you want to Continue?";
-
- if (wordcount < 4) {
- if (wordcount == 3 && !strcmp(words[2], "help")) {
- cli_cmd_tier_help_cbk(state, word, words, wordcount);
- ret = 0;
- } else {
- cli_usage_out(word->pattern);
- }
- goto out;
- }
-
- if (!strcmp(words[1], "detach-tier")) {
- /* we need to ask question when older command is used */
- answer = cli_cmd_get_confirmation(state, detach_question);
- if (GF_ANSWER_NO == answer) {
- ret = 0;
- goto out;
- }
- ret = do_cli_cmd_volume_detach_tier(state, word, words, wordcount,
- &aborted);
- goto out;
- } else if (!strcmp(words[3], "detach")) {
- for (i = 3; i < wordcount; i++)
- words[i] = words[i + 1];
-
- ret = do_cli_cmd_volume_detach_tier(state, word, words, wordcount - 1,
- &aborted);
- if (!aborted) {
- if (!strcmp(words[wordcount - 2], "commit")) {
- event = EVENT_TIER_DETACH_COMMIT;
- } else if (!strcmp(words[wordcount - 2], "start")) {
- event = EVENT_TIER_DETACH_START;
- } else if (!strcmp(words[wordcount - 2], "stop")) {
- event = EVENT_TIER_DETACH_STOP;
- } else if (!strcmp(words[wordcount - 2], "force")) {
- event = EVENT_TIER_DETACH_FORCE;
- }
- }
- goto out;
-
- } else if (!strcmp(words[1], "attach-tier")) {
- /* we need to ask question when the older command is used */
- answer = cli_cmd_get_confirmation(state, attach_question);
- if (GF_ANSWER_NO == answer) {
- ret = 0;
- goto out;
- }
- ret = do_cli_cmd_volume_attach_tier(state, word, words, wordcount);
- goto out;
- } else if (!strcmp(words[3], "attach")) {
- for (i = 3; i < wordcount; i++)
- words[i] = words[i + 1];
-
- ret = do_cli_cmd_volume_attach_tier(state, word, words, wordcount - 1);
- if (!strcmp(words[wordcount - 2], "force")) {
- event = EVENT_TIER_ATTACH_FORCE;
- } else {
- event = EVENT_TIER_ATTACH;
- }
- goto out;
- }
-
- ret = cli_cmd_volume_tier_parse(words, wordcount, &options);
- if (ret) {
- cli_usage_out(word->pattern);
- goto out;
- }
-
- if (!strcmp(words[wordcount - 1], "start")) {
- event = EVENT_TIER_START;
- } else {
- if (!strcmp(words[wordcount - 2], "start") &&
- !strcmp(words[wordcount - 1], "force")) {
- event = EVENT_TIER_START_FORCE;
- }
- }
-
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_TIER];
-
- frame = create_frame(THIS, THIS->ctx->pool);
- if (!frame) {
- gf_log(THIS->name, GF_LOG_ERROR, "failed to create frame");
- ret = -1;
- goto out;
- }
-
- CLI_LOCAL_INIT(local, words, frame, options);
-
- if (proc->fn) {
- ret = proc->fn(frame, THIS, options);
- }
-
-out:
- if (ret) {
- cli_out("Tier command failed");
- } else {
- if (event != EVENT_LAST) {
- gf_event(event, "vol=%s", words[2]);
- }
- }
- if (options)
- dict_unref(options);
-
- return ret;
-}
-
-int
cli_get_soft_limit(dict_t *options, const char **words, dict_t *xdata)
{
call_frame_t *frame = NULL;
@@ -1541,7 +1241,7 @@ cli_cmd_quota_handle_list_all(const char **words, dict_t *options)
cli_local_t *local = NULL;
call_frame_t *frame = NULL;
dict_t *xdata = NULL;
- char *gfid_str = NULL;
+ char gfid_str[UUID_CANONICAL_FORM_LEN + 1];
char *volname = NULL;
char *volname_dup = NULL;
unsigned char buf[16] = {0};
@@ -1605,12 +1305,6 @@ cli_cmd_quota_handle_list_all(const char **words, dict_t *options)
goto out;
}
- frame = create_frame(THIS, THIS->ctx->pool);
- if (!frame) {
- ret = -1;
- goto out;
- }
-
volname_dup = gf_strdup(volname);
if (!volname_dup) {
ret = -1;
@@ -1642,15 +1336,15 @@ cli_cmd_quota_handle_list_all(const char **words, dict_t *options)
if (ret)
goto out;
- CLI_LOCAL_INIT(local, words, frame, xdata);
- proc = &cli_quotad_clnt.proctable[GF_AGGREGATOR_GETLIMIT];
-
- gfid_str = GF_CALLOC(1, gf_common_mt_char, 64);
- if (!gfid_str) {
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
ret = -1;
goto out;
}
+ CLI_LOCAL_INIT(local, words, frame, xdata);
+ proc = &cli_quotad_clnt.proctable[GF_AGGREGATOR_GETLIMIT];
+
for (count = 0;; count++) {
ret = quota_conf_read_gfid(fd, buf, &gfid_type, version);
if (ret == 0) {
@@ -1749,12 +1443,13 @@ out:
"xml format");
}
}
+ if (xdata)
+ dict_unref(xdata);
if (fd != -1) {
sys_close(fd);
}
- GF_FREE(gfid_str);
if (ret) {
gf_log("cli", GF_LOG_ERROR,
"Could not fetch and display quota"
@@ -1781,6 +1476,7 @@ cli_cmd_bitrot_cbk(struct cli_state *state, struct cli_cmd_word *word,
int event_type = -1;
char *tmp = NULL;
char *events_str = NULL;
+ char *volname = NULL;
#endif
ret = cli_cmd_bitrot_parse(words, wordcount, &options);
@@ -1826,10 +1522,9 @@ out:
if (ret1)
cmd_type = -1;
else {
- ret1 = dict_get_str(options, "volname", &tmp);
+ ret1 = dict_get_str(options, "volname", &volname);
if (ret1)
- tmp = "";
- gf_asprintf(&events_str, "name=%s", tmp);
+ volname = "";
}
switch (cmd_type) {
@@ -1847,21 +1542,21 @@ out:
ret1 = dict_get_str(options, "scrub-throttle-value", &tmp);
if (ret1)
tmp = "";
- gf_asprintf(&events_str, "%s;value=%s", events_str, tmp);
+ gf_asprintf(&events_str, "name=%s;value=%s", volname, tmp);
break;
case GF_BITROT_OPTION_TYPE_SCRUB_FREQ:
event_type = EVENT_BITROT_SCRUB_FREQ;
ret1 = dict_get_str(options, "scrub-frequency-value", &tmp);
if (ret1)
tmp = "";
- gf_asprintf(&events_str, "%s;value=%s", events_str, tmp);
+ gf_asprintf(&events_str, "name=%s;value=%s", volname, tmp);
break;
case GF_BITROT_OPTION_TYPE_SCRUB:
event_type = EVENT_BITROT_SCRUB_OPTION;
ret1 = dict_get_str(options, "scrub-value", &tmp);
if (ret1)
tmp = "";
- gf_asprintf(&events_str, "%s;value=%s", events_str, tmp);
+ gf_asprintf(&events_str, "name=%s;value=%s", volname, tmp);
break;
default:
break;
@@ -1970,6 +1665,8 @@ out:
"Quota command failed. Please check the cli "
"logs for more details");
}
+ if (options)
+ dict_unref(options);
/* Events for Quota */
if (ret == 0) {
@@ -2057,10 +1754,6 @@ cli_cmd_volume_remove_brick_cbk(struct cli_state *state,
int32_t command = GF_OP_CMD_NONE;
char *question = NULL;
- frame = create_frame(THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
-
ret = cli_cmd_volume_remove_brick_parse(state, words, wordcount, &options,
&need_question, &brick_count,
&command);
@@ -2077,14 +1770,15 @@ cli_cmd_volume_remove_brick_cbk(struct cli_state *state,
" on the volume.\nDo you want to continue?";
} else if (command == GF_OP_CMD_START) {
question =
- "Running remove-brick with cluster.force-migration"
- " enabled can result in data corruption. It is safer"
- " to disable this option so that files that receive "
- "writes during migration are not migrated.\nFiles "
- "that are not migrated can then be manually copied "
- "after the remove-brick commit operation.\nDo you "
- "want to continue with your current "
- "cluster.force-migration settings?";
+ "It is recommended that remove-brick be run with"
+ " cluster.force-migration option disabled to prevent"
+ " possible data corruption. Doing so will ensure that"
+ " files that receive writes during migration will not"
+ " be migrated and will need to be manually copied"
+ " after the remove-brick commit operation. Please"
+ " check the value of the option and update accordingly."
+ " \nDo you want to continue with your current"
+ " cluster.force-migration settings?";
}
if (!brick_count) {
@@ -2125,6 +1819,12 @@ cli_cmd_volume_remove_brick_cbk(struct cli_state *state,
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_REMOVE_BRICK];
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
CLI_LOCAL_INIT(local, words, frame, options);
if (proc->fn) {
@@ -2146,6 +1846,8 @@ out:
#endif
CLI_STACK_DESTROY(frame);
+ if (options)
+ dict_unref(options);
return ret;
}
@@ -2169,10 +1871,6 @@ cli_cmd_volume_reset_brick_cbk(struct cli_state *state,
#endif
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_RESET_BRICK];
- frame = create_frame(THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
-
ret = cli_cmd_volume_reset_brick_parse(words, wordcount, &options);
if (ret) {
@@ -2191,6 +1889,12 @@ cli_cmd_volume_reset_brick_cbk(struct cli_state *state,
}
}
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
CLI_LOCAL_INIT(local, words, frame, options);
if (proc->fn) {
@@ -2237,10 +1941,6 @@ cli_cmd_volume_replace_brick_cbk(struct cli_state *state,
#endif
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_REPLACE_BRICK];
- frame = create_frame(THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
-
ret = cli_cmd_volume_replace_brick_parse(words, wordcount, &options);
if (ret) {
@@ -2249,6 +1949,12 @@ cli_cmd_volume_replace_brick_cbk(struct cli_state *state,
goto out;
}
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
CLI_LOCAL_INIT(local, words, frame, options);
if (proc->fn) {
@@ -2344,8 +2050,7 @@ cli_cmd_log_rotate_cbk(struct cli_state *state, struct cli_cmd_word *word,
goto out;
}
- if (!((strcmp("rotate", words[2]) == 0) ||
- (strcmp("rotate", words[3]) == 0))) {
+ if (!(strcmp("rotate", words[3]) == 0)) {
cli_usage_out(word->pattern);
parse_error = 1;
goto out;
@@ -2353,6 +2058,10 @@ cli_cmd_log_rotate_cbk(struct cli_state *state, struct cli_cmd_word *word,
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_LOG_ROTATE];
+ ret = cli_cmd_log_rotate_parse(words, wordcount, &options);
+ if (ret)
+ goto out;
+
frame = create_frame(THIS, THIS->ctx->pool);
if (!frame) {
gf_log(THIS->name, GF_LOG_ERROR, "failed to create frame");
@@ -2360,10 +2069,6 @@ cli_cmd_log_rotate_cbk(struct cli_state *state, struct cli_cmd_word *word,
goto out;
}
- ret = cli_cmd_log_rotate_parse(words, wordcount, &options);
- if (ret)
- goto out;
-
CLI_LOCAL_INIT(local, words, frame, options);
if (proc->fn) {
@@ -2453,6 +2158,7 @@ cli_cmd_volume_gsync_set_cbk(struct cli_state *state, struct cli_cmd_word *word,
rpc_clnt_procedure_t *proc = NULL;
call_frame_t *frame = NULL;
cli_local_t *local = NULL;
+ char *errstr = NULL;
#if (USE_EVENTS)
int ret1 = -1;
int cmd_type = -1;
@@ -2464,16 +2170,21 @@ cli_cmd_volume_gsync_set_cbk(struct cli_state *state, struct cli_cmd_word *word,
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GSYNC_SET];
- frame = create_frame(THIS, THIS->ctx->pool);
- if (frame == NULL) {
- ret = -1;
+ ret = cli_cmd_gsync_set_parse(state, words, wordcount, &options, &errstr);
+ if (ret) {
+ if (errstr) {
+ cli_err("%s", errstr);
+ GF_FREE(errstr);
+ } else {
+ cli_usage_out(word->pattern);
+ }
+ parse_err = 1;
goto out;
}
- ret = cli_cmd_gsync_set_parse(words, wordcount, &options);
- if (ret) {
- cli_usage_out(word->pattern);
- parse_err = 1;
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (frame == NULL) {
+ ret = -1;
goto out;
}
@@ -2529,13 +2240,15 @@ out:
if (ret1)
tmp = "";
- gf_asprintf(&events_str, "%soption=%s;", events_str, tmp);
+ gf_asprintf_append(&events_str, "%soption=%s;", events_str,
+ tmp);
ret1 = dict_get_str(options, "op_value", &tmp);
if (ret1)
tmp = "";
- gf_asprintf(&events_str, "%svalue=%s;", events_str, tmp);
+ gf_asprintf_append(&events_str, "%svalue=%s;", events_str,
+ tmp);
} else if (strcmp(tmp, "del") == 0) {
event_type = EVENT_GEOREP_CONFIG_RESET;
@@ -2543,7 +2256,8 @@ out:
if (ret1)
tmp = "";
- gf_asprintf(&events_str, "%soption=%s;", events_str, tmp);
+ gf_asprintf_append(&events_str, "%soption=%s;", events_str,
+ tmp);
}
break;
default:
@@ -2553,42 +2267,49 @@ out:
if (event_type > -1) {
/* Capture all optional arguments used */
ret1 = dict_get_int32(options, "force", &tmpi);
- if (ret1 == 0)
- gf_asprintf(&events_str, "%sforce=%d;", events_str, tmpi);
-
+ if (ret1 == 0) {
+ gf_asprintf_append(&events_str, "%sforce=%d;", events_str,
+ tmpi);
+ }
ret1 = dict_get_int32(options, "push_pem", &tmpi);
- if (ret1 == 0)
- gf_asprintf(&events_str, "%spush_pem=%d;", events_str, tmpi);
-
+ if (ret1 == 0) {
+ gf_asprintf_append(&events_str, "%spush_pem=%d;", events_str,
+ tmpi);
+ }
ret1 = dict_get_int32(options, "no_verify", &tmpi);
- if (ret1 == 0)
- gf_asprintf(&events_str, "%sno_verify=%d;", events_str, tmpi);
+ if (ret1 == 0) {
+ gf_asprintf_append(&events_str, "%sno_verify=%d;", events_str,
+ tmpi);
+ }
ret1 = dict_get_int32(options, "ssh_port", &tmpi);
- if (ret1 == 0)
- gf_asprintf(&events_str, "%sssh_port=%d;", events_str, tmpi);
+ if (ret1 == 0) {
+ gf_asprintf_append(&events_str, "%sssh_port=%d;", events_str,
+ tmpi);
+ }
ret1 = dict_get_int32(options, "reset-sync-time", &tmpi);
- if (ret1 == 0)
- gf_asprintf(&events_str, "%sreset_sync_time=%d;", events_str,
- tmpi);
-
+ if (ret1 == 0) {
+ gf_asprintf_append(&events_str, "%sreset_sync_time=%d;",
+ events_str, tmpi);
+ }
/* Capture Master and Slave Info */
ret1 = dict_get_str(options, "master", &tmp);
if (ret1)
tmp = "";
- gf_asprintf(&events_str, "%smaster=%s;", events_str, tmp);
+ gf_asprintf_append(&events_str, "%smaster=%s;", events_str, tmp);
ret1 = dict_get_str(options, "slave", &tmp);
if (ret1)
tmp = "";
- gf_asprintf(&events_str, "%sslave=%s", events_str, tmp);
+ gf_asprintf_append(&events_str, "%sslave=%s", events_str, tmp);
gf_event(event_type, "%s", events_str);
}
/* Allocated by gf_strdup and gf_asprintf */
- GF_FREE(events_str);
+ if (events_str)
+ GF_FREE(events_str);
}
#endif
@@ -2838,7 +2559,7 @@ cli_launch_glfs_heal(int heal_op, dict_t *options)
runinit(&runner);
ret = dict_get_str(options, "volname", &volname);
- runner_add_args(&runner, SBIN_DIR "/glfsheal", volname, NULL);
+ runner_add_args(&runner, GLFSHEAL_PREFIX "/glfsheal", volname, NULL);
runner_redir(&runner, STDOUT_FILENO, RUN_PIPE);
switch (heal_op) {
@@ -2913,9 +2634,6 @@ cli_cmd_volume_heal_cbk(struct cli_state *state, struct cli_cmd_word *word,
int heal_op = 0;
this = THIS;
- frame = create_frame(this, this->ctx->pool);
- if (!frame)
- goto out;
if (wordcount < 3) {
cli_usage_out(word->pattern);
@@ -2942,6 +2660,12 @@ cli_cmd_volume_heal_cbk(struct cli_state *state, struct cli_cmd_word *word,
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_HEAL_VOLUME];
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
CLI_LOCAL_INIT(local, words, frame, options);
if (proc->fn) {
@@ -2976,10 +2700,6 @@ cli_cmd_volume_statedump_cbk(struct cli_state *state, struct cli_cmd_word *word,
int parse_error = 0;
cli_local_t *local = NULL;
- frame = create_frame(THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
-
if (wordcount < 3) {
cli_usage_out(word->pattern);
parse_error = 1;
@@ -3005,6 +2725,12 @@ cli_cmd_volume_statedump_cbk(struct cli_state *state, struct cli_cmd_word *word,
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_STATEDUMP_VOLUME];
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
CLI_LOCAL_INIT(local, words, frame, options);
if (proc->fn) {
@@ -3032,12 +2758,11 @@ cli_cmd_volume_list_cbk(struct cli_state *state, struct cli_cmd_word *word,
rpc_clnt_procedure_t *proc = NULL;
int sent = 0;
- frame = create_frame(THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
-
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_LIST_VOLUME];
if (proc->fn) {
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
ret = proc->fn(frame, THIS, NULL);
}
@@ -3066,10 +2791,6 @@ cli_cmd_volume_clearlocks_cbk(struct cli_state *state,
int parse_error = 0;
cli_local_t *local = NULL;
- frame = create_frame(THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
-
if (wordcount < 7 || wordcount > 8) {
cli_usage_out(word->pattern);
parse_error = 1;
@@ -3096,6 +2817,12 @@ cli_cmd_volume_clearlocks_cbk(struct cli_state *state,
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_CLRLOCKS_VOLUME];
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
CLI_LOCAL_INIT(local, words, frame, options);
if (proc->fn) {
@@ -3126,10 +2853,6 @@ cli_cmd_volume_barrier_cbk(struct cli_state *state, struct cli_cmd_word *word,
int parse_error = 0;
cli_local_t *local = NULL;
- frame = create_frame(THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
-
if (wordcount != 4) {
cli_usage_out(word->pattern);
parse_error = 1;
@@ -3151,6 +2874,12 @@ cli_cmd_volume_barrier_cbk(struct cli_state *state, struct cli_cmd_word *word,
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_BARRIER_VOLUME];
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
CLI_LOCAL_INIT(local, words, frame, options);
if (proc->fn)
@@ -3185,10 +2914,6 @@ cli_cmd_volume_getopt_cbk(struct cli_state *state, struct cli_cmd_word *word,
goto out;
}
- frame = create_frame(THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
-
options = dict_new();
if (!options)
goto out;
@@ -3203,6 +2928,12 @@ cli_cmd_volume_getopt_cbk(struct cli_state *state, struct cli_cmd_word *word,
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GET_VOL_OPT];
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
CLI_LOCAL_INIT(local, words, frame, options);
if (proc->fn)
@@ -3232,6 +2963,16 @@ struct cli_cmd bitrot_cmds[] = {
{"volume bitrot <VOLNAME> {enable|disable}", NULL, /*cli_cmd_bitrot_cbk,*/
"Enable/disable bitrot for volume <VOLNAME>"},
+ {"volume bitrot <VOLNAME> signing-time <time-in-secs>",
+ NULL, /*cli_cmd_bitrot_cbk,*/
+ "Waiting time for an object after last fd is closed to start signing "
+ "process"},
+
+ {"volume bitrot <VOLNAME> signer-threads <count>",
+ NULL, /*cli_cmd_bitrot_cbk,*/
+ "Number of signing process threads. Usually set to number of available "
+ "cores"},
+
{"volume bitrot <VOLNAME> scrub-throttle {lazy|normal|aggressive}",
NULL, /*cli_cmd_bitrot_cbk,*/
"Set the speed of the scrubber for volume <VOLNAME>"},
@@ -3247,6 +2988,8 @@ struct cli_cmd bitrot_cmds[] = {
"the scrubber. ondemand starts the scrubber immediately."},
{"volume bitrot <VOLNAME> {enable|disable}\n"
+ "volume bitrot <VOLNAME> signing-time <time-in-secs>\n"
+ "volume bitrot <VOLNAME> signer-threads <count>\n"
"volume bitrot <volname> scrub-throttle {lazy|normal|aggressive}\n"
"volume bitrot <volname> scrub-frequency {hourly|daily|weekly|biweekly"
"|monthly}\n"
@@ -3289,50 +3032,6 @@ struct cli_cmd quota_cmds[] = {
{NULL, NULL, NULL}};
-struct cli_cmd tier_cmds[] = {
-
- {"volume tier help", cli_cmd_tier_help_cbk,
- "display help for volume tier commands"},
-
- {"volume tier <VOLNAME> status", cli_cmd_volume_tier_cbk,
- "Display tier status for <VOLNAME>"},
-
- {"volume tier <VOLNAME> start [force]", cli_cmd_volume_tier_cbk,
- "Start the tier service for <VOLNAME>"},
-
- {"volume tier <VOLNAME> stop [force]", cli_cmd_volume_tier_cbk,
- "Stop the tier service for <VOLNAME>"},
-
- {"volume tier <VOLNAME> attach [<replica COUNT>] <NEW-BRICK>... [force]",
- cli_cmd_volume_tier_cbk, "Attach a hot tier to <VOLNAME>"},
-
- {"volume tier <VOLNAME> detach <start|stop|status|commit|[force]>",
- cli_cmd_volume_tier_cbk, "Detach the hot tier from <VOLNAME>"},
-
- {"volume attach-tier <VOLNAME> [<replica COUNT>] <NEW-BRICK>...",
- cli_cmd_volume_tier_cbk,
- "NOTE: this is old syntax, will be deprecated in next release. "
- "Please use gluster volume tier <vol> attach "
- "[<replica COUNT>] <NEW-BRICK>..."},
-
- {"volume detach-tier <VOLNAME> "
- "<start|stop|status|commit|force>",
- cli_cmd_volume_tier_cbk,
- "NOTE: this is old syntax, will be deprecated in next release. "
- "Please use gluster volume tier <vol> detach "
- "{start|stop|commit} [force]"},
-
- {"volume tier <VOLNAME> status\n"
- "volume tier <VOLNAME> start [force]\n"
- "volume tier <VOLNAME> stop\n"
- "volume tier <VOLNAME> attach [<replica COUNT>] <NEW-BRICK>... [force]\n"
- "volume tier <VOLNAME> detach <start|stop|status|commit|[force]>\n",
- cli_cmd_volume_tier_cbk, NULL},
-
- {NULL, NULL, NULL}
-
-};
-
struct cli_cmd volume_cmds[] = {
{"volume help", cli_cmd_volume_help_cbk,
"display help for volume commands"},
@@ -3341,12 +3040,9 @@ struct cli_cmd volume_cmds[] = {
"list information of all volumes"},
{"volume create <NEW-VOLNAME> [stripe <COUNT>] "
- "[replica <COUNT> [arbiter <COUNT>]] "
+ "[[replica <COUNT> [arbiter <COUNT>]]|[replica 2 thin-arbiter 1]] "
"[disperse [<COUNT>]] [disperse-data <COUNT>] [redundancy <COUNT>] "
- "[transport <tcp|rdma|tcp,rdma>] <NEW-BRICK>"
-#ifdef HAVE_BD_XLATOR
- "?<vg_name>"
-#endif
+ "[transport <tcp|rdma|tcp,rdma>] <NEW-BRICK> <TA-BRICK>"
"... [force]",
cli_cmd_volume_create_cbk,
@@ -3388,13 +3084,14 @@ struct cli_cmd volume_cmds[] = {
{"volume set <VOLNAME> <KEY> <VALUE>", cli_cmd_volume_set_cbk,
"set options for volume <VOLNAME>"},
+ {"volume set <VOLNAME> group <GROUP>", cli_cmd_volume_set_cbk,
+ "This option can be used for setting multiple pre-defined volume options "
+ "where group_name is a file under /var/lib/glusterd/groups containing one "
+ "key value pair per line"},
+
{"volume log <VOLNAME> rotate [BRICK]", cli_cmd_log_rotate_cbk,
"rotate the log file for corresponding volume/brick"},
- {"volume log rotate <VOLNAME> [BRICK]", cli_cmd_log_rotate_cbk,
- "rotate the log file for corresponding volume/brick"
- " NOTE: This is an old syntax, will be deprecated from next release."},
-
{"volume sync <HOSTNAME> [all|<VOLNAME>]", cli_cmd_sync_volume_cbk,
"sync the volume information from a peer"},
@@ -3402,8 +3099,8 @@ struct cli_cmd volume_cmds[] = {
"reset all the reconfigured options"},
#if (SYNCDAEMON_COMPILE)
- {"volume " GEOREP " [<VOLNAME>] [<SLAVE-URL>] {\\\n create [[ssh-port n] "
- "[[no-verify] | [push-pem]]] [force] \\\n"
+ {"volume " GEOREP " [<MASTER-VOLNAME>] [<SLAVE-IP>]::[<SLAVE-VOLNAME>] {"
+ "\\\n create [[ssh-port n] [[no-verify] \\\n | [push-pem]]] [force] \\\n"
" | start [force] \\\n | stop [force] \\\n | pause [force] \\\n | resume "
"[force] \\\n"
" | config [[[\\!]<option>] [<value>]] \\\n | status "
@@ -3417,12 +3114,12 @@ struct cli_cmd volume_cmds[] = {
cli_cmd_volume_profile_cbk, "volume profile operations"},
{"volume top <VOLNAME> {open|read|write|opendir|readdir|clear} [nfs|brick "
- "<brick>] [list-cnt <value>] |\n"
- "volume top <VOLNAME> {read-perf|write-perf} [bs <size> count <count>] "
+ "<brick>] [list-cnt <value>] | "
+ "{read-perf|write-perf} [bs <size> count <count>] "
"[brick <brick>] [list-cnt <value>]",
cli_cmd_volume_top_cbk, "volume top operations"},
- {"volume status [all | <VOLNAME> [nfs|shd|<BRICK>|quotad|tierd]]"
+ {"volume status [all | <VOLNAME> [nfs|shd|<BRICK>|quotad]]"
" [detail|clients|mem|inode|fd|callpool|tasks|client-list]",
cli_cmd_volume_status_cbk,
"display status of all or specified volume(s)/brick"},
@@ -3511,32 +3208,6 @@ cli_cmd_bitrot_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
}
int
-cli_cmd_tier_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
- const char **words, int wordcount)
-{
- struct cli_cmd *cmd = NULL;
- struct cli_cmd *tier_cmd = NULL;
- int count = 0;
-
- cmd = GF_MALLOC(sizeof(tier_cmds), cli_mt_cli_cmd);
- memcpy(cmd, tier_cmds, sizeof(tier_cmds));
- count = (sizeof(tier_cmds) / sizeof(struct cli_cmd));
- cli_cmd_sort(cmd, count);
-
- cli_out("\ngluster tier commands");
- cli_out("======================\n");
-
- for (tier_cmd = cmd; tier_cmd->pattern; tier_cmd++) {
- if ((_gf_false == tier_cmd->disable) && tier_cmd->desc) {
- cli_out("%s - %s", tier_cmd->pattern, tier_cmd->desc);
- }
- }
- cli_out("\n");
- GF_FREE(cmd);
- return 0;
-}
-
-int
cli_cmd_volume_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
const char **words, int wordcount)
{
@@ -3585,15 +3256,23 @@ cli_cmd_volume_register(struct cli_state *state)
goto out;
}
-#if !defined(__NetBSD__)
- for (cmd = tier_cmds; cmd->pattern; cmd++) {
- ret = cli_cmd_register(&state->tree, cmd);
- if (ret)
- goto out;
- }
-
-#endif
-
out:
return ret;
}
+
+static int
+gf_asprintf_append(char **string_ptr, const char *format, ...)
+{
+ va_list arg;
+ int rv = 0;
+ char *tmp = *string_ptr;
+
+ va_start(arg, format);
+ rv = gf_vasprintf(string_ptr, format, arg);
+ va_end(arg);
+
+ if (tmp)
+ GF_FREE(tmp);
+
+ return rv;
+}
diff --git a/cli/src/cli-cmd.c b/cli/src/cli-cmd.c
index 396cabebbbe..2d458b16a56 100644
--- a/cli/src/cli-cmd.c
+++ b/cli/src/cli-cmd.c
@@ -28,11 +28,7 @@ static pthread_cond_t conn = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t conn_mutex = PTHREAD_MUTEX_INITIALIZER;
int cli_op_ret = 0;
-int connected = 0;
-
-int
-cli_cmd_log_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
- const char **words, int wordcount);
+static gf_boolean_t connected = _gf_false;
static unsigned
cli_cmd_needs_connection(struct cli_cmd_word *word)
@@ -108,12 +104,14 @@ cli_cmd_process(struct cli_state *state, int argc, char **argv)
}
if (!word) {
- cli_out("unrecognized word: %s (position %d)", argv[i], i);
+ cli_out("unrecognized word: %s (position %d)\n", argv[i], i);
+ usage();
return -1;
}
if (!word->cbkfn) {
- cli_out("unrecognized command");
+ cli_out("unrecognized command\n");
+ usage();
return -1;
}
@@ -234,18 +232,6 @@ out:
}
int
-cli_cmd_cond_init()
-{
- pthread_mutex_init(&cond_mutex, NULL);
- pthread_cond_init(&cond, NULL);
-
- pthread_mutex_init(&conn_mutex, NULL);
- pthread_cond_init(&conn, NULL);
-
- return 0;
-}
-
-int
cli_cmd_lock()
{
pthread_mutex_lock(&cond_mutex);
@@ -342,19 +328,32 @@ cli_cmd_await_connected(unsigned conn_timo)
}
int32_t
-cli_cmd_broadcast_connected()
+cli_cmd_broadcast_connected(gf_boolean_t status)
{
pthread_mutex_lock(&conn_mutex);
{
- connected = 1;
+ connected = status;
pthread_cond_broadcast(&conn);
}
-
pthread_mutex_unlock(&conn_mutex);
return 0;
}
+gf_boolean_t
+cli_cmd_connected(void)
+{
+ gf_boolean_t status;
+
+ pthread_mutex_lock(&conn_mutex);
+ {
+ status = connected;
+ }
+ pthread_mutex_unlock(&conn_mutex);
+
+ return status;
+}
+
int
cli_cmd_submit(struct rpc_clnt *rpc, void *req, call_frame_t *frame,
rpc_clnt_prog_t *prog, int procnum, struct iobref *iobref,
@@ -364,7 +363,8 @@ cli_cmd_submit(struct rpc_clnt *rpc, void *req, call_frame_t *frame,
unsigned timeout = 0;
if ((GLUSTER_CLI_PROFILE_VOLUME == procnum) ||
- (GLUSTER_CLI_HEAL_VOLUME == procnum))
+ (GLUSTER_CLI_HEAL_VOLUME == procnum) ||
+ (GLUSTER_CLI_GANESHA == procnum))
timeout = cli_ten_minutes_timeout;
else
timeout = cli_default_conn_timeout;
diff --git a/cli/src/cli-cmd.h b/cli/src/cli-cmd.h
index 70c864eae30..c1c068c7085 100644
--- a/cli/src/cli-cmd.h
+++ b/cli/src/cli-cmd.h
@@ -13,7 +13,7 @@
#include <netdb.h>
#include "cli.h"
-#include "list.h"
+#include <glusterfs/list.h>
#define GLUSTER_SHARED_STORAGE "gluster_shared_storage"
@@ -102,9 +102,6 @@ int
cli_cmd_broadcast_response(int32_t status);
int
-cli_cmd_cond_init();
-
-int
cli_cmd_lock();
int
diff --git a/cli/src/cli-mem-types.h b/cli/src/cli-mem-types.h
index a762b685f0a..b42b4dd86c2 100644
--- a/cli/src/cli-mem-types.h
+++ b/cli/src/cli-mem-types.h
@@ -10,7 +10,7 @@
#ifndef __CLI_MEM_TYPES_H__
#define __CLI_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
#define CLI_MEM_TYPE_START (gf_common_mt_end + 1)
diff --git a/cli/src/cli-quotad-client.c b/cli/src/cli-quotad-client.c
index 52ab97ee815..772b8f75bd9 100644
--- a/cli/src/cli-quotad-client.c
+++ b/cli/src/cli-quotad-client.c
@@ -10,9 +10,6 @@
#include "cli-quotad-client.h"
-extern struct rpc_clnt global_quotad_rpc;
-extern struct rpc_clnt_program cli_quotad_clnt;
-
int
cli_quotad_submit_request(void *req, call_frame_t *frame, rpc_clnt_prog_t *prog,
int procnum, struct iobref *iobref, xlator_t *this,
@@ -60,7 +57,7 @@ cli_quotad_submit_request(void *req, call_frame_t *frame, rpc_clnt_prog_t *prog,
}
/* Send the msg */
- ret = rpc_clnt_submit(&global_quotad_rpc, prog, procnum, cbkfn, &iov, count,
+ ret = rpc_clnt_submit(global_quotad_rpc, prog, procnum, cbkfn, &iov, count,
NULL, 0, iobref, frame, NULL, 0, NULL, 0, NULL);
ret = 0;
diff --git a/cli/src/cli-quotad-client.h b/cli/src/cli-quotad-client.h
index 57a998c913c..71a44e5916b 100644
--- a/cli/src/cli-quotad-client.h
+++ b/cli/src/cli-quotad-client.h
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
#include "cli.h"
-#include "compat-errno.h"
-#include "compat.h"
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
#include "cli-cmd.h"
#include "cli1-xdr.h"
#include "xdr-generic.h"
diff --git a/cli/src/cli-rl.c b/cli/src/cli-rl.c
index 38aa6f4b7ae..7a38a0b882a 100644
--- a/cli/src/cli-rl.c
+++ b/cli/src/cli-rl.c
@@ -17,7 +17,7 @@
#include "cli-cmd.h"
#include "cli-mem-types.h"
-#include "gf-event.h"
+#include <glusterfs/gf-event.h>
#include <fnmatch.h>
@@ -102,7 +102,7 @@ cli_rl_process_line(char *line)
state->rl_processing = 0;
}
-int
+void
cli_rl_stdin(int fd, int idx, int gen, void *data, int poll_out, int poll_in,
int poll_err, char event_thread_died)
{
@@ -112,9 +112,9 @@ cli_rl_stdin(int fd, int idx, int gen, void *data, int poll_out, int poll_in,
rl_callback_read_char();
- event_handled(state->ctx->event_pool, fd, idx, gen);
+ gf_event_handled(state->ctx->event_pool, fd, idx, gen);
- return 0;
+ return;
}
char *
@@ -346,6 +346,9 @@ cli_rl_input(void *_data)
state = _data;
+ fprintf(stderr,
+ "Welcome to gluster prompt, type 'help' to see the available "
+ "commands.\n");
for (;;) {
line = readline(state->prompt);
if (!line)
@@ -376,8 +379,8 @@ cli_rl_enable(struct cli_state *state)
goto out;
}
- ret = event_register(state->ctx->event_pool, 0, cli_rl_stdin, state, 1, 0,
- 0);
+ ret = gf_event_register(state->ctx->event_pool, 0, cli_rl_stdin, state, 1,
+ 0, 0);
if (ret == -1)
goto out;
diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
index 13ff21a6c17..9b6b0c7fa50 100644
--- a/cli/src/cli-rpc-ops.c
+++ b/cli/src/cli-rpc-ops.c
@@ -19,42 +19,37 @@
#define INDENT_MAIN_HEAD "%-25s %s "
+#define RETURNING "Returning %d"
+#define XML_ERROR "Error outputting to xml"
+#define XDR_DECODE_FAIL "Failed to decode xdr response"
+#define DICT_SERIALIZE_FAIL "Failed to serialize to data to dictionary"
+#define DICT_UNSERIALIZE_FAIL "Failed to unserialize the dictionary"
+
/* Do not show estimates if greater than this number */
#define REBAL_ESTIMATE_SEC_UPPER_LIMIT (60 * 24 * 3600)
#define REBAL_ESTIMATE_START_TIME 600
#include "cli.h"
-#include "compat-errno.h"
+#include <glusterfs/compat-errno.h>
#include "cli-cmd.h"
#include <sys/uio.h>
#include <stdlib.h>
#include <sys/mount.h>
-#include "cli1-xdr.h"
-#include "xdr-generic.h"
-#include "protocol-common.h"
+#include <glusterfs/compat.h>
#include "cli-mem-types.h"
-#include "compat.h"
-#include "upcall-utils.h"
-
-#include "syscall.h"
+#include <glusterfs/syscall.h>
#include "glusterfs3.h"
#include "portmap-xdr.h"
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
-#include "cli-quotad-client.h"
-#include "run.h"
-#include "quota-common-utils.h"
-#include "events.h"
+#include <glusterfs/run.h>
+#include <glusterfs/events.h>
enum gf_task_types { GF_TASK_TYPE_REBALANCE, GF_TASK_TYPE_REMOVE_BRICK };
-extern struct rpc_clnt *global_quotad_rpc;
-extern rpc_clnt_prog_t cli_quotad_clnt;
-extern rpc_clnt_prog_t *cli_rpc_prog;
-extern int cli_op_ret;
-extern int connected;
+rpc_clnt_prog_t cli_quotad_clnt;
-int32_t
+static int32_t
gf_cli_remove_brick(call_frame_t *frame, xlator_t *this, void *data);
char *cli_vol_status_str[] = {
@@ -74,27 +69,27 @@ char *cli_vol_task_status_str[] = {"not started",
"fix-layout failed",
"unknown"};
-int32_t
+static int32_t
gf_cli_snapshot(call_frame_t *frame, xlator_t *this, void *data);
-int32_t
+static int32_t
gf_cli_get_volume(call_frame_t *frame, xlator_t *this, void *data);
-int
+static int
cli_to_glusterd(gf_cli_req *req, call_frame_t *frame, fop_cbk_fn_t cbkfn,
xdrproc_t xdrproc, dict_t *dict, int procnum, xlator_t *this,
rpc_clnt_prog_t *prog, struct iobref *iobref);
-int
+static int
add_cli_cmd_timeout_to_dict(dict_t *dict);
-rpc_clnt_prog_t cli_handshake_prog = {
+static rpc_clnt_prog_t cli_handshake_prog = {
.progname = "cli handshake",
.prognum = GLUSTER_HNDSK_PROGRAM,
.progver = GLUSTER_HNDSK_VERSION,
};
-rpc_clnt_prog_t cli_pmap_prog = {
+static rpc_clnt_prog_t cli_pmap_prog = {
.progname = "cli portmap",
.prognum = GLUSTER_PMAP_PROGRAM,
.progver = GLUSTER_PMAP_VERSION,
@@ -133,7 +128,7 @@ gf_free_xdr_fsm_log_rsp(gf1_cli_fsm_log_rsp rsp)
}
}
-int
+static int
gf_cli_probe_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -141,9 +136,7 @@ gf_cli_probe_cbk(struct rpc_req *req, struct iovec *iov, int count,
0,
};
int ret = -1;
- char msg[1024] = {
- 0,
- };
+ char msg[1024] = "success";
GF_ASSERT(myframe);
@@ -154,7 +147,7 @@ gf_cli_probe_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ XDR_DECODE_FAIL);
// rsp.op_ret = -1;
// rsp.op_errno = EINVAL;
goto out;
@@ -162,22 +155,23 @@ gf_cli_probe_cbk(struct rpc_req *req, struct iovec *iov, int count,
gf_log("cli", GF_LOG_INFO, "Received resp to probe");
- if (rsp.op_errstr && (strlen(rsp.op_errstr) > 0)) {
+ if (rsp.op_errstr && rsp.op_errstr[0] != '\0') {
snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
- if (rsp.op_ret)
+ if (rsp.op_ret) {
gf_log("cli", GF_LOG_ERROR, "%s", msg);
+ }
}
if (global_state->mode & GLUSTER_MODE_XML) {
ret = cli_xml_output_str(NULL, (rsp.op_ret) ? NULL : msg, rsp.op_ret,
rsp.op_errno, (rsp.op_ret) ? msg : NULL);
if (ret)
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto out;
}
if (!rsp.op_ret)
- cli_out("peer probe: success. %s", msg);
+ cli_out("peer probe: %s", msg);
else
cli_err("peer probe: failed: %s", msg);
@@ -189,7 +183,7 @@ out:
return ret;
}
-int
+static int
gf_cli_deprobe_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -197,9 +191,7 @@ gf_cli_deprobe_cbk(struct rpc_req *req, struct iovec *iov, int count,
0,
};
int ret = -1;
- char msg[1024] = {
- 0,
- };
+ char msg[1024] = "success";
GF_ASSERT(myframe);
@@ -210,7 +202,7 @@ gf_cli_deprobe_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ XDR_DECODE_FAIL);
// rsp.op_ret = -1;
// rsp.op_errno = EINVAL;
goto out;
@@ -219,19 +211,17 @@ gf_cli_deprobe_cbk(struct rpc_req *req, struct iovec *iov, int count,
gf_log("cli", GF_LOG_INFO, "Received resp to deprobe");
if (rsp.op_ret) {
- if (strlen(rsp.op_errstr) > 0) {
+ if (rsp.op_errstr[0] != '\0') {
snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
gf_log("cli", GF_LOG_ERROR, "%s", rsp.op_errstr);
}
- } else {
- snprintf(msg, sizeof(msg), "success");
}
if (global_state->mode & GLUSTER_MODE_XML) {
ret = cli_xml_output_str(NULL, (rsp.op_ret) ? NULL : msg, rsp.op_ret,
rsp.op_errno, (rsp.op_ret) ? msg : NULL);
if (ret)
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto out;
}
@@ -248,11 +238,11 @@ out:
return ret;
}
-int
-gf_cli_output_peer_hostnames(dict_t *dict, int count, char *prefix)
+static int
+gf_cli_output_peer_hostnames(dict_t *dict, int count, const char *prefix)
{
int ret = -1;
- char key[256] = {
+ char key[512] = {
0,
};
int i = 0;
@@ -263,8 +253,8 @@ gf_cli_output_peer_hostnames(dict_t *dict, int count, char *prefix)
* as friend.hostname
*/
for (i = 1; i < count; i++) {
- snprintf(key, sizeof(key), "%s.hostname%d", prefix, i);
- ret = dict_get_str(dict, key, &hostname);
+ ret = snprintf(key, sizeof(key), "%s.hostname%d", prefix, i);
+ ret = dict_get_strn(dict, key, ret, &hostname);
if (ret)
break;
cli_out("%s", hostname);
@@ -274,7 +264,7 @@ gf_cli_output_peer_hostnames(dict_t *dict, int count, char *prefix)
return ret;
}
-int
+static int
gf_cli_output_peer_status(dict_t *dict, int count)
{
int ret = -1;
@@ -284,26 +274,27 @@ gf_cli_output_peer_status(dict_t *dict, int count)
char key[256] = {
0,
};
+ int keylen;
char *state = NULL;
int32_t connected = 0;
- char *connected_str = NULL;
+ const char *connected_str = NULL;
int hostname_count = 0;
cli_out("Number of Peers: %d", count);
i = 1;
while (i <= count) {
- snprintf(key, 256, "friend%d.uuid", i);
- ret = dict_get_str(dict, key, &uuid_buf);
+ keylen = snprintf(key, sizeof(key), "friend%d.uuid", i);
+ ret = dict_get_strn(dict, key, keylen, &uuid_buf);
if (ret)
goto out;
- snprintf(key, 256, "friend%d.hostname", i);
- ret = dict_get_str(dict, key, &hostname_buf);
+ keylen = snprintf(key, sizeof(key), "friend%d.hostname", i);
+ ret = dict_get_strn(dict, key, keylen, &hostname_buf);
if (ret)
goto out;
- snprintf(key, 256, "friend%d.connected", i);
- ret = dict_get_int32(dict, key, &connected);
+ keylen = snprintf(key, sizeof(key), "friend%d.connected", i);
+ ret = dict_get_int32n(dict, key, keylen, &connected);
if (ret)
goto out;
if (connected)
@@ -311,16 +302,16 @@ gf_cli_output_peer_status(dict_t *dict, int count)
else
connected_str = "Disconnected";
- snprintf(key, 256, "friend%d.state", i);
- ret = dict_get_str(dict, key, &state);
+ keylen = snprintf(key, sizeof(key), "friend%d.state", i);
+ ret = dict_get_strn(dict, key, keylen, &state);
if (ret)
goto out;
cli_out("\nHostname: %s\nUuid: %s\nState: %s (%s)", hostname_buf,
uuid_buf, state, connected_str);
- snprintf(key, sizeof(key), "friend%d.hostname_count", i);
- ret = dict_get_int32(dict, key, &hostname_count);
+ keylen = snprintf(key, sizeof(key), "friend%d.hostname_count", i);
+ ret = dict_get_int32n(dict, key, keylen, &hostname_count);
/* Print other addresses only if there are more than 1.
*/
if ((ret == 0) && (hostname_count > 1)) {
@@ -340,7 +331,7 @@ out:
return ret;
}
-int
+static int
gf_cli_output_pool_list(dict_t *dict, int count)
{
int ret = -1;
@@ -348,18 +339,19 @@ gf_cli_output_pool_list(dict_t *dict, int count)
char *hostname_buf = NULL;
int32_t hostname_len = 8; /*min len 8 chars*/
int32_t i = 1;
- char key[256] = {
+ char key[64] = {
0,
};
+ int keylen;
int32_t connected = 0;
- char *connected_str = NULL;
+ const char *connected_str = NULL;
if (count <= 0)
goto out;
while (i <= count) {
- snprintf(key, 256, "friend%d.hostname", i);
- ret = dict_get_str(dict, key, &hostname_buf);
+ keylen = snprintf(key, sizeof(key), "friend%d.hostname", i);
+ ret = dict_get_strn(dict, key, keylen, &hostname_buf);
if (ret)
goto out;
@@ -374,18 +366,18 @@ gf_cli_output_pool_list(dict_t *dict, int count)
i = 1;
while (i <= count) {
- snprintf(key, 256, "friend%d.uuid", i);
- ret = dict_get_str(dict, key, &uuid_buf);
+ keylen = snprintf(key, sizeof(key), "friend%d.uuid", i);
+ ret = dict_get_strn(dict, key, keylen, &uuid_buf);
if (ret)
goto out;
- snprintf(key, 256, "friend%d.hostname", i);
- ret = dict_get_str(dict, key, &hostname_buf);
+ keylen = snprintf(key, sizeof(key), "friend%d.hostname", i);
+ ret = dict_get_strn(dict, key, keylen, &hostname_buf);
if (ret)
goto out;
- snprintf(key, 256, "friend%d.connected", i);
- ret = dict_get_int32(dict, key, &connected);
+ keylen = snprintf(key, sizeof(key), "friend%d.connected", i);
+ ret = dict_get_int32n(dict, key, keylen, &connected);
if (ret)
goto out;
if (connected)
@@ -406,7 +398,7 @@ out:
/* function pointer for gf_cli_output_{pool_list,peer_status} */
typedef int (*cli_friend_output_fn)(dict_t *, int);
-int
+static int
gf_cli_list_friends_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -418,11 +410,15 @@ gf_cli_list_friends_cbk(struct rpc_req *req, struct iovec *iov, int count,
char msg[1024] = {
0,
};
- char *cmd = NULL;
+ const char *cmd = NULL;
cli_friend_output_fn friend_output_fn;
call_frame_t *frame = NULL;
unsigned long flags = 0;
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
GF_ASSERT(myframe);
frame = myframe;
@@ -440,14 +436,9 @@ gf_cli_list_friends_cbk(struct rpc_req *req, struct iovec *iov, int count,
/* 'free' the flags set by gf_cli_list_friends */
frame->local = NULL;
- if (-1 == req->rpc_status) {
- goto out;
- }
-
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf1_cli_peer_list_rsp);
if (ret < 0) {
- gf_log(frame->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
// rsp.op_ret = -1;
// rsp.op_errno = EINVAL;
goto out;
@@ -462,7 +453,7 @@ gf_cli_list_friends_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = cli_xml_output_peer_status(dict, rsp.op_ret, rsp.op_errno,
msg);
if (ret)
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto out;
}
cli_err("%s", msg);
@@ -481,7 +472,7 @@ gf_cli_list_friends_cbk(struct rpc_req *req, struct iovec *iov, int count,
&dict);
if (ret) {
- gf_log("", GF_LOG_ERROR, "Unable to allocate memory");
+ gf_log("", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
goto out;
}
@@ -489,11 +480,11 @@ gf_cli_list_friends_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = cli_xml_output_peer_status(dict, rsp.op_ret, rsp.op_errno,
msg);
if (ret)
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto out;
}
- ret = dict_get_int32(dict, "count", &count);
+ ret = dict_get_int32_sizen(dict, "count", &count);
if (ret) {
goto out;
}
@@ -507,7 +498,7 @@ gf_cli_list_friends_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = cli_xml_output_peer_status(dict, rsp.op_ret, rsp.op_errno,
NULL);
if (ret)
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
} else {
ret = -1;
}
@@ -531,7 +522,7 @@ out:
return ret;
}
-int
+static int
gf_cli_get_state_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -551,7 +542,7 @@ gf_cli_get_state_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ XDR_DECODE_FAIL);
goto out;
}
@@ -574,11 +565,11 @@ gf_cli_get_state_cbk(struct rpc_req *req, struct iovec *iov, int count,
"Failed to get daemon state. Check glusterd"
" log file for more details");
} else {
- ret = dict_get_str(dict, "daemon", &daemon_name);
+ ret = dict_get_str_sizen(dict, "daemon", &daemon_name);
if (ret)
gf_log("cli", GF_LOG_ERROR, "Couldn't get daemon name");
- ret = dict_get_str(dict, "ofilepath", &ofilepath);
+ ret = dict_get_str_sizen(dict, "ofilepath", &ofilepath);
if (ret)
gf_log("cli", GF_LOG_ERROR, "Couldn't get filepath");
@@ -598,7 +589,7 @@ out:
return ret;
}
-void
+static void
cli_out_options(char *substr, char *optstr, char *valstr)
{
char *ptr1 = NULL;
@@ -649,25 +640,24 @@ static int
print_brick_details(dict_t *dict, int volcount, int start_index, int end_index,
int replica_count)
{
- char key[1024] = {
+ char key[64] = {
0,
};
+ int keylen;
int index = start_index;
int isArbiter = 0;
int ret = -1;
char *brick = NULL;
-#ifdef HAVE_BD_XLATOR
- char *caps = NULL;
-#endif
while (index <= end_index) {
- snprintf(key, 1024, "volume%d.brick%d", volcount, index);
- ret = dict_get_str(dict, key, &brick);
+ keylen = snprintf(key, sizeof(key), "volume%d.brick%d", volcount,
+ index);
+ ret = dict_get_strn(dict, key, keylen, &brick);
if (ret)
goto out;
- snprintf(key, sizeof(key), "volume%d.brick%d.isArbiter", volcount,
- index);
- if (dict_get(dict, key))
+ keylen = snprintf(key, sizeof(key), "volume%d.brick%d.isArbiter",
+ volcount, index);
+ if (dict_getn(dict, key, keylen))
isArbiter = 1;
else
isArbiter = 0;
@@ -676,25 +666,20 @@ print_brick_details(dict_t *dict, int volcount, int start_index, int end_index,
cli_out("Brick%d: %s (arbiter)", index, brick);
else
cli_out("Brick%d: %s", index, brick);
-#ifdef HAVE_BD_XLATOR
- snprintf(key, 1024, "volume%d.vg%d", volcount, index);
- ret = dict_get_str(dict, key, &caps);
- if (!ret)
- cli_out("Brick%d VG: %s", index, caps);
-#endif
index++;
}
ret = 0;
out:
return ret;
}
-void
+
+static void
gf_cli_print_number_of_bricks(int type, int brick_count, int dist_count,
int stripe_count, int replica_count,
int disperse_count, int redundancy_count,
int arbiter_count)
{
- if (type == GF_CLUSTER_TYPE_NONE || type == GF_CLUSTER_TYPE_TIER) {
+ if (type == GF_CLUSTER_TYPE_NONE) {
cli_out("Number of Bricks: %d", brick_count);
} else if (type == GF_CLUSTER_TYPE_DISPERSE) {
cli_out("Number of Bricks: %d x (%d + %d) = %d",
@@ -714,108 +699,7 @@ gf_cli_print_number_of_bricks(int type, int brick_count, int dist_count,
}
}
-int
-gf_cli_print_tier_info(dict_t *dict, int i, int brick_count)
-{
- int hot_brick_count = -1;
- int cold_type = 0;
- int cold_brick_count = 0;
- int cold_replica_count = 0;
- int cold_arbiter_count = 0;
- int cold_disperse_count = 0;
- int cold_redundancy_count = 0;
- int cold_dist_count = 0;
- int hot_type = 0;
- int hot_replica_count = 0;
- int hot_dist_count = 0;
- int ret = -1;
- int vol_type = -1;
- char key[256] = {
- 0,
- };
-
- GF_ASSERT(dict);
-
- snprintf(key, sizeof(key), "volume%d.cold_brick_count", i);
- ret = dict_get_int32(dict, key, &cold_brick_count);
- if (ret)
- goto out;
-
- snprintf(key, sizeof(key), "volume%d.cold_type", i);
- ret = dict_get_int32(dict, key, &cold_type);
- if (ret)
- goto out;
-
- snprintf(key, sizeof(key), "volume%d.cold_dist_count", i);
- ret = dict_get_int32(dict, key, &cold_dist_count);
- if (ret)
- goto out;
-
- snprintf(key, sizeof(key), "volume%d.cold_replica_count", i);
- ret = dict_get_int32(dict, key, &cold_replica_count);
- if (ret)
- goto out;
-
- snprintf(key, sizeof(key), "volume%d.cold_arbiter_count", i);
- ret = dict_get_int32(dict, key, &cold_arbiter_count);
- if (ret)
- goto out;
-
- snprintf(key, sizeof(key), "volume%d.cold_disperse_count", i);
- ret = dict_get_int32(dict, key, &cold_disperse_count);
- if (ret)
- goto out;
-
- snprintf(key, sizeof(key), "volume%d.cold_redundancy_count", i);
- ret = dict_get_int32(dict, key, &cold_redundancy_count);
- if (ret)
- goto out;
-
- snprintf(key, sizeof(key), "volume%d.hot_brick_count", i);
- ret = dict_get_int32(dict, key, &hot_brick_count);
- if (ret)
- goto out;
-
- snprintf(key, sizeof(key), "volume%d.hot_type", i);
- ret = dict_get_int32(dict, key, &hot_type);
- if (ret)
- goto out;
- snprintf(key, sizeof(key), "volume%d.hot_replica_count", i);
- ret = dict_get_int32(dict, key, &hot_replica_count);
- if (ret)
- goto out;
-
- cli_out("Hot Tier :");
- hot_dist_count = (hot_replica_count ? hot_replica_count : 1);
-
- vol_type = get_vol_type(hot_type, hot_dist_count, hot_brick_count);
- cli_out("Hot Tier Type : %s", vol_type_str[vol_type]);
-
- gf_cli_print_number_of_bricks(hot_type, hot_brick_count, hot_dist_count, 0,
- hot_replica_count, 0, 0, 0);
-
- ret = print_brick_details(dict, i, 1, hot_brick_count, hot_replica_count);
- if (ret)
- goto out;
-
- cli_out("Cold Tier:");
-
- vol_type = get_vol_type(cold_type, cold_dist_count, cold_brick_count);
- cli_out("Cold Tier Type : %s", vol_type_str[vol_type]);
-
- gf_cli_print_number_of_bricks(cold_type, cold_brick_count, cold_dist_count,
- 0, cold_replica_count, cold_disperse_count,
- cold_redundancy_count, cold_arbiter_count);
-
- ret = print_brick_details(dict, i, hot_brick_count + 1, brick_count,
- cold_replica_count);
- if (ret)
- goto out;
-out:
- return ret;
-}
-
-int
+static int
gf_cli_get_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -833,13 +717,16 @@ gf_cli_get_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
int32_t redundancy_count = 0;
int32_t arbiter_count = 0;
int32_t snap_count = 0;
+ int32_t thin_arbiter_count = 0;
int32_t vol_type = 0;
int32_t transport = 0;
char *volume_id_str = NULL;
char *volname = NULL;
+ char *ta_brick = NULL;
dict_t *dict = NULL;
cli_local_t *local = NULL;
- char key[1024] = {0};
+ char key[64] = {0};
+ int keylen;
char err_str[2048] = {0};
gf_cli_rsp rsp = {0};
char *caps __attribute__((unused)) = NULL;
@@ -859,8 +746,7 @@ gf_cli_get_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log(frame->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
goto out;
}
@@ -884,11 +770,11 @@ gf_cli_get_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
if (ret) {
- gf_log("cli", GF_LOG_ERROR, "Unable to allocate memory");
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
goto out;
}
- ret = dict_get_int32(dict, "count", &count);
+ ret = dict_get_int32_sizen(dict, "count", &count);
if (ret)
goto out;
@@ -925,7 +811,7 @@ xml_output:
ret = cli_xml_output_vol_info_begin(local, rsp.op_ret, rsp.op_errno,
rsp.op_errstr);
if (ret) {
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto out;
}
}
@@ -933,7 +819,7 @@ xml_output:
if (dict) {
ret = cli_xml_output_vol_info(local, dict);
if (ret) {
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto out;
}
}
@@ -941,75 +827,80 @@ xml_output:
if (local->get_vol.flags == GF_CLI_GET_VOLUME) {
ret = cli_xml_output_vol_info_end(local);
if (ret)
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
}
goto out;
}
while (i < count) {
cli_out(" ");
- snprintf(key, 256, "volume%d.name", i);
- ret = dict_get_str(dict, key, &volname);
+ keylen = snprintf(key, sizeof(key), "volume%d.name", i);
+ ret = dict_get_strn(dict, key, keylen, &volname);
if (ret)
goto out;
- snprintf(key, 256, "volume%d.type", i);
- ret = dict_get_int32(dict, key, &type);
+ keylen = snprintf(key, sizeof(key), "volume%d.type", i);
+ ret = dict_get_int32n(dict, key, keylen, &type);
if (ret)
goto out;
- snprintf(key, 256, "volume%d.status", i);
- ret = dict_get_int32(dict, key, &status);
+ keylen = snprintf(key, sizeof(key), "volume%d.status", i);
+ ret = dict_get_int32n(dict, key, keylen, &status);
+ if (ret)
+ goto out;
+
+ keylen = snprintf(key, sizeof(key), "volume%d.brick_count", i);
+ ret = dict_get_int32n(dict, key, keylen, &brick_count);
if (ret)
goto out;
- snprintf(key, 256, "volume%d.brick_count", i);
- ret = dict_get_int32(dict, key, &brick_count);
+ keylen = snprintf(key, sizeof(key), "volume%d.dist_count", i);
+ ret = dict_get_int32n(dict, key, keylen, &dist_count);
if (ret)
goto out;
- snprintf(key, 256, "volume%d.dist_count", i);
- ret = dict_get_int32(dict, key, &dist_count);
+ keylen = snprintf(key, sizeof(key), "volume%d.stripe_count", i);
+ ret = dict_get_int32n(dict, key, keylen, &stripe_count);
if (ret)
goto out;
- snprintf(key, 256, "volume%d.stripe_count", i);
- ret = dict_get_int32(dict, key, &stripe_count);
+ keylen = snprintf(key, sizeof(key), "volume%d.replica_count", i);
+ ret = dict_get_int32n(dict, key, keylen, &replica_count);
if (ret)
goto out;
- snprintf(key, 256, "volume%d.replica_count", i);
- ret = dict_get_int32(dict, key, &replica_count);
+ keylen = snprintf(key, sizeof(key), "volume%d.disperse_count", i);
+ ret = dict_get_int32n(dict, key, keylen, &disperse_count);
if (ret)
goto out;
- snprintf(key, 256, "volume%d.disperse_count", i);
- ret = dict_get_int32(dict, key, &disperse_count);
+ keylen = snprintf(key, sizeof(key), "volume%d.redundancy_count", i);
+ ret = dict_get_int32n(dict, key, keylen, &redundancy_count);
if (ret)
goto out;
- snprintf(key, 256, "volume%d.redundancy_count", i);
- ret = dict_get_int32(dict, key, &redundancy_count);
+ keylen = snprintf(key, sizeof(key), "volume%d.arbiter_count", i);
+ ret = dict_get_int32n(dict, key, keylen, &arbiter_count);
if (ret)
goto out;
- snprintf(key, sizeof(key), "volume%d.arbiter_count", i);
- ret = dict_get_int32(dict, key, &arbiter_count);
+ keylen = snprintf(key, sizeof(key), "volume%d.transport", i);
+ ret = dict_get_int32n(dict, key, keylen, &transport);
if (ret)
goto out;
- snprintf(key, 256, "volume%d.transport", i);
- ret = dict_get_int32(dict, key, &transport);
+ keylen = snprintf(key, sizeof(key), "volume%d.volume_id", i);
+ ret = dict_get_strn(dict, key, keylen, &volume_id_str);
if (ret)
goto out;
- snprintf(key, 256, "volume%d.volume_id", i);
- ret = dict_get_str(dict, key, &volume_id_str);
+ keylen = snprintf(key, sizeof(key), "volume%d.snap_count", i);
+ ret = dict_get_int32n(dict, key, keylen, &snap_count);
if (ret)
goto out;
- snprintf(key, 256, "volume%d.snap_count", i);
- ret = dict_get_int32(dict, key, &snap_count);
+ keylen = snprintf(key, sizeof(key), "volume%d.thin_arbiter_count", i);
+ ret = dict_get_int32n(dict, key, keylen, &thin_arbiter_count);
if (ret)
goto out;
@@ -1022,32 +913,6 @@ xml_output:
cli_out("Status: %s", cli_vol_status_str[status]);
cli_out("Snapshot Count: %d", snap_count);
-#ifdef HAVE_BD_XLATOR
- k = 0;
- snprintf(key, sizeof(key), "volume%d.xlator%d", i, k);
- ret = dict_get_str(dict, key, &caps);
- if (ret)
- goto next;
- do {
- j = 0;
- cli_out("Xlator %d: %s", k + 1, caps);
- do {
- snprintf(key, sizeof(key), "volume%d.xlator%d.caps%d", i, k,
- j++);
- ret = dict_get_str(dict, key, &caps);
- if (ret)
- break;
- cli_out("Capability %d: %s", j, caps);
- } while (1);
-
- snprintf(key, sizeof(key), "volume%d.xlator%d", i, ++k);
- ret = dict_get_str(dict, key, &caps);
- if (ret)
- break;
- } while (1);
-
- next:
-#endif
gf_cli_print_number_of_bricks(
type, brick_count, dist_count, stripe_count, replica_count,
disperse_count, redundancy_count, arbiter_count);
@@ -1060,19 +925,20 @@ xml_output:
GF_FREE(local->get_vol.volname);
local->get_vol.volname = gf_strdup(volname);
- if (type == GF_CLUSTER_TYPE_TIER) {
- ret = gf_cli_print_tier_info(dict, i, brick_count);
- if (ret)
- goto out;
+ cli_out("Bricks:");
+ ret = print_brick_details(dict, i, j, brick_count, replica_count);
+ if (ret)
+ goto out;
- } else {
- cli_out("Bricks:");
- ret = print_brick_details(dict, i, j, brick_count, replica_count);
+ if (thin_arbiter_count) {
+ snprintf(key, sizeof(key), "volume%d.thin_arbiter_brick", i);
+ ret = dict_get_str(dict, key, &ta_brick);
if (ret)
goto out;
+ cli_out("Thin-arbiter-path: %s", ta_brick);
}
- snprintf(key, 256, "volume%d.opt_count", i);
+ snprintf(key, sizeof(key), "volume%d.opt_count", i);
ret = dict_get_int32(dict, key, &opt_count);
if (ret)
goto out;
@@ -1082,7 +948,7 @@ xml_output:
cli_out("Options Reconfigured:");
- snprintf(key, 256, "volume%d.option.", i);
+ snprintf(key, sizeof(key), "volume%d.option.", i);
ret = dict_foreach(dict, _gf_cli_output_volinfo_opts, key);
if (ret)
@@ -1103,11 +969,11 @@ out:
gf_free_xdr_cli_rsp(rsp);
- gf_log("cli", GF_LOG_DEBUG, "Returning: %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
return ret;
}
-int
+static int
gf_cli_create_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -1134,24 +1000,19 @@ gf_cli_create_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log(frame->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
goto out;
}
gf_log("cli", GF_LOG_INFO, "Received resp to create volume");
- ret = dict_get_str(local->dict, "volname", &volname);
- if (ret)
- goto out;
-
if (global_state->mode & GLUSTER_MODE_XML) {
if (rsp.op_ret == 0) {
rsp_dict = dict_new();
ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len,
&rsp_dict);
if (ret) {
- gf_log("cli", GF_LOG_ERROR, "Failed rsp_dict unserialization");
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
goto out;
}
}
@@ -1159,10 +1020,14 @@ gf_cli_create_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = cli_xml_output_vol_create(rsp_dict, rsp.op_ret, rsp.op_errno,
rsp.op_errstr);
if (ret)
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto out;
}
+ ret = dict_get_str_sizen(local->dict, "volname", &volname);
+ if (ret)
+ goto out;
+
if (rsp.op_ret && strcmp(rsp.op_errstr, ""))
cli_err("volume create: %s: failed: %s", volname, rsp.op_errstr);
else if (rsp.op_ret)
@@ -1178,10 +1043,13 @@ gf_cli_create_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
out:
cli_cmd_broadcast_response(ret);
gf_free_xdr_cli_rsp(rsp);
+
+ if (rsp_dict)
+ dict_unref(rsp_dict);
return ret;
}
-int
+static int
gf_cli_delete_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -1194,12 +1062,11 @@ gf_cli_delete_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
call_frame_t *frame = NULL;
dict_t *rsp_dict = NULL;
- GF_ASSERT(myframe);
-
if (-1 == req->rpc_status) {
goto out;
}
+ GF_ASSERT(myframe);
frame = myframe;
GF_ASSERT(frame->local);
@@ -1208,14 +1075,7 @@ gf_cli_delete_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log(frame->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
- goto out;
- }
-
- ret = dict_get_str(local->dict, "volname", &volname);
- if (ret) {
- gf_log(frame->this->name, GF_LOG_ERROR, "dict get failed");
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
goto out;
}
@@ -1227,7 +1087,7 @@ gf_cli_delete_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len,
&rsp_dict);
if (ret) {
- gf_log("cli", GF_LOG_ERROR, "Failed rsp_dict unserialization");
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
goto out;
}
}
@@ -1235,7 +1095,13 @@ gf_cli_delete_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = cli_xml_output_generic_volume("volDelete", rsp_dict, rsp.op_ret,
rsp.op_errno, rsp.op_errstr);
if (ret)
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
+
+ ret = dict_get_str_sizen(local->dict, "volname", &volname);
+ if (ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR, "dict get failed");
goto out;
}
@@ -1251,11 +1117,14 @@ gf_cli_delete_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
out:
cli_cmd_broadcast_response(ret);
gf_free_xdr_cli_rsp(rsp);
- gf_log("", GF_LOG_DEBUG, "Returning with %d", ret);
+
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+ gf_log("", GF_LOG_DEBUG, RETURNING, ret);
return ret;
}
-int
+static int
gf_cli3_1_uuid_get_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -1281,8 +1150,7 @@ gf_cli3_1_uuid_get_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log(frame->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
goto out;
}
@@ -1298,17 +1166,7 @@ gf_cli3_1_uuid_get_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
if (ret) {
- gf_log("cli", GF_LOG_ERROR,
- "Failed to unserialize "
- "response for uuid get");
- goto out;
- }
-
- ret = dict_get_str(dict, "uuid", &uuid_str);
- if (ret) {
- gf_log("cli", GF_LOG_ERROR,
- "Failed to get uuid "
- "from dictionary");
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
goto out;
}
@@ -1316,7 +1174,7 @@ gf_cli3_1_uuid_get_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = cli_xml_output_dict("uuidGenerate", dict, rsp.op_ret,
rsp.op_errno, rsp.op_errstr);
if (ret)
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto out;
}
@@ -1327,6 +1185,11 @@ gf_cli3_1_uuid_get_cbk(struct rpc_req *req, struct iovec *iov, int count,
cli_err("%s", rsp.op_errstr);
} else {
+ ret = dict_get_str_sizen(dict, "uuid", &uuid_str);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get uuid from dictionary");
+ goto out;
+ }
cli_out("UUID: %s", uuid_str);
}
ret = rsp.op_ret;
@@ -1336,11 +1199,14 @@ out:
cli_local_wipe(local);
gf_free_xdr_cli_rsp(rsp);
- gf_log("", GF_LOG_DEBUG, "Returning with %d", ret);
+ if (dict)
+ dict_unref(dict);
+
+ gf_log("", GF_LOG_DEBUG, RETURNING, ret);
return ret;
}
-int
+static int
gf_cli3_1_uuid_reset_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -1365,8 +1231,7 @@ gf_cli3_1_uuid_reset_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log(frame->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
goto out;
}
@@ -1378,7 +1243,7 @@ gf_cli3_1_uuid_reset_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = cli_xml_output_dict("uuidReset", NULL, rsp.op_ret, rsp.op_errno,
rsp.op_errstr);
if (ret)
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto out;
}
@@ -1394,11 +1259,11 @@ out:
cli_local_wipe(local);
gf_free_xdr_cli_rsp(rsp);
- gf_log("", GF_LOG_DEBUG, "Returning with %d", ret);
+ gf_log("", GF_LOG_DEBUG, RETURNING, ret);
return ret;
}
-int
+static int
gf_cli_start_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -1425,14 +1290,7 @@ gf_cli_start_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log(frame->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
- goto out;
- }
-
- ret = dict_get_str(local->dict, "volname", &volname);
- if (ret) {
- gf_log("cli", GF_LOG_ERROR, "dict get failed");
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
goto out;
}
@@ -1444,7 +1302,7 @@ gf_cli_start_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len,
&rsp_dict);
if (ret) {
- gf_log("cli", GF_LOG_ERROR, "Failed rsp_dict unserialization");
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
goto out;
}
}
@@ -1452,7 +1310,13 @@ gf_cli_start_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = cli_xml_output_generic_volume("volStart", rsp_dict, rsp.op_ret,
rsp.op_errno, rsp.op_errstr);
if (ret)
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
+
+ ret = dict_get_str_sizen(local->dict, "volname", &volname);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "dict get failed");
goto out;
}
@@ -1468,10 +1332,13 @@ gf_cli_start_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
out:
cli_cmd_broadcast_response(ret);
gf_free_xdr_cli_rsp(rsp);
+
+ if (rsp_dict)
+ dict_unref(rsp_dict);
return ret;
}
-int
+static int
gf_cli_stop_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -1498,15 +1365,7 @@ gf_cli_stop_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log(frame->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
- goto out;
- }
-
- ret = dict_get_str(local->dict, "volname", &volname);
- if (ret) {
- gf_log(frame->this->name, GF_LOG_ERROR,
- "Unable to get volname from dict");
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
goto out;
}
@@ -1518,7 +1377,7 @@ gf_cli_stop_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len,
&rsp_dict);
if (ret) {
- gf_log("cli", GF_LOG_ERROR, "Failed rsp_dict unserialization");
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
goto out;
}
}
@@ -1526,7 +1385,14 @@ gf_cli_stop_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = cli_xml_output_generic_volume("volStop", rsp_dict, rsp.op_ret,
rsp.op_errno, rsp.op_errstr);
if (ret)
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
+
+ ret = dict_get_str_sizen(local->dict, "volname", &volname);
+ if (ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR,
+ "Unable to get volname from dict");
goto out;
}
@@ -1543,19 +1409,22 @@ out:
cli_cmd_broadcast_response(ret);
gf_free_xdr_cli_rsp(rsp);
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+
return ret;
}
-int
-gf_cli_print_rebalance_status(dict_t *dict, enum gf_task_types task_type,
- gf_boolean_t is_tier)
+static int
+gf_cli_print_rebalance_status(dict_t *dict, enum gf_task_types task_type)
{
int ret = -1;
int count = 0;
int i = 1;
- char key[256] = {
+ char key[64] = {
0,
};
+ int keylen;
gf_defrag_status_t status_rcd = GF_DEFRAG_STATUS_NOT_STARTED;
uint64_t files = 0;
uint64_t size = 0;
@@ -1569,26 +1438,32 @@ gf_cli_print_rebalance_status(dict_t *dict, enum gf_task_types task_type,
int32_t hrs = 0;
uint32_t min = 0;
uint32_t sec = 0;
- gf_boolean_t down = _gf_false;
gf_boolean_t fix_layout = _gf_false;
uint64_t max_time = 0;
uint64_t max_elapsed = 0;
uint64_t time_left = 0;
gf_boolean_t show_estimates = _gf_false;
- ret = dict_get_int32(dict, "count", &count);
+ ret = dict_get_int32_sizen(dict, "count", &count);
if (ret) {
gf_log("cli", GF_LOG_ERROR, "count not set");
goto out;
}
- snprintf(key, sizeof(key), "status-1");
-
- ret = dict_get_int32(dict, key, (int32_t *)&status_rcd);
- if (ret) {
- gf_log("cli", GF_LOG_TRACE, "count %d %d", count, 1);
- gf_log("cli", GF_LOG_TRACE, "failed to get status");
- goto out;
+ for (i = 1; i <= count; i++) {
+ keylen = snprintf(key, sizeof(key), "status-%d", i);
+ ret = dict_get_int32n(dict, key, keylen, (int32_t *)&status_rcd);
+ /* If information from a node is missing we should skip
+ * the node and try to fetch information of other nodes.
+ * If information is not found for all nodes, we should
+ * error out.
+ */
+ if (!ret)
+ break;
+ if (ret && i == count) {
+ gf_log("cli", GF_LOG_TRACE, "failed to get status");
+ goto out;
+ }
}
/* Fix layout will be sent to all nodes for the volume
@@ -1627,16 +1502,13 @@ gf_cli_print_rebalance_status(dict_t *dict, enum gf_task_types task_type,
time_left = 0;
/* Check if status is NOT_STARTED, and continue early */
- snprintf(key, sizeof(key), "status-%d", i);
+ keylen = snprintf(key, sizeof(key), "status-%d", i);
- ret = dict_get_int32(dict, key, (int32_t *)&status_rcd);
+ ret = dict_get_int32n(dict, key, keylen, (int32_t *)&status_rcd);
if (ret == -ENOENT) {
gf_log("cli", GF_LOG_TRACE, "count %d %d", count, i);
gf_log("cli", GF_LOG_TRACE, "failed to get status");
- gf_log("cli", GF_LOG_ERROR,
- "node down and has failed"
- " to set dict");
- down = _gf_true;
+ gf_log("cli", GF_LOG_ERROR, "node down and has failed to set dict");
continue;
/* skip this node if value not available*/
} else if (ret) {
@@ -1652,8 +1524,8 @@ gf_cli_print_rebalance_status(dict_t *dict, enum gf_task_types task_type,
if (GF_DEFRAG_STATUS_STARTED == status_rcd)
show_estimates = _gf_true;
- snprintf(key, 256, "node-name-%d", i);
- ret = dict_get_str(dict, key, &node_name);
+ keylen = snprintf(key, sizeof(key), "node-name-%d", i);
+ ret = dict_get_strn(dict, key, keylen, &node_name);
if (ret)
gf_log("cli", GF_LOG_TRACE, "failed to get node-name");
@@ -1737,12 +1609,6 @@ gf_cli_print_rebalance_status(dict_t *dict, enum gf_task_types task_type,
}
GF_FREE(size_str);
}
- if (is_tier && down)
- cli_out(
- "WARNING: glusterd might be down on one or more nodes."
- " Please check the nodes that are down using \'gluster"
- " peer status\' and start the glusterd on those nodes,"
- " else tier detach commit might fail!");
/* Max time will be non-zero if rebalance is still running */
if (max_time) {
@@ -1783,113 +1649,7 @@ out:
return ret;
}
-int
-gf_cli_print_tier_status(dict_t *dict, enum gf_task_types task_type)
-{
- int ret = -1;
- int count = 0;
- int i = 1;
- uint64_t promoted = 0;
- uint64_t demoted = 0;
- char key[256] = {
- 0,
- };
- char *node_name = NULL;
- gf_defrag_status_t status_rcd = GF_DEFRAG_STATUS_NOT_STARTED;
- char *status_str = NULL;
- gf_boolean_t down = _gf_false;
- double elapsed = 0;
- int hrs = 0;
- int min = 0;
- int sec = 0;
-
- ret = dict_get_int32(dict, "count", &count);
- if (ret) {
- gf_log("cli", GF_LOG_ERROR, "count not set");
- goto out;
- }
-
- cli_out("%-20s %-20s %-20s %-20s %-20s", "Node", "Promoted files",
- "Demoted files", "Status", "run time in h:m:s");
- cli_out("%-20s %-20s %-20s %-20s %-20s", "---------", "---------",
- "---------", "---------", "---------");
-
- for (i = 1; i <= count; i++) {
- /* Reset the variables to prevent carryover of values */
- node_name = NULL;
- promoted = 0;
- demoted = 0;
-
- /* Check if status is NOT_STARTED, and continue early */
- snprintf(key, sizeof(key), "status-%d", i);
-
- ret = dict_get_int32(dict, key, (int32_t *)&status_rcd);
- if (ret == -ENOENT) {
- gf_log("cli", GF_LOG_TRACE,
- "count: %d, %d,"
- "failed to get status",
- count, i);
- gf_log("cli", GF_LOG_ERROR,
- "node down and has failed"
- " to set dict");
- down = _gf_true;
- continue;
- /*skipping this node as value unavailable*/
- } else if (ret) {
- gf_log("cli", GF_LOG_TRACE,
- "count: %d, %d,"
- "failed to get status",
- count, i);
- continue;
- }
-
- if (GF_DEFRAG_STATUS_NOT_STARTED == status_rcd)
- continue;
-
- snprintf(key, sizeof(key), "node-name-%d", i);
- ret = dict_get_str(dict, key, &node_name);
- if (ret)
- gf_log("cli", GF_LOG_TRACE, "failed to get node-name");
-
- snprintf(key, sizeof(key), "promoted-%d", i);
- ret = dict_get_uint64(dict, key, &promoted);
- if (ret)
- gf_log("cli", GF_LOG_TRACE, "failed to get promoted count");
-
- snprintf(key, sizeof(key), "demoted-%d", i);
- ret = dict_get_uint64(dict, key, &demoted);
- if (ret)
- gf_log("cli", GF_LOG_TRACE, "failed to get demoted count");
-
- snprintf(key, sizeof(key), "run-time-%d", i);
- ret = dict_get_double(dict, key, &elapsed);
- if (ret)
- gf_log("cli", GF_LOG_TRACE, "failed to get run-time");
-
- /* Check for array bound */
- if (status_rcd >= GF_DEFRAG_STATUS_MAX)
- status_rcd = GF_DEFRAG_STATUS_MAX;
-
- hrs = elapsed / 3600;
- min = ((int)elapsed % 3600) / 60;
- sec = ((int)elapsed % 3600) % 60;
-
- status_str = cli_vol_task_status_str[status_rcd];
- cli_out("%-20s %-20" PRIu64 " %-20" PRIu64
- " %-20s"
- " %d:%d:%d",
- node_name, promoted, demoted, status_str, hrs, min, sec);
- }
- if (down)
- cli_out(
- "WARNING: glusterd might be down on one or more nodes."
- " Please check the nodes that are down using \'gluster"
- " peer status\' and start the glusterd on those nodes.");
-out:
- return ret;
-}
-
-int
+static int
gf_cli_defrag_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -1921,18 +1681,18 @@ gf_cli_defrag_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log(frame->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
goto out;
}
- ret = dict_get_str(local->dict, "volname", &volname);
+ ret = dict_get_str_sizen(local->dict, "volname", &volname);
if (ret) {
gf_log(frame->this->name, GF_LOG_ERROR, "Failed to get volname");
goto out;
}
- ret = dict_get_int32(local->dict, "rebalance-command", (int32_t *)&cmd);
+ ret = dict_get_int32_sizen(local->dict, "rebalance-command",
+ (int32_t *)&cmd);
if (ret) {
gf_log("cli", GF_LOG_ERROR, "Failed to get command");
goto out;
@@ -1944,21 +1704,17 @@ gf_cli_defrag_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
if (ret < 0) {
- gf_log("glusterd", GF_LOG_ERROR,
- "failed to "
- "unserialize req-buffer to dictionary");
+ gf_log("glusterd", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
goto out;
}
}
- if (!((cmd == GF_DEFRAG_CMD_STOP) || (cmd == GF_DEFRAG_CMD_STATUS) ||
- (cmd == GF_DEFRAG_CMD_STATUS_TIER)) &&
+ if (!((cmd == GF_DEFRAG_CMD_STOP) || (cmd == GF_DEFRAG_CMD_STATUS)) &&
!(global_state->mode & GLUSTER_MODE_XML)) {
- ret = dict_get_str(dict, GF_REBALANCE_TID_KEY, &task_id_str);
+ ret = dict_get_str_sizen(dict, GF_REBALANCE_TID_KEY, &task_id_str);
if (ret) {
gf_log("cli", GF_LOG_WARNING, "failed to get %s from dict",
GF_REBALANCE_TID_KEY);
- goto out;
}
if (rsp.op_ret && strcmp(rsp.op_errstr, "")) {
snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
@@ -1968,26 +1724,13 @@ gf_cli_defrag_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
* case since unlock failures can be highlighted
* event though rebalance command was successful
*/
- if (cmd == GF_DEFRAG_CMD_START_TIER) {
- snprintf(msg, sizeof(msg),
- "Tier "
- "start is successful on %s.",
- volname);
- } else if (cmd == GF_DEFRAG_CMD_STOP_TIER) {
- snprintf(msg, sizeof(msg),
- "Tier "
- "daemon stopped "
- "on %s.",
- volname);
- } else {
- snprintf(msg, sizeof(msg),
- "Rebalance on %s has been "
- "started successfully. Use "
- "rebalance status command to"
- " check status of the "
- "rebalance process.\nID: %s",
- volname, task_id_str);
- }
+ snprintf(msg, sizeof(msg),
+ "Rebalance on %s has been "
+ "started successfully. Use "
+ "rebalance status command to"
+ " check status of the "
+ "rebalance process.\nID: %s",
+ volname, task_id_str);
} else {
snprintf(msg, sizeof(msg),
"Starting rebalance on volume %s has "
@@ -2020,14 +1763,13 @@ gf_cli_defrag_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
rsp.op_errstr);
}
}
- if (cmd == GF_DEFRAG_CMD_STATUS || cmd == GF_DEFRAG_CMD_STATUS_TIER) {
+ if (cmd == GF_DEFRAG_CMD_STATUS) {
if (rsp.op_ret == -1) {
if (strcmp(rsp.op_errstr, ""))
snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
else
snprintf(msg, sizeof(msg),
- "Failed to get the status of "
- "rebalance process");
+ "Failed to get the status of rebalance process");
goto done;
} else {
snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
@@ -2040,15 +1782,7 @@ gf_cli_defrag_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
goto out;
}
- if (cmd == GF_DEFRAG_CMD_STATUS_TIER)
- ret = gf_cli_print_tier_status(dict, GF_TASK_TYPE_REBALANCE);
- else if (cmd == GF_DEFRAG_CMD_DETACH_STATUS)
- ret = gf_cli_print_rebalance_status(dict, GF_TASK_TYPE_REBALANCE,
- _gf_true);
- else
- ret = gf_cli_print_rebalance_status(dict, GF_TASK_TYPE_REBALANCE,
- _gf_false);
-
+ ret = gf_cli_print_rebalance_status(dict, GF_TASK_TYPE_REBALANCE);
if (ret)
gf_log("cli", GF_LOG_ERROR, "Failed to print rebalance status");
@@ -2058,23 +1792,9 @@ done:
rsp.op_errstr);
else {
if (rsp.op_ret)
-
- if (cmd == GF_DEFRAG_CMD_START_TIER ||
- cmd == GF_DEFRAG_CMD_STATUS_TIER) {
- cli_err(
- "Tiering Migration Functionality: %s:"
- " failed%s%s",
- volname, strlen(msg) ? ": " : "", msg);
- } else
- cli_err("volume rebalance: %s: failed%s%s", volname,
- strlen(msg) ? ": " : "", msg);
- else if (cmd == GF_DEFRAG_CMD_START_TIER ||
- cmd == GF_DEFRAG_CMD_STATUS_TIER) {
- cli_out(
- "Tiering Migration Functionality: %s:"
- " success%s%s",
- volname, strlen(msg) ? ": " : "", msg);
- } else
+ cli_err("volume rebalance: %s: failed%s%s", volname,
+ strlen(msg) ? ": " : "", msg);
+ else
cli_out("volume rebalance: %s: success%s%s", volname,
strlen(msg) ? ": " : "", msg);
}
@@ -2088,7 +1808,7 @@ out:
return ret;
}
-int
+static int
gf_cli_rename_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -2109,7 +1829,7 @@ gf_cli_rename_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ XDR_DECODE_FAIL);
goto out;
}
@@ -2121,7 +1841,7 @@ gf_cli_rename_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = cli_xml_output_str("volRename", msg, rsp.op_ret, rsp.op_errno,
rsp.op_errstr);
if (ret)
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto out;
}
@@ -2138,7 +1858,7 @@ out:
return ret;
}
-int
+static int
gf_cli_reset_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -2159,7 +1879,7 @@ gf_cli_reset_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ XDR_DECODE_FAIL);
goto out;
}
@@ -2175,7 +1895,7 @@ gf_cli_reset_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = cli_xml_output_str("volReset", msg, rsp.op_ret, rsp.op_errno,
rsp.op_errstr);
if (ret)
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto out;
}
@@ -2192,7 +1912,63 @@ out:
return ret;
}
-char *
+static int
+gf_cli_ganesha_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ dict_t *dict = NULL;
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ gf_log("cli", GF_LOG_DEBUG, "Received resp to ganesha");
+
+ dict = dict_new();
+
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+ if (ret)
+ goto out;
+
+ if (rsp.op_ret) {
+ if (strcmp(rsp.op_errstr, ""))
+ cli_err("nfs-ganesha: failed: %s", rsp.op_errstr);
+ else
+ cli_err("nfs-ganesha: failed");
+ }
+
+ else {
+ cli_out("nfs-ganesha : success ");
+ }
+
+ ret = rsp.op_ret;
+
+out:
+ if (dict)
+ dict_unref(dict);
+ cli_cmd_broadcast_response(ret);
+ return ret;
+}
+
+static char *
is_server_debug_xlator(void *myframe)
{
call_frame_t *frame = NULL;
@@ -2237,7 +2013,7 @@ is_server_debug_xlator(void *myframe)
return debug_xlator;
}
-int
+static int
gf_cli_set_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -2264,7 +2040,7 @@ gf_cli_set_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ XDR_DECODE_FAIL);
goto out;
}
@@ -2279,8 +2055,7 @@ gf_cli_set_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
if (ret) {
- gf_log("cli", GF_LOG_ERROR,
- "failed to unserialize volume set respone dict");
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
goto out;
}
@@ -2291,7 +2066,7 @@ gf_cli_set_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
*/
debug_xlator = is_server_debug_xlator(myframe);
- if (dict_get_str(dict, "help-str", &help_str) && !msg[0])
+ if (dict_get_str_sizen(dict, "help-str", &help_str) && !msg[0])
snprintf(msg, sizeof(msg), "Set volume %s",
(rsp.op_ret) ? "unsuccessful" : "successful");
if (rsp.op_ret == 0 && debug_xlator) {
@@ -2306,7 +2081,7 @@ gf_cli_set_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = cli_xml_output_str("volSet", msg, rsp.op_ret, rsp.op_errno,
rsp.op_errstr);
if (ret)
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto out;
}
@@ -2338,405 +2113,6 @@ out:
}
int
-gf_cli_add_tier_brick_cbk(struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- gf_cli_rsp rsp = {
- 0,
- };
- int ret = -1;
- char msg[1024] = {
- 0,
- };
-
- GF_VALIDATE_OR_GOTO("cli", myframe, out);
-
- if (-1 == req->rpc_status) {
- goto out;
- }
-
- ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
- goto out;
- }
-
- gf_log("cli", GF_LOG_INFO, "Received resp to attach tier");
-
- if (rsp.op_ret && strcmp(rsp.op_errstr, ""))
- snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
- else
- snprintf(msg, sizeof(msg), "Attach tier %s",
- (rsp.op_ret) ? "unsuccessful" : "successful");
-
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_str("volAttachTier", msg, rsp.op_ret, rsp.op_errno,
- rsp.op_errstr);
- if (ret)
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
- goto out;
- }
-
- if (rsp.op_ret)
- cli_err("volume attach-tier: failed: %s", msg);
- else
- cli_out("volume attach-tier: success");
- ret = rsp.op_ret;
-
-out:
- cli_cmd_broadcast_response(ret);
- gf_free_xdr_cli_rsp(rsp);
- return ret;
-}
-
-int
-gf_cli_attach_tier_cbk(struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- gf_cli_rsp rsp = {
- 0,
- };
- int ret = -1;
- char msg[1024] = {
- 0,
- };
-
- GF_VALIDATE_OR_GOTO("cli", myframe, out);
-
- if (-1 == req->rpc_status) {
- goto out;
- }
-
- ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
- goto out;
- }
-
- gf_log("cli", GF_LOG_INFO, "Received resp to attach tier");
-
- if (rsp.op_ret && strcmp(rsp.op_errstr, ""))
- snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
- else
- snprintf(msg, sizeof(msg), "Attach tier %s",
- (rsp.op_ret) ? "unsuccessful" : "successful");
-
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_str("volAttachTier", msg, rsp.op_ret, rsp.op_errno,
- rsp.op_errstr);
- if (ret)
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
- goto out;
- }
-
- if (rsp.op_ret)
- cli_err("volume attach-tier: failed: %s", msg);
- else
- cli_out("volume attach-tier: success");
- ret = rsp.op_ret;
-
-out:
- cli_cmd_broadcast_response(ret);
- gf_free_xdr_cli_rsp(rsp);
- return ret;
-}
-
-int
-gf_cli_remove_tier_brick_cbk(struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- gf_cli_rsp rsp = {
- 0,
- };
- int ret = -1;
- char msg[1024] = {
- 0,
- };
- char *cmd_str = "unknown";
- cli_local_t *local = NULL;
- call_frame_t *frame = NULL;
- char *task_id_str = NULL;
- dict_t *rsp_dict = NULL;
- int32_t command = 0;
-
- GF_ASSERT(myframe);
-
- if (-1 == req->rpc_status) {
- goto out;
- }
-
- frame = myframe;
-
- GF_ASSERT(frame->local);
-
- local = frame->local;
-
- ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log(frame->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
- goto out;
- }
-
- ret = dict_get_int32(local->dict, "command", &command);
- if (ret) {
- gf_log("", GF_LOG_ERROR, "failed to get command");
- goto out;
- }
-
- if (rsp.dict.dict_len) {
- rsp_dict = dict_new();
- if (!rsp_dict) {
- ret = -1;
- goto out;
- }
-
- ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &rsp_dict);
- if (ret) {
- gf_log("cli", GF_LOG_ERROR, "Failed to unserialize rsp_dict");
- goto out;
- }
- }
-
- switch (command) {
- case GF_DEFRAG_CMD_DETACH_START:
- cmd_str = "start";
-
- ret = dict_get_str(rsp_dict, GF_REMOVE_BRICK_TID_KEY, &task_id_str);
- if (ret) {
- gf_log("cli", GF_LOG_ERROR,
- "remove-brick-id is not present in dict");
- }
- break;
- case GF_DEFRAG_CMD_DETACH_COMMIT:
- cmd_str = "commit";
- break;
- case GF_DEFRAG_CMD_DETACH_COMMIT_FORCE:
- cmd_str = "commit force";
- break;
- case GF_DEFRAG_CMD_DETACH_STOP:
- cmd_str = "stop";
- break;
- case GF_DEFRAG_CMD_DETACH_STATUS:
- cmd_str = "status";
- break;
-
- default:
- cmd_str = "unknown";
- break;
- }
-
- gf_log("cli", GF_LOG_INFO, "Received resp to detach tier");
-
- if (rsp.op_ret && strcmp(rsp.op_errstr, ""))
- snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
- else
- snprintf(msg, sizeof(msg), "Detach tier %s %s", cmd_str,
- (rsp.op_ret) ? "unsuccessful" : "successful");
-
- ret = rsp.op_ret;
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_vol_remove_brick_detach_tier(
- _gf_true, rsp_dict, rsp.op_ret, rsp.op_errno, msg, "volDetachTier");
-
- if (ret)
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
- goto out;
- } else {
- if (rsp.op_ret) {
- if (strcmp(rsp.op_errstr, ""))
- snprintf(msg, sizeof(msg),
- "volume tier "
- "detach %s: failed: %s",
- cmd_str, rsp.op_errstr);
- else
- snprintf(msg, sizeof(msg),
- "volume tier "
- "detach %s: failed",
- cmd_str);
-
- cli_err("%s", msg);
- goto out;
-
- } else {
- cli_out("volume detach tier %s: success", cmd_str);
- if (GF_DEFRAG_CMD_DETACH_START == command && task_id_str != NULL)
- cli_out("ID: %s", task_id_str);
- if (GF_DEFRAG_CMD_DETACH_COMMIT == command)
- cli_out(
- "Check the detached bricks to ensure "
- "all files are migrated.\nIf files "
- "with data are found on the brick "
- "path, copy them via a gluster mount "
- "point before re-purposing the "
- "removed brick. ");
- }
- }
- if (command == GF_DEFRAG_CMD_DETACH_STOP ||
- command == GF_DEFRAG_CMD_DETACH_STATUS)
- ret = gf_cli_print_rebalance_status(rsp_dict, GF_TASK_TYPE_REMOVE_BRICK,
- _gf_true);
- if (ret) {
- gf_log("cli", GF_LOG_ERROR,
- "Failed to print remove-brick "
- "rebalance status");
- goto out;
- }
-
- if ((command == GF_DEFRAG_CMD_DETACH_STOP) && (rsp.op_ret == 0)) {
- cli_out(
- "'detach tier' process may be in the middle of a "
- "file migration.\nThe process will be fully stopped "
- "once the migration of the file is complete.\nPlease "
- "check detach tier process for completion before "
- "doing any further brick related tasks on the "
- "volume.");
- }
- ret = rsp.op_ret;
-
-out:
- cli_cmd_broadcast_response(ret);
- gf_free_xdr_cli_rsp(rsp);
- return ret;
-}
-
-int
-gf_cli_detach_tier_status_cbk(struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- gf_cli_rsp rsp = {
- 0,
- };
- int ret = -1;
- dict_t *dict = NULL;
- char msg[1024] = {
- 0,
- };
- int32_t command = 0;
- gf1_op_commands cmd = GF_OP_CMD_NONE;
- cli_local_t *local = NULL;
- call_frame_t *frame = NULL;
- char *cmd_str = "unknown";
-
- GF_ASSERT(myframe);
-
- if (-1 == req->rpc_status) {
- goto out;
- }
-
- frame = myframe;
-
- GF_ASSERT(frame->local);
-
- local = frame->local;
-
- ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log(frame->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
- goto out;
- }
-
- ret = dict_get_int32(local->dict, "command", &command);
- if (ret)
- goto out;
-
- cmd = command;
-
- switch (cmd) {
- case GF_OP_CMD_STOP_DETACH_TIER:
- cmd_str = "stop";
- break;
- case GF_OP_CMD_STATUS:
- cmd_str = "status";
- break;
- default:
- break;
- }
-
- ret = rsp.op_ret;
- if (rsp.op_ret == -1) {
- if (strcmp(rsp.op_errstr, ""))
- snprintf(msg, sizeof(msg),
- "volume tier detach %s: "
- "failed: %s",
- cmd_str, rsp.op_errstr);
- else
- snprintf(msg, sizeof(msg),
- "volume tier detach %s: "
- "failed",
- cmd_str);
-
- if (global_state->mode & GLUSTER_MODE_XML)
- goto xml_output;
-
- cli_err("%s", msg);
- goto out;
- }
-
- if (rsp.dict.dict_len) {
- /* Unserialize the dictionary */
- dict = dict_new();
-
- ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
- if (ret < 0) {
- strncpy(msg,
- "failed to unserialize req-buffer to "
- "dictionary",
- sizeof(msg));
-
- if (global_state->mode & GLUSTER_MODE_XML) {
- rsp.op_ret = -1;
- goto xml_output;
- }
-
- gf_log("cli", GF_LOG_ERROR, "%s", msg);
- goto out;
- }
- }
-xml_output:
- if (global_state->mode & GLUSTER_MODE_XML) {
- if (strcmp(rsp.op_errstr, "")) {
- ret = cli_xml_output_vol_remove_brick_detach_tier(
- _gf_true, dict, rsp.op_ret, rsp.op_errno, rsp.op_errstr,
- "volDetachTier");
- } else {
- ret = cli_xml_output_vol_remove_brick_detach_tier(
- _gf_true, dict, rsp.op_ret, rsp.op_errno, msg, "volDetachTier");
- }
- goto out;
- }
-
- ret = gf_cli_print_rebalance_status(dict, GF_TASK_TYPE_REMOVE_BRICK,
- _gf_true);
- if (ret) {
- gf_log("cli", GF_LOG_ERROR,
- "Failed to print remove-brick "
- "rebalance status");
- goto out;
- }
-
- if ((cmd == GF_OP_CMD_STOP_DETACH_TIER) && (rsp.op_ret == 0)) {
- cli_out(
- "'detach tier' process may be in the middle of a "
- "file migration.\nThe process will be fully stopped "
- "once the migration of the file is complete.\nPlease "
- "check detach tier process for completion before "
- "doing any further brick related tasks on the "
- "volume.");
- }
-
-out:
- if (dict)
- dict_unref(dict);
- cli_cmd_broadcast_response(ret);
- gf_free_xdr_cli_rsp(rsp);
- return ret;
-}
-
-int
gf_cli_add_brick_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -2757,7 +2133,7 @@ gf_cli_add_brick_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ XDR_DECODE_FAIL);
goto out;
}
@@ -2773,7 +2149,7 @@ gf_cli_add_brick_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = cli_xml_output_str("volAddBrick", msg, rsp.op_ret, rsp.op_errno,
rsp.op_errstr);
if (ret)
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto out;
}
@@ -2789,7 +2165,7 @@ out:
return ret;
}
-int
+static int
gf_cli3_remove_brick_status_cbk(struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
@@ -2805,7 +2181,7 @@ gf_cli3_remove_brick_status_cbk(struct rpc_req *req, struct iovec *iov,
gf1_op_commands cmd = GF_OP_CMD_NONE;
cli_local_t *local = NULL;
call_frame_t *frame = NULL;
- char *cmd_str = "unknown";
+ const char *cmd_str;
GF_ASSERT(myframe);
@@ -2821,12 +2197,11 @@ gf_cli3_remove_brick_status_cbk(struct rpc_req *req, struct iovec *iov,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log(frame->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
goto out;
}
- ret = dict_get_int32(local->dict, "command", &command);
+ ret = dict_get_int32_sizen(local->dict, "command", &command);
if (ret)
goto out;
@@ -2840,20 +2215,17 @@ gf_cli3_remove_brick_status_cbk(struct rpc_req *req, struct iovec *iov,
cmd_str = "status";
break;
default:
+ cmd_str = "unknown";
break;
}
ret = rsp.op_ret;
if (rsp.op_ret == -1) {
if (strcmp(rsp.op_errstr, ""))
- snprintf(msg, sizeof(msg),
- "volume remove-brick %s: "
- "failed: %s",
+ snprintf(msg, sizeof(msg), "volume remove-brick %s: failed: %s",
cmd_str, rsp.op_errstr);
else
- snprintf(msg, sizeof(msg),
- "volume remove-brick %s: "
- "failed",
+ snprintf(msg, sizeof(msg), "volume remove-brick %s: failed",
cmd_str);
if (global_state->mode & GLUSTER_MODE_XML)
@@ -2869,10 +2241,7 @@ gf_cli3_remove_brick_status_cbk(struct rpc_req *req, struct iovec *iov,
ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
if (ret < 0) {
- strncpy(msg,
- "failed to unserialize req-buffer to "
- "dictionary",
- sizeof(msg));
+ strncpy(msg, DICT_UNSERIALIZE_FAIL, sizeof(msg));
if (global_state->mode & GLUSTER_MODE_XML) {
rsp.op_ret = -1;
@@ -2887,23 +2256,21 @@ gf_cli3_remove_brick_status_cbk(struct rpc_req *req, struct iovec *iov,
xml_output:
if (global_state->mode & GLUSTER_MODE_XML) {
if (strcmp(rsp.op_errstr, "")) {
- ret = cli_xml_output_vol_remove_brick_detach_tier(
- _gf_true, dict, rsp.op_ret, rsp.op_errno, rsp.op_errstr,
- "volRemoveBrick");
+ ret = cli_xml_output_vol_remove_brick(_gf_true, dict, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr,
+ "volRemoveBrick");
} else {
- ret = cli_xml_output_vol_remove_brick_detach_tier(
- _gf_true, dict, rsp.op_ret, rsp.op_errno, msg,
- "volRemoveBrick");
+ ret = cli_xml_output_vol_remove_brick(_gf_true, dict, rsp.op_ret,
+ rsp.op_errno, msg,
+ "volRemoveBrick");
}
goto out;
}
- ret = gf_cli_print_rebalance_status(dict, GF_TASK_TYPE_REMOVE_BRICK,
- _gf_false);
+ ret = gf_cli_print_rebalance_status(dict, GF_TASK_TYPE_REMOVE_BRICK);
if (ret) {
gf_log("cli", GF_LOG_ERROR,
- "Failed to print remove-brick "
- "rebalance status");
+ "Failed to print remove-brick rebalance status");
goto out;
}
@@ -2925,7 +2292,7 @@ out:
return ret;
}
-int
+static int
gf_cli_remove_brick_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -2957,12 +2324,11 @@ gf_cli_remove_brick_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log(frame->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
goto out;
}
- ret = dict_get_int32(local->dict, "command", (int32_t *)&cmd);
+ ret = dict_get_int32_sizen(local->dict, "command", (int32_t *)&cmd);
if (ret) {
gf_log("", GF_LOG_ERROR, "failed to get command");
goto out;
@@ -2977,7 +2343,7 @@ gf_cli_remove_brick_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &rsp_dict);
if (ret) {
- gf_log("cli", GF_LOG_ERROR, "Failed to unserialize rsp_dict");
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
goto out;
}
}
@@ -2987,7 +2353,8 @@ gf_cli_remove_brick_cbk(struct rpc_req *req, struct iovec *iov, int count,
case GF_OP_CMD_START:
cmd_str = "start";
- ret = dict_get_str(rsp_dict, GF_REMOVE_BRICK_TID_KEY, &task_id_str);
+ ret = dict_get_str_sizen(rsp_dict, GF_REMOVE_BRICK_TID_KEY,
+ &task_id_str);
if (ret) {
gf_log("cli", GF_LOG_ERROR,
"remove-brick-id is not present in dict");
@@ -3013,11 +2380,11 @@ gf_cli_remove_brick_cbk(struct rpc_req *req, struct iovec *iov, int count,
(rsp.op_ret) ? "unsuccessful" : "successful");
if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_vol_remove_brick_detach_tier(
- _gf_false, rsp_dict, rsp.op_ret, rsp.op_errno, msg,
- "volRemoveBrick");
+ ret = cli_xml_output_vol_remove_brick(_gf_false, rsp_dict, rsp.op_ret,
+ rsp.op_errno, msg,
+ "volRemoveBrick");
if (ret)
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto out;
}
@@ -3048,7 +2415,7 @@ out:
return ret;
}
-int
+static int
gf_cli_reset_brick_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -3058,7 +2425,7 @@ gf_cli_reset_brick_cbk(struct rpc_req *req, struct iovec *iov, int count,
int ret = -1;
cli_local_t *local = NULL;
call_frame_t *frame = NULL;
- char *rb_operation_str = NULL;
+ const char *rb_operation_str = NULL;
dict_t *rsp_dict = NULL;
char msg[1024] = {
0,
@@ -3079,80 +2446,53 @@ gf_cli_reset_brick_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log(frame->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
goto out;
}
- ret = dict_get_str(local->dict, "operation", &reset_op);
+ ret = dict_get_str_sizen(local->dict, "operation", &reset_op);
if (ret) {
gf_log(frame->this->name, GF_LOG_ERROR, "dict_get on operation failed");
goto out;
}
+ if (strcmp(reset_op, "GF_RESET_OP_START") &&
+ strcmp(reset_op, "GF_RESET_OP_COMMIT") &&
+ strcmp(reset_op, "GF_RESET_OP_COMMIT_FORCE")) {
+ ret = -1;
+ goto out;
+ }
+
if (rsp.dict.dict_len) {
/* Unserialize the dictionary */
rsp_dict = dict_new();
ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &rsp_dict);
if (ret < 0) {
- gf_log(frame->this->name, GF_LOG_ERROR,
- "failed to "
- "unserialize rsp buffer to dictionary");
+ gf_log(frame->this->name, GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
goto out;
}
}
- if (strcmp(reset_op, "GF_RESET_OP_START") &&
- strcmp(reset_op, "GF_RESET_OP_COMMIT") &&
- strcmp(reset_op, "GF_RESET_OP_COMMIT_FORCE")) {
- rb_operation_str = gf_strdup("Unknown operation");
- ret = -1;
- goto out;
- }
-
if (rsp.op_ret && (strcmp(rsp.op_errstr, ""))) {
- rb_operation_str = gf_strdup(rsp.op_errstr);
+ rb_operation_str = rsp.op_errstr;
} else {
if (!strcmp(reset_op, "GF_RESET_OP_START")) {
if (rsp.op_ret)
- rb_operation_str = gf_strdup(
- "reset-brick "
- "start "
- "operation "
- "failed");
+ rb_operation_str = "reset-brick start operation failed";
else
- rb_operation_str = gf_strdup(
- "reset-brick "
- "start "
- "operation "
- "successful");
+ rb_operation_str = "reset-brick start operation successful";
} else if (!strcmp(reset_op, "GF_RESET_OP_COMMIT")) {
if (rsp.op_ret)
- rb_operation_str = gf_strdup(
- "reset-brick "
- "commit "
- "operation "
- "failed");
+ rb_operation_str = "reset-brick commit operation failed";
else
- rb_operation_str = gf_strdup(
- "reset-brick "
- "commit "
- "operation "
- "successful");
+ rb_operation_str = "reset-brick commit operation successful";
} else if (!strcmp(reset_op, "GF_RESET_OP_COMMIT_FORCE")) {
if (rsp.op_ret)
- rb_operation_str = gf_strdup(
- "reset-brick "
- "commit "
- "force operation "
- "failed");
+ rb_operation_str = "reset-brick commit force operation failed";
else
- rb_operation_str = gf_strdup(
- "reset-brick "
- "commit "
- "force operation "
- "successful");
+ rb_operation_str =
+ "reset-brick commit force operation successful";
}
}
@@ -3164,7 +2504,7 @@ gf_cli_reset_brick_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = cli_xml_output_vol_replace_brick(rsp_dict, rsp.op_ret,
rsp.op_errno, msg);
if (ret)
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto out;
}
@@ -3181,9 +2521,6 @@ out:
if (local)
cli_local_wipe(local);
- if (rb_operation_str)
- GF_FREE(rb_operation_str);
-
cli_cmd_broadcast_response(ret);
gf_free_xdr_cli_rsp(rsp);
if (rsp_dict)
@@ -3191,7 +2528,7 @@ out:
return ret;
}
-int
+static int
gf_cli_replace_brick_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -3201,7 +2538,7 @@ gf_cli_replace_brick_cbk(struct rpc_req *req, struct iovec *iov, int count,
int ret = -1;
cli_local_t *local = NULL;
call_frame_t *frame = NULL;
- char *rb_operation_str = NULL;
+ const char *rb_operation_str = NULL;
dict_t *rsp_dict = NULL;
char msg[1024] = {
0,
@@ -3222,12 +2559,11 @@ gf_cli_replace_brick_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log(frame->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
goto out;
}
- ret = dict_get_str(local->dict, "operation", &replace_op);
+ ret = dict_get_str_sizen(local->dict, "operation", &replace_op);
if (ret) {
gf_log(frame->this->name, GF_LOG_ERROR, "dict_get on operation failed");
goto out;
@@ -3239,29 +2575,23 @@ gf_cli_replace_brick_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &rsp_dict);
if (ret < 0) {
- gf_log(frame->this->name, GF_LOG_ERROR,
- "failed to "
- "unserialize rsp buffer to dictionary");
+ gf_log(frame->this->name, GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
goto out;
}
}
if (!strcmp(replace_op, "GF_REPLACE_OP_COMMIT_FORCE")) {
if (rsp.op_ret || ret)
- rb_operation_str = gf_strdup(
- "replace-brick commit "
- "force operation failed");
+ rb_operation_str = "replace-brick commit force operation failed";
else
- rb_operation_str = gf_strdup(
- "replace-brick commit "
- "force operation "
- "successful");
+ rb_operation_str =
+ "replace-brick commit force operation successful";
} else {
gf_log(frame->this->name, GF_LOG_DEBUG, "Unknown operation");
}
if (rsp.op_ret && (strcmp(rsp.op_errstr, ""))) {
- rb_operation_str = gf_strdup(rsp.op_errstr);
+ rb_operation_str = rsp.op_errstr;
}
gf_log("cli", GF_LOG_INFO, "Received resp to replace brick");
@@ -3272,7 +2602,7 @@ gf_cli_replace_brick_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = cli_xml_output_vol_replace_brick(rsp_dict, rsp.op_ret,
rsp.op_errno, msg);
if (ret)
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto out;
}
@@ -3289,9 +2619,6 @@ out:
if (local)
cli_local_wipe(local);
- if (rb_operation_str)
- GF_FREE(rb_operation_str);
-
cli_cmd_broadcast_response(ret);
gf_free_xdr_cli_rsp(rsp);
if (rsp_dict)
@@ -3321,7 +2648,7 @@ gf_cli_log_rotate_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ XDR_DECODE_FAIL);
goto out;
}
@@ -3337,7 +2664,7 @@ gf_cli_log_rotate_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = cli_xml_output_str("volLogRotate", msg, rsp.op_ret, rsp.op_errno,
rsp.op_errstr);
if (ret)
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto out;
}
@@ -3375,7 +2702,7 @@ gf_cli_sync_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ XDR_DECODE_FAIL);
goto out;
}
@@ -3391,7 +2718,7 @@ gf_cli_sync_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = cli_xml_output_str("volSync", msg, rsp.op_ret, rsp.op_errno,
rsp.op_errstr);
if (ret)
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto out;
}
@@ -3420,29 +2747,23 @@ print_quota_list_usage_output(cli_local_t *local, char *path, int64_t avail,
char *hl_str = NULL;
char *sl_val = NULL;
- used_str = gf_uint64_2human_readable(used_space->size);
-
- if (limit_set) {
- hl_str = gf_uint64_2human_readable(limits->hl);
- avail_str = gf_uint64_2human_readable(avail);
-
- sl_val = gf_uint64_2human_readable(sl_num);
- }
-
if (global_state->mode & GLUSTER_MODE_XML) {
ret = cli_quota_xml_output(local, path, limits->hl, sl_str, sl_num,
used_space->size, avail, sl ? "Yes" : "No",
hl ? "Yes" : "No", limit_set);
if (ret) {
gf_log("cli", GF_LOG_ERROR,
- "Failed to "
- "output in xml format for quota "
- "list command");
+ "Failed to output in xml format for quota list command");
}
goto out;
}
+ used_str = gf_uint64_2human_readable(used_space->size);
+
if (limit_set) {
+ hl_str = gf_uint64_2human_readable(limits->hl);
+ sl_val = gf_uint64_2human_readable(sl_num);
+
if (!used_str) {
cli_out("%-40s %7s %7s(%s) %8" PRIu64 "%9" PRIu64
""
@@ -3450,6 +2771,7 @@ print_quota_list_usage_output(cli_local_t *local, char *path, int64_t avail,
path, hl_str, sl_str, sl_val, used_space->size, avail,
sl ? "Yes" : "No", hl ? "Yes" : "No");
} else {
+ avail_str = gf_uint64_2human_readable(avail);
cli_out("%-40s %7s %7s(%s) %8s %7s %15s %20s", path, hl_str, sl_str,
sl_val, used_str, avail_str, sl ? "Yes" : "No",
hl ? "Yes" : "No");
@@ -3485,9 +2807,7 @@ print_quota_list_object_output(cli_local_t *local, char *path, int64_t avail,
hl ? "Yes" : "No", limit_set);
if (ret) {
gf_log("cli", GF_LOG_ERROR,
- "Failed to "
- "output in xml format for quota "
- "list command");
+ "Failed to output in xml format for quota list command");
}
goto out;
}
@@ -3533,16 +2853,17 @@ print_quota_list_output(cli_local_t *local, char *path, char *default_sl,
ret = gf_string2percent(default_sl, &sl_num);
if (ret) {
gf_log("cli", GF_LOG_ERROR,
- "could not convert default soft limit"
- " to percent");
+ "could not convert default soft limit to percent");
goto out;
}
sl_num = (sl_num * limits->hl) / 100;
sl_final = default_sl;
} else {
sl_num = (limits->sl * limits->hl) / 100;
- snprintf(percent_str, sizeof(percent_str), "%" PRIu64 "%%",
- limits->sl);
+ ret = snprintf(percent_str, sizeof(percent_str), "%" PRIu64 "%%",
+ limits->sl);
+ if (ret < 0)
+ goto out;
sl_final = percent_str;
}
if (type == GF_QUOTA_OPTION_TYPE_LIST)
@@ -3602,9 +2923,8 @@ print_quota_list_from_mountdir(cli_local_t *local, char *mountdir,
ret = sys_lgetxattr(mountdir, key, (void *)&limits, sizeof(limits));
if (ret < 0) {
gf_log("cli", GF_LOG_ERROR,
- "Failed to get the xattr %s "
- "on %s. Reason : %s",
- key, mountdir, strerror(errno));
+ "Failed to get the xattr %s on %s. Reason : %s", key, mountdir,
+ strerror(errno));
switch (errno) {
#if defined(ENODATA)
@@ -3663,9 +2983,7 @@ enoattr:
}
if (ret < 0) {
- gf_log("cli", GF_LOG_ERROR,
- "Failed to get quota size "
- "on path %s: %s",
+ gf_log("cli", GF_LOG_ERROR, "Failed to get quota size on path %s: %s",
mountdir, strerror(errno));
print_quota_list_empty(path, type);
goto out;
@@ -3681,7 +2999,7 @@ out:
return ret;
}
-int
+static int
gluster_remove_auxiliary_mount(char *volname)
{
int ret = -1;
@@ -3696,25 +3014,24 @@ gluster_remove_auxiliary_mount(char *volname)
GLUSTERD_GET_QUOTA_LIST_MOUNT_PATH(mountdir, volname, "/");
ret = gf_umount_lazy(this->name, mountdir, 1);
if (ret) {
- gf_log("cli", GF_LOG_ERROR,
- "umount on %s failed, "
- "reason : %s",
+ gf_log("cli", GF_LOG_ERROR, "umount on %s failed, reason : %s",
mountdir, strerror(errno));
}
return ret;
}
-int
+static int
gf_cli_print_limit_list_from_dict(cli_local_t *local, char *volname,
dict_t *dict, char *default_sl, int count,
int op_ret, int op_errno, char *op_errstr)
{
int ret = -1;
int i = 0;
- char key[1024] = {
+ char key[32] = {
0,
};
+ int keylen;
char mountdir[PATH_MAX] = {
0,
};
@@ -3724,7 +3041,7 @@ gf_cli_print_limit_list_from_dict(cli_local_t *local, char *volname,
if (!dict || count <= 0)
goto out;
- ret = dict_get_int32(dict, "type", &type);
+ ret = dict_get_int32_sizen(dict, "type", &type);
if (ret) {
gf_log("cli", GF_LOG_ERROR, "Failed to get quota type");
goto out;
@@ -3734,7 +3051,7 @@ gf_cli_print_limit_list_from_dict(cli_local_t *local, char *volname,
ret = cli_xml_output_vol_quota_limit_list_begin(local, op_ret, op_errno,
op_errstr);
if (ret) {
- gf_log("cli", GF_LOG_ERROR, "Error outputting xml begin");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto out;
}
} else {
@@ -3742,13 +3059,11 @@ gf_cli_print_limit_list_from_dict(cli_local_t *local, char *volname,
}
while (count--) {
- snprintf(key, sizeof(key), "path%d", i++);
+ keylen = snprintf(key, sizeof(key), "path%d", i++);
- ret = dict_get_str(dict, key, &path);
+ ret = dict_get_strn(dict, key, keylen, &path);
if (ret < 0) {
- gf_log("cli", GF_LOG_DEBUG,
- "Path not present in limit"
- " list");
+ gf_log("cli", GF_LOG_DEBUG, "Path not present in limit list");
continue;
}
@@ -3764,7 +3079,7 @@ out:
return ret;
}
-int
+static int
print_quota_list_from_quotad(call_frame_t *frame, dict_t *rsp_dict)
{
char *path = NULL;
@@ -3787,25 +3102,22 @@ print_quota_list_from_quotad(call_frame_t *frame, dict_t *rsp_dict)
local = frame->local;
gd_rsp_dict = local->dict;
- ret = dict_get_int32(rsp_dict, "type", &type);
+ ret = dict_get_int32_sizen(rsp_dict, "type", &type);
if (ret) {
gf_log("cli", GF_LOG_ERROR, "Failed to get type");
goto out;
}
- ret = dict_get_str(rsp_dict, GET_ANCESTRY_PATH_KEY, &path);
+ ret = dict_get_str_sizen(rsp_dict, GET_ANCESTRY_PATH_KEY, &path);
if (ret) {
- gf_log("cli", GF_LOG_WARNING,
- "path key is not present "
- "in dict");
+ gf_log("cli", GF_LOG_WARNING, "path key is not present in dict");
goto out;
}
- ret = dict_get_str(gd_rsp_dict, "default-soft-limit", &default_sl);
+ ret = dict_get_str_sizen(gd_rsp_dict, "default-soft-limit", &default_sl);
if (ret) {
gf_log(frame->this->name, GF_LOG_ERROR,
- "failed to "
- "get default soft limit");
+ "failed to get default soft limit");
goto out;
}
@@ -3844,19 +3156,18 @@ print_quota_list_from_quotad(call_frame_t *frame, dict_t *rsp_dict)
LOCK(&local->lock);
{
- ret = dict_get_int32(gd_rsp_dict, "quota-list-success-count",
- &success_count);
+ ret = dict_get_int32_sizen(gd_rsp_dict, "quota-list-success-count",
+ &success_count);
if (ret)
success_count = 0;
- ret = dict_set_int32(gd_rsp_dict, "quota-list-success-count",
- success_count + 1);
+ ret = dict_set_int32_sizen(gd_rsp_dict, "quota-list-success-count",
+ success_count + 1);
}
UNLOCK(&local->lock);
if (ret) {
gf_log("cli", GF_LOG_ERROR,
- "Failed to set "
- "quota-list-success-count in dict");
+ "Failed to set quota-list-success-count in dict");
goto out;
}
@@ -3866,9 +3177,7 @@ print_quota_list_from_quotad(call_frame_t *frame, dict_t *rsp_dict)
} else {
ret = cli_xml_output_vol_quota_limit_list_begin(local, 0, 0, NULL);
if (ret) {
- gf_log("cli", GF_LOG_ERROR,
- "Error in "
- "printing xml output");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto out;
}
}
@@ -3880,7 +3189,7 @@ out:
return ret;
}
-void *
+static void *
cli_cmd_broadcast_response_detached(void *opaque)
{
int32_t ret = 0;
@@ -3891,7 +3200,7 @@ cli_cmd_broadcast_response_detached(void *opaque)
return NULL;
}
-int32_t
+static int32_t
cli_quota_compare_path(struct list_head *list1, struct list_head *list2)
{
struct list_node *node1 = NULL;
@@ -3908,18 +3217,18 @@ cli_quota_compare_path(struct list_head *list1, struct list_head *list2)
dict1 = node1->ptr;
dict2 = node2->ptr;
- ret = dict_get_str(dict1, GET_ANCESTRY_PATH_KEY, &path1);
+ ret = dict_get_str_sizen(dict1, GET_ANCESTRY_PATH_KEY, &path1);
if (ret < 0)
return 0;
- ret = dict_get_str(dict2, GET_ANCESTRY_PATH_KEY, &path2);
+ ret = dict_get_str_sizen(dict2, GET_ANCESTRY_PATH_KEY, &path2);
if (ret < 0)
return 0;
return strcmp(path1, path2);
}
-int
+static int
cli_quotad_getlimit_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -3949,19 +3258,18 @@ cli_quotad_getlimit_cbk(struct rpc_req *req, struct iovec *iov, int count,
LOCK(&local->lock);
{
- ret = dict_get_int32(local->dict, "quota-list-count", &list_count);
+ ret = dict_get_int32_sizen(local->dict, "quota-list-count",
+ &list_count);
if (ret)
list_count = 0;
list_count++;
- ret = dict_set_int32(local->dict, "quota-list-count", list_count);
+ ret = dict_set_int32_sizen(local->dict, "quota-list-count", list_count);
}
UNLOCK(&local->lock);
if (ret) {
- gf_log("cli", GF_LOG_ERROR,
- "Failed to set "
- "quota-list-count in dict");
+ gf_log("cli", GF_LOG_ERROR, "Failed to set quota-list-count in dict");
goto out;
}
@@ -3976,8 +3284,7 @@ cli_quotad_getlimit_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log(frame->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
goto out;
}
@@ -3996,9 +3303,7 @@ cli_quotad_getlimit_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
if (ret < 0) {
- gf_log("cli", GF_LOG_ERROR,
- "failed to "
- "unserialize req-buffer to dictionary");
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
goto out;
}
@@ -4012,7 +3317,7 @@ cli_quotad_getlimit_cbk(struct rpc_req *req, struct iovec *iov, int count,
}
}
- ret = dict_get_int32(local->dict, "max_count", &max_count);
+ ret = dict_get_int32_sizen(local->dict, "max_count", &max_count);
if (ret < 0) {
gf_log("cli", GF_LOG_ERROR, "failed to get max_count");
goto out;
@@ -4048,18 +3353,17 @@ out:
ret = pthread_create(&th_id, NULL, cli_cmd_broadcast_response_detached,
(void *)-1);
if (ret)
- gf_log("cli", GF_LOG_ERROR,
- "pthread_create failed: "
- "%s",
+ gf_log("cli", GF_LOG_ERROR, "pthread_create failed: %s",
strerror(errno));
} else {
cli_cmd_broadcast_response(ret);
}
gf_free_xdr_cli_rsp(rsp);
+
return ret;
}
-int
+static int
cli_quotad_getlimit(call_frame_t *frame, xlator_t *this, void *data)
{
gf_cli_req req = {{
@@ -4079,7 +3383,7 @@ cli_quotad_getlimit(call_frame_t *frame, xlator_t *this, void *data)
ret = dict_allocate_and_serialize(dict, &req.dict.dict_val,
&req.dict.dict_len);
if (ret < 0) {
- gf_log(this->name, GF_LOG_ERROR, "failed to serialize the data");
+ gf_log(this->name, GF_LOG_ERROR, DICT_SERIALIZE_FAIL);
goto out;
}
@@ -4089,28 +3393,30 @@ cli_quotad_getlimit(call_frame_t *frame, xlator_t *this, void *data)
cli_quotad_getlimit_cbk, (xdrproc_t)xdr_gf_cli_req);
out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ GF_FREE(req.dict.dict_val);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
return ret;
}
-void
+static void
gf_cli_quota_list(cli_local_t *local, char *volname, dict_t *dict,
char *default_sl, int count, int op_ret, int op_errno,
char *op_errstr)
{
- GF_VALIDATE_OR_GOTO("cli", volname, out);
-
- if (!connected)
+ if (!cli_cmd_connected())
goto out;
- if (count > 0)
+ if (count > 0) {
+ GF_VALIDATE_OR_GOTO("cli", volname, out);
+
gf_cli_print_limit_list_from_dict(local, volname, dict, default_sl,
count, op_ret, op_errno, op_errstr);
+ }
out:
return;
}
-int
+static int
gf_cli_quota_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -4141,8 +3447,7 @@ gf_cli_quota_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log(frame->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
goto out;
}
@@ -4154,9 +3459,7 @@ gf_cli_quota_cbk(struct rpc_req *req, struct iovec *iov, int count,
if (strcmp(rsp.op_errstr, "")) {
cli_err("quota command failed : %s", rsp.op_errstr);
if (rsp.op_ret == -ENOENT)
- cli_err(
- "please enter the path relative to "
- "the volume");
+ cli_err("please enter the path relative to the volume");
} else {
cli_err("quota command : failed");
}
@@ -4170,24 +3473,17 @@ gf_cli_quota_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
if (ret < 0) {
- gf_log("cli", GF_LOG_ERROR,
- "failed to "
- "unserialize req-buffer to dictionary");
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
goto out;
}
}
gf_log("cli", GF_LOG_DEBUG, "Received resp to quota command");
- ret = dict_get_str(dict, "volname", &volname);
- if (ret)
- gf_log(frame->this->name, GF_LOG_ERROR, "failed to get volname");
-
- ret = dict_get_str(dict, "default-soft-limit", &default_sl);
+ ret = dict_get_str_sizen(dict, "default-soft-limit", &default_sl);
if (ret)
gf_log(frame->this->name, GF_LOG_TRACE,
- "failed to get "
- "default soft limit");
+ "failed to get default soft limit");
// default-soft-limit is part of rsp_dict only iff we sent
// GLUSTER_CLI_QUOTA with type being GF_QUOTA_OPTION_TYPE_LIST
@@ -4197,8 +3493,8 @@ gf_cli_quota_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = -1;
goto out;
}
- ret = dict_set_dynstr(local->dict, "default-soft-limit",
- default_sl_dup);
+ ret = dict_set_dynstr_sizen(local->dict, "default-soft-limit",
+ default_sl_dup);
if (ret) {
gf_log(frame->this->name, GF_LOG_TRACE,
"failed to set default soft limit");
@@ -4206,11 +3502,15 @@ gf_cli_quota_cbk(struct rpc_req *req, struct iovec *iov, int count,
}
}
- ret = dict_get_int32(dict, "type", &type);
+ ret = dict_get_str_sizen(dict, "volname", &volname);
+ if (ret)
+ gf_log(frame->this->name, GF_LOG_ERROR, "failed to get volname");
+
+ ret = dict_get_int32_sizen(dict, "type", &type);
if (ret)
gf_log(frame->this->name, GF_LOG_TRACE, "failed to get type");
- ret = dict_get_int32(dict, "count", &entry_count);
+ ret = dict_get_int32_sizen(dict, "count", &entry_count);
if (ret)
gf_log(frame->this->name, GF_LOG_TRACE, "failed to get count");
@@ -4223,9 +3523,7 @@ gf_cli_quota_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = cli_xml_output_vol_quota_limit_list_end(local);
if (ret < 0) {
ret = -1;
- gf_log("cli", GF_LOG_ERROR,
- "Error in printing"
- " xml output");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
}
goto out;
}
@@ -4237,7 +3535,7 @@ xml_output:
ret = cli_xml_output_str("volQuota", NULL, rsp.op_ret, rsp.op_errno,
rsp.op_errstr);
if (ret)
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto out;
}
@@ -4262,7 +3560,7 @@ out:
return ret;
}
-int
+static int
gf_cli_getspec_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -4281,7 +3579,7 @@ gf_cli_getspec_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp);
if (ret < 0) {
gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ XDR_DECODE_FAIL);
goto out;
}
@@ -4313,7 +3611,7 @@ out:
return ret;
}
-int
+static int
gf_cli_pmap_b2p_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -4332,7 +3630,7 @@ gf_cli_pmap_b2p_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_pmap_port_by_brick_rsp);
if (ret < 0) {
gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ XDR_DECODE_FAIL);
goto out;
}
@@ -4355,7 +3653,7 @@ out:
return ret;
}
-int32_t
+static int32_t
gf_cli_probe(call_frame_t *frame, xlator_t *this, void *data)
{
gf_cli_req req = {
@@ -4374,9 +3672,9 @@ gf_cli_probe(call_frame_t *frame, xlator_t *this, void *data)
dict = data;
- ret = dict_get_int32(dict, "port", &port);
+ ret = dict_get_int32_sizen(dict, "port", &port);
if (ret) {
- ret = dict_set_int32(dict, "port", CLI_GLUSTERD_PORT);
+ ret = dict_set_int32_sizen(dict, "port", CLI_GLUSTERD_PORT);
if (ret)
goto out;
}
@@ -4387,12 +3685,12 @@ gf_cli_probe(call_frame_t *frame, xlator_t *this, void *data)
out:
GF_FREE(req.dict.dict_val);
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
return ret;
}
-int32_t
+static int32_t
gf_cli_deprobe(call_frame_t *frame, xlator_t *this, void *data)
{
gf_cli_req req = {
@@ -4411,16 +3709,16 @@ gf_cli_deprobe(call_frame_t *frame, xlator_t *this, void *data)
}
dict = data;
- ret = dict_get_int32(dict, "port", &port);
+ ret = dict_get_int32_sizen(dict, "port", &port);
if (ret) {
- ret = dict_set_int32(dict, "port", CLI_GLUSTERD_PORT);
+ ret = dict_set_int32_sizen(dict, "port", CLI_GLUSTERD_PORT);
if (ret)
goto out;
}
- ret = dict_get_int32(dict, "flags", &flags);
+ ret = dict_get_int32_sizen(dict, "flags", &flags);
if (ret) {
- ret = dict_set_int32(dict, "flags", 0);
+ ret = dict_set_int32_sizen(dict, "flags", 0);
if (ret)
goto out;
}
@@ -4431,12 +3729,12 @@ gf_cli_deprobe(call_frame_t *frame, xlator_t *this, void *data)
out:
GF_FREE(req.dict.dict_val);
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
return ret;
}
-int32_t
+static int32_t
gf_cli_list_friends(call_frame_t *frame, xlator_t *this, void *data)
{
gf1_cli_peer_list_req req = {
@@ -4469,11 +3767,11 @@ out:
*/
frame->local = NULL;
}
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
return ret;
}
-int32_t
+static int32_t
gf_cli_get_state(call_frame_t *frame, xlator_t *this, void *data)
{
gf_cli_req req = {
@@ -4484,24 +3782,18 @@ gf_cli_get_state(call_frame_t *frame, xlator_t *this, void *data)
int ret = 0;
dict_t *dict = NULL;
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
-
dict = data;
ret = cli_to_glusterd(&req, frame, gf_cli_get_state_cbk,
(xdrproc_t)xdr_gf_cli_req, dict,
GLUSTER_CLI_GET_STATE, this, cli_rpc_prog, NULL);
-out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
GF_FREE(req.dict.dict_val);
return ret;
}
-int32_t
+static int32_t
gf_cli_get_next_volume(call_frame_t *frame, xlator_t *this, void *data)
{
int ret = 0;
@@ -4519,7 +3811,7 @@ gf_cli_get_next_volume(call_frame_t *frame, xlator_t *this, void *data)
if (global_state->mode & GLUSTER_MODE_XML) {
ret = cli_xml_output_vol_info_begin(local, 0, 0, "");
if (ret) {
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto out;
}
}
@@ -4547,15 +3839,15 @@ end_xml:
if (global_state->mode & GLUSTER_MODE_XML) {
ret = cli_xml_output_vol_info_end(local);
if (ret)
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
}
out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
return ret;
}
-int32_t
+static int32_t
gf_cli_get_volume(call_frame_t *frame, xlator_t *this, void *data)
{
gf_cli_req req = {{
@@ -4566,7 +3858,7 @@ gf_cli_get_volume(call_frame_t *frame, xlator_t *this, void *data)
dict_t *dict = NULL;
int32_t flags = 0;
- if (!frame || !this || !data) {
+ if (!this || !data) {
ret = -1;
goto out;
}
@@ -4581,13 +3873,13 @@ gf_cli_get_volume(call_frame_t *frame, xlator_t *this, void *data)
}
if (ctx->volname) {
- ret = dict_set_str(dict, "volname", ctx->volname);
+ ret = dict_set_str_sizen(dict, "volname", ctx->volname);
if (ret)
goto out;
}
flags = ctx->flags;
- ret = dict_set_int32(dict, "flags", flags);
+ ret = dict_set_int32_sizen(dict, "flags", flags);
if (ret) {
gf_log(frame->this->name, GF_LOG_ERROR, "failed to set flags");
goto out;
@@ -4596,7 +3888,7 @@ gf_cli_get_volume(call_frame_t *frame, xlator_t *this, void *data)
ret = dict_allocate_and_serialize(dict, &req.dict.dict_val,
&req.dict.dict_len);
if (ret) {
- gf_log(frame->this->name, GF_LOG_ERROR, "failed to serialize dict");
+ gf_log(frame->this->name, GF_LOG_ERROR, DICT_SERIALIZE_FAIL);
goto out;
}
@@ -4610,11 +3902,11 @@ out:
GF_FREE(req.dict.dict_val);
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
return ret;
}
-int32_t
+static int32_t
gf_cli3_1_uuid_get(call_frame_t *frame, xlator_t *this, void *data)
{
gf_cli_req req = {{
@@ -4623,22 +3915,16 @@ gf_cli3_1_uuid_get(call_frame_t *frame, xlator_t *this, void *data)
int ret = 0;
dict_t *dict = NULL;
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
-
dict = data;
ret = cli_to_glusterd(&req, frame, gf_cli3_1_uuid_get_cbk,
(xdrproc_t)xdr_gf_cli_req, dict, GLUSTER_CLI_UUID_GET,
this, cli_rpc_prog, NULL);
-out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
GF_FREE(req.dict.dict_val);
return ret;
}
-int32_t
+static int32_t
gf_cli3_1_uuid_reset(call_frame_t *frame, xlator_t *this, void *data)
{
gf_cli_req req = {{
@@ -4647,22 +3933,16 @@ gf_cli3_1_uuid_reset(call_frame_t *frame, xlator_t *this, void *data)
int ret = 0;
dict_t *dict = NULL;
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
-
dict = data;
ret = cli_to_glusterd(&req, frame, gf_cli3_1_uuid_reset_cbk,
(xdrproc_t)xdr_gf_cli_req, dict,
GLUSTER_CLI_UUID_RESET, this, cli_rpc_prog, NULL);
-out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
GF_FREE(req.dict.dict_val);
return ret;
}
-int32_t
+static int32_t
gf_cli_create_volume(call_frame_t *frame, xlator_t *this, void *data)
{
gf_cli_req req = {{
@@ -4671,26 +3951,19 @@ gf_cli_create_volume(call_frame_t *frame, xlator_t *this, void *data)
int ret = 0;
dict_t *dict = NULL;
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
-
dict = data;
ret = cli_to_glusterd(&req, frame, gf_cli_create_volume_cbk,
(xdrproc_t)xdr_gf_cli_req, dict,
GLUSTER_CLI_CREATE_VOLUME, this, cli_rpc_prog, NULL);
-
-out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
GF_FREE(req.dict.dict_val);
return ret;
}
-int32_t
+static int32_t
gf_cli_delete_volume(call_frame_t *frame, xlator_t *this, void *data)
{
gf_cli_req req = {{
@@ -4699,25 +3972,18 @@ gf_cli_delete_volume(call_frame_t *frame, xlator_t *this, void *data)
int ret = 0;
dict_t *dict = NULL;
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
-
dict = data;
ret = cli_to_glusterd(&req, frame, gf_cli_delete_volume_cbk,
(xdrproc_t)xdr_gf_cli_req, dict,
GLUSTER_CLI_DELETE_VOLUME, this, cli_rpc_prog, NULL);
-
-out:
GF_FREE(req.dict.dict_val);
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
return ret;
}
-int32_t
+static int32_t
gf_cli_start_volume(call_frame_t *frame, xlator_t *this, void *data)
{
gf_cli_req req = {{
@@ -4726,25 +3992,19 @@ gf_cli_start_volume(call_frame_t *frame, xlator_t *this, void *data)
int ret = 0;
dict_t *dict = NULL;
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
-
dict = data;
ret = cli_to_glusterd(&req, frame, gf_cli_start_volume_cbk,
(xdrproc_t)xdr_gf_cli_req, dict,
GLUSTER_CLI_START_VOLUME, this, cli_rpc_prog, NULL);
-out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
GF_FREE(req.dict.dict_val);
return ret;
}
-int32_t
+static int32_t
gf_cli_stop_volume(call_frame_t *frame, xlator_t *this, void *data)
{
gf_cli_req req = {{
@@ -4753,25 +4013,18 @@ gf_cli_stop_volume(call_frame_t *frame, xlator_t *this, void *data)
int ret = 0;
dict_t *dict = data;
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
-
dict = data;
ret = cli_to_glusterd(&req, frame, gf_cli_stop_volume_cbk,
(xdrproc_t)xdr_gf_cli_req, dict,
GLUSTER_CLI_STOP_VOLUME, this, cli_rpc_prog, NULL);
-
-out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
GF_FREE(req.dict.dict_val);
return ret;
}
-int32_t
+static int32_t
gf_cli_defrag_volume(call_frame_t *frame, xlator_t *this, void *data)
{
gf_cli_req req = {{
@@ -4780,25 +4033,18 @@ gf_cli_defrag_volume(call_frame_t *frame, xlator_t *this, void *data)
int ret = 0;
dict_t *dict = NULL;
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
-
dict = data;
ret = cli_to_glusterd(&req, frame, gf_cli_defrag_volume_cbk,
(xdrproc_t)xdr_gf_cli_req, dict,
GLUSTER_CLI_DEFRAG_VOLUME, this, cli_rpc_prog, NULL);
-
-out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
GF_FREE(req.dict.dict_val);
return ret;
}
-int32_t
+static int32_t
gf_cli_rename_volume(call_frame_t *frame, xlator_t *this, void *data)
{
gf_cli_req req = {{
@@ -4817,7 +4063,7 @@ gf_cli_rename_volume(call_frame_t *frame, xlator_t *this, void *data)
ret = dict_allocate_and_serialize(dict, &req.dict.dict_val,
&req.dict.dict_len);
if (ret < 0) {
- gf_log(this->name, GF_LOG_ERROR, "failed to serialize the data");
+ gf_log(this->name, GF_LOG_ERROR, DICT_SERIALIZE_FAIL);
goto out;
}
@@ -4827,12 +4073,13 @@ gf_cli_rename_volume(call_frame_t *frame, xlator_t *this, void *data)
gf_cli_rename_volume_cbk, (xdrproc_t)xdr_gf_cli_req);
out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ GF_FREE(req.dict.dict_val);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
return ret;
}
-int32_t
+static int32_t
gf_cli_reset_volume(call_frame_t *frame, xlator_t *this, void *data)
{
gf_cli_req req = {{
@@ -4841,125 +4088,37 @@ gf_cli_reset_volume(call_frame_t *frame, xlator_t *this, void *data)
int ret = 0;
dict_t *dict = NULL;
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
-
dict = data;
ret = cli_to_glusterd(&req, frame, gf_cli_reset_volume_cbk,
(xdrproc_t)xdr_gf_cli_req, dict,
GLUSTER_CLI_RESET_VOLUME, this, cli_rpc_prog, NULL);
-
-out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
GF_FREE(req.dict.dict_val);
return ret;
}
-int32_t
-gf_cli_set_volume(call_frame_t *frame, xlator_t *this, void *data)
-{
- gf_cli_req req = {{
- 0,
- }};
- int ret = 0;
- dict_t *dict = NULL;
-
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
-
- dict = data;
-
- ret = cli_to_glusterd(&req, frame, gf_cli_set_volume_cbk,
- (xdrproc_t)xdr_gf_cli_req, dict,
- GLUSTER_CLI_SET_VOLUME, this, cli_rpc_prog, NULL);
-
-out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
- GF_FREE(req.dict.dict_val);
-
- return ret;
-}
-
-int32_t
-gf_cli_add_brick(call_frame_t *frame, xlator_t *this, void *data)
+static int32_t
+gf_cli_ganesha(call_frame_t *frame, xlator_t *this, void *data)
{
gf_cli_req req = {{
0,
}};
int ret = 0;
dict_t *dict = NULL;
- char *volname = NULL;
- int32_t count = 0;
-
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
dict = data;
- ret = dict_get_str(dict, "volname", &volname);
-
- if (ret)
- goto out;
-
- ret = dict_get_int32(dict, "count", &count);
- if (ret)
- goto out;
-
- ret = cli_to_glusterd(&req, frame, gf_cli_add_brick_cbk,
- (xdrproc_t)xdr_gf_cli_req, dict,
- GLUSTER_CLI_ADD_BRICK, this, cli_rpc_prog, NULL);
-
-out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
-
- GF_FREE(req.dict.dict_val);
-
- return ret;
-}
-
-int32_t
-gf_cli_tier(call_frame_t *frame, xlator_t *this, void *data)
-{
- int ret = 0;
- int32_t command = 0;
- gf_cli_req req = {{
- 0,
- }};
- dict_t *dict = NULL;
-
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
- dict = data;
-
- ret = dict_get_int32(dict, "rebalance-command", &command);
- if (ret) {
- gf_log("cli", GF_LOG_ERROR, "Failed to get rebalance-command");
- goto out;
- }
-
- ret = cli_to_glusterd(&req, frame, gf_cli_defrag_volume_cbk,
- (xdrproc_t)xdr_gf_cli_req, dict, GLUSTER_CLI_TIER,
+ ret = cli_to_glusterd(&req, frame, gf_cli_ganesha_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict, GLUSTER_CLI_GANESHA,
this, cli_rpc_prog, NULL);
-
-out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
-
- GF_FREE(req.dict.dict_val);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
return ret;
}
-int32_t
-gf_cli_add_tier_brick(call_frame_t *frame, xlator_t *this, void *data)
+static int32_t
+gf_cli_set_volume(call_frame_t *frame, xlator_t *this, void *data)
{
gf_cli_req req = {{
0,
@@ -4967,140 +4126,40 @@ gf_cli_add_tier_brick(call_frame_t *frame, xlator_t *this, void *data)
int ret = 0;
dict_t *dict = NULL;
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
-
dict = data;
- ret = cli_to_glusterd(&req, frame, gf_cli_add_tier_brick_cbk,
+ ret = cli_to_glusterd(&req, frame, gf_cli_set_volume_cbk,
(xdrproc_t)xdr_gf_cli_req, dict,
- GLUSTER_CLI_ADD_TIER_BRICK, this, cli_rpc_prog, NULL);
- if (ret) {
- gf_log("cli", GF_LOG_ERROR,
- "Failed to send request to "
- "glusterd");
- goto out;
- }
-
-out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
-
+ GLUSTER_CLI_SET_VOLUME, this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
GF_FREE(req.dict.dict_val);
+
return ret;
}
int32_t
-gf_cli_attach_tier(call_frame_t *frame, xlator_t *this, void *data)
+gf_cli_add_brick(call_frame_t *frame, xlator_t *this, void *data)
{
gf_cli_req req = {{
0,
}};
int ret = 0;
dict_t *dict = NULL;
- dict_t *newdict = NULL;
- char *tierwords[] = {"volume", "tier", "", "start", NULL};
- const char **words = (const char **)tierwords;
- char *volname = NULL;
- cli_local_t *local = NULL;
- cli_local_t *oldlocal = NULL;
-
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
dict = data;
- ret = cli_to_glusterd(&req, frame, gf_cli_attach_tier_cbk,
+ ret = cli_to_glusterd(&req, frame, gf_cli_add_brick_cbk,
(xdrproc_t)xdr_gf_cli_req, dict,
- GLUSTER_CLI_ATTACH_TIER, this, cli_rpc_prog, NULL);
- if (ret)
- goto out;
- ret = dict_get_str(dict, "volname", &volname);
- if (ret) {
- gf_log("cli", GF_LOG_ERROR, "Failed to get volume name");
- goto notify_cli;
- }
-
- words[2] = volname;
- ret = cli_cmd_volume_old_tier_parse((const char **)words, 4, &newdict);
- if (ret) {
- gf_log("cli", GF_LOG_ERROR,
- "Failed to parse tier start "
- "command");
- goto notify_cli;
- }
-
- gf_log("cli", GF_LOG_DEBUG, "Sending tier start");
-
- oldlocal = frame->local;
- CLI_LOCAL_INIT(local, words, frame, newdict);
- ret = gf_cli_tier(frame, this, newdict);
- frame->local = oldlocal;
- cli_local_wipe(local);
-
-notify_cli:
- if (ret) {
- cli_out(
- "Failed to run tier start. Please execute tier start "
- "command explicitly");
- cli_out("Usage : gluster volume tier <volname> start");
- }
+ GLUSTER_CLI_ADD_BRICK, this, cli_rpc_prog, NULL);
-out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
GF_FREE(req.dict.dict_val);
- return ret;
-}
-
-int32_t
-gf_cli_remove_tier_brick(call_frame_t *frame, xlator_t *this, void *data)
-{
- gf_cli_req status_req = {{
- 0,
- }};
- int ret = 0;
- dict_t *dict = NULL;
- int32_t command = 0;
- char *volname = NULL;
-
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
-
- dict = data;
-
- ret = dict_get_str(dict, "volname", &volname);
- if (ret)
- goto out;
-
- ret = dict_get_int32(dict, "command", &command);
- if (ret)
- goto out;
-
- ret = dict_set_int32(dict, "rebalance-command", (int32_t)command);
- if (ret) {
- gf_log(this->name, GF_LOG_ERROR, "Failed to set dict");
- goto out;
- }
-
- ret = cli_to_glusterd(&status_req, frame, gf_cli_remove_tier_brick_cbk,
- (xdrproc_t)xdr_gf_cli_req, dict, GLUSTER_CLI_TIER,
- this, cli_rpc_prog, NULL);
-
-out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
-
- GF_FREE(status_req.dict.dict_val);
return ret;
}
-int32_t
+static int32_t
gf_cli_remove_brick(call_frame_t *frame, xlator_t *this, void *data)
{
gf_cli_req req = {{
@@ -5114,21 +4173,16 @@ gf_cli_remove_brick(call_frame_t *frame, xlator_t *this, void *data)
int ret = 0;
dict_t *dict = NULL;
int32_t command = 0;
- char *volname = NULL;
int32_t cmd = 0;
- if (!frame || !this || !data) {
+ if (!frame || !this) {
ret = -1;
goto out;
}
dict = data;
- ret = dict_get_str(dict, "volname", &volname);
- if (ret)
- goto out;
-
- ret = dict_get_int32(dict, "command", &command);
+ ret = dict_get_int32_sizen(dict, "command", &command);
if (ret)
goto out;
@@ -5143,7 +4197,7 @@ gf_cli_remove_brick(call_frame_t *frame, xlator_t *this, void *data)
else
cmd |= GF_DEFRAG_CMD_STOP;
- ret = dict_set_int32(dict, "rebalance-command", (int32_t)cmd);
+ ret = dict_set_int32_sizen(dict, "rebalance-command", (int32_t)cmd);
if (ret) {
gf_log(this->name, GF_LOG_ERROR, "Failed to set dict");
goto out;
@@ -5156,7 +4210,7 @@ gf_cli_remove_brick(call_frame_t *frame, xlator_t *this, void *data)
}
out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
GF_FREE(req.dict.dict_val);
@@ -5165,7 +4219,7 @@ out:
return ret;
}
-int32_t
+static int32_t
gf_cli_reset_brick(call_frame_t *frame, xlator_t *this, void *data)
{
gf_cli_req req = {{
@@ -5174,8 +4228,6 @@ gf_cli_reset_brick(call_frame_t *frame, xlator_t *this, void *data)
int ret = 0;
dict_t *dict = NULL;
char *dst_brick = NULL;
- char *src_brick = NULL;
- char *volname = NULL;
char *op = NULL;
if (!frame || !this || !data) {
@@ -5185,49 +4237,34 @@ gf_cli_reset_brick(call_frame_t *frame, xlator_t *this, void *data)
dict = data;
- ret = dict_get_str(dict, "operation", &op);
+ ret = dict_get_str_sizen(dict, "operation", &op);
if (ret) {
gf_log(this->name, GF_LOG_DEBUG, "dict_get on operation failed");
goto out;
}
- ret = dict_get_str(dict, "volname", &volname);
- if (ret) {
- gf_log(this->name, GF_LOG_DEBUG, "dict_get on volname failed");
- goto out;
- }
-
- ret = dict_get_str(dict, "src-brick", &src_brick);
- if (ret) {
- gf_log(this->name, GF_LOG_DEBUG, "dict_get on src-brick failed");
- goto out;
- }
-
if (!strcmp(op, "GF_RESET_OP_COMMIT") ||
!strcmp(op, "GF_RESET_OP_COMMIT_FORCE")) {
- ret = dict_get_str(dict, "dst-brick", &dst_brick);
+ ret = dict_get_str_sizen(dict, "dst-brick", &dst_brick);
if (ret) {
gf_log(this->name, GF_LOG_DEBUG, "dict_get on dst-brick failed");
goto out;
}
}
- gf_log(this->name, GF_LOG_DEBUG, "Received command reset-brick %s on %s.",
- op, src_brick);
-
ret = cli_to_glusterd(&req, frame, gf_cli_reset_brick_cbk,
(xdrproc_t)xdr_gf_cli_req, dict,
GLUSTER_CLI_RESET_BRICK, this, cli_rpc_prog, NULL);
out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
GF_FREE(req.dict.dict_val);
return ret;
}
-int32_t
+static int32_t
gf_cli_replace_brick(call_frame_t *frame, xlator_t *this, void *data)
{
gf_cli_req req = {{
@@ -5235,9 +4272,6 @@ gf_cli_replace_brick(call_frame_t *frame, xlator_t *this, void *data)
}};
int ret = 0;
dict_t *dict = NULL;
- char *src_brick = NULL;
- char *dst_brick = NULL;
- char *volname = NULL;
int32_t op = 0;
if (!frame || !this || !data) {
@@ -5247,47 +4281,25 @@ gf_cli_replace_brick(call_frame_t *frame, xlator_t *this, void *data)
dict = data;
- ret = dict_get_int32(dict, "operation", &op);
+ ret = dict_get_int32_sizen(dict, "operation", &op);
if (ret) {
gf_log(this->name, GF_LOG_DEBUG, "dict_get on operation failed");
goto out;
}
- ret = dict_get_str(dict, "volname", &volname);
- if (ret) {
- gf_log(this->name, GF_LOG_DEBUG, "dict_get on volname failed");
- goto out;
- }
-
- ret = dict_get_str(dict, "src-brick", &src_brick);
- if (ret) {
- gf_log(this->name, GF_LOG_DEBUG, "dict_get on src-brick failed");
- goto out;
- }
-
- ret = dict_get_str(dict, "dst-brick", &dst_brick);
- if (ret) {
- gf_log(this->name, GF_LOG_DEBUG, "dict_get on dst-brick failed");
- goto out;
- }
-
- gf_log(this->name, GF_LOG_DEBUG,
- "Received command replace-brick %s with "
- "%s with operation=%d",
- src_brick, dst_brick, op);
ret = cli_to_glusterd(&req, frame, gf_cli_replace_brick_cbk,
(xdrproc_t)xdr_gf_cli_req, dict,
GLUSTER_CLI_REPLACE_BRICK, this, cli_rpc_prog, NULL);
out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
GF_FREE(req.dict.dict_val);
return ret;
}
-int32_t
+static int32_t
gf_cli_log_rotate(call_frame_t *frame, xlator_t *this, void *data)
{
gf_cli_req req = {{
@@ -5296,25 +4308,18 @@ gf_cli_log_rotate(call_frame_t *frame, xlator_t *this, void *data)
int ret = 0;
dict_t *dict = NULL;
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
-
dict = data;
ret = cli_to_glusterd(&req, frame, gf_cli_log_rotate_cbk,
(xdrproc_t)xdr_gf_cli_req, dict,
GLUSTER_CLI_LOG_ROTATE, this, cli_rpc_prog, NULL);
-
-out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
GF_FREE(req.dict.dict_val);
return ret;
}
-int32_t
+static int32_t
gf_cli_sync_volume(call_frame_t *frame, xlator_t *this, void *data)
{
int ret = 0;
@@ -5323,25 +4328,18 @@ gf_cli_sync_volume(call_frame_t *frame, xlator_t *this, void *data)
}};
dict_t *dict = NULL;
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
-
dict = data;
ret = cli_to_glusterd(&req, frame, gf_cli_sync_volume_cbk,
(xdrproc_t)xdr_gf_cli_req, dict,
GLUSTER_CLI_SYNC_VOLUME, this, cli_rpc_prog, NULL);
-
-out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
GF_FREE(req.dict.dict_val);
return ret;
}
-int32_t
+static int32_t
gf_cli_getspec(call_frame_t *frame, xlator_t *this, void *data)
{
gf_getspec_req req = {
@@ -5351,14 +4349,14 @@ gf_cli_getspec(call_frame_t *frame, xlator_t *this, void *data)
dict_t *dict = NULL;
dict_t *op_dict = NULL;
- if (!frame || !this || !data) {
+ if (!frame || !this) {
ret = -1;
goto out;
}
dict = data;
- ret = dict_get_str(dict, "volid", &req.key);
+ ret = dict_get_str_sizen(dict, "volid", &req.key);
if (ret)
goto out;
@@ -5370,26 +4368,24 @@ gf_cli_getspec(call_frame_t *frame, xlator_t *this, void *data)
// Set the supported min and max op-versions, so glusterd can make a
// decision
- ret = dict_set_int32(op_dict, "min-op-version", GD_OP_VERSION_MIN);
+ ret = dict_set_int32_sizen(op_dict, "min-op-version", GD_OP_VERSION_MIN);
if (ret) {
gf_log(THIS->name, GF_LOG_ERROR,
- "Failed to set min-op-version"
- " in request dict");
+ "Failed to set min-op-version in request dict");
goto out;
}
- ret = dict_set_int32(op_dict, "max-op-version", GD_OP_VERSION_MAX);
+ ret = dict_set_int32_sizen(op_dict, "max-op-version", GD_OP_VERSION_MAX);
if (ret) {
gf_log(THIS->name, GF_LOG_ERROR,
- "Failed to set max-op-version"
- " in request dict");
+ "Failed to set max-op-version in request dict");
goto out;
}
ret = dict_allocate_and_serialize(op_dict, &req.xdata.xdata_val,
&req.xdata.xdata_len);
if (ret < 0) {
- gf_log(THIS->name, GF_LOG_ERROR, "Failed to serialize dictionary");
+ gf_log(THIS->name, GF_LOG_ERROR, DICT_SERIALIZE_FAIL);
goto out;
}
@@ -5401,12 +4397,13 @@ out:
if (op_dict) {
dict_unref(op_dict);
}
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ GF_FREE(req.xdata.xdata_val);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
return ret;
}
-int32_t
+static int32_t
gf_cli_quota(call_frame_t *frame, xlator_t *this, void *data)
{
gf_cli_req req = {{
@@ -5415,24 +4412,17 @@ gf_cli_quota(call_frame_t *frame, xlator_t *this, void *data)
int ret = 0;
dict_t *dict = NULL;
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
-
dict = data;
ret = cli_to_glusterd(&req, frame, gf_cli_quota_cbk,
(xdrproc_t)xdr_gf_cli_req, dict, GLUSTER_CLI_QUOTA,
this, cli_rpc_prog, NULL);
-
-out:
GF_FREE(req.dict.dict_val);
-
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
return ret;
}
-int32_t
+static int32_t
gf_cli_pmap_b2p(call_frame_t *frame, xlator_t *this, void *data)
{
pmap_port_by_brick_req req = {
@@ -5441,14 +4431,14 @@ gf_cli_pmap_b2p(call_frame_t *frame, xlator_t *this, void *data)
int ret = 0;
dict_t *dict = NULL;
- if (!frame || !this || !data) {
+ if (!frame || !this) {
ret = -1;
goto out;
}
dict = data;
- ret = dict_get_str(dict, "brick", &req.brick);
+ ret = dict_get_str_sizen(dict, "brick", &req.brick);
if (ret)
goto out;
@@ -5457,7 +4447,7 @@ gf_cli_pmap_b2p(call_frame_t *frame, xlator_t *this, void *data)
(xdrproc_t)xdr_pmap_port_by_brick_req);
out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
return ret;
}
@@ -5472,7 +4462,8 @@ gf_cli_fsm_log_cbk(struct rpc_req *req, struct iovec *iov, int count,
int ret = -1;
dict_t *dict = NULL;
int tr_count = 0;
- char key[256] = {0};
+ char key[64] = {0};
+ int keylen;
int i = 0;
char *old_state = NULL;
char *new_state = NULL;
@@ -5488,7 +4479,7 @@ gf_cli_fsm_log_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf1_cli_fsm_log_rsp);
if (ret < 0) {
gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ XDR_DECODE_FAIL);
goto out;
}
@@ -5510,33 +4501,33 @@ gf_cli_fsm_log_cbk(struct rpc_req *req, struct iovec *iov, int count,
&dict);
if (ret) {
- cli_err("bad response");
+ cli_err(DICT_UNSERIALIZE_FAIL);
goto out;
}
- ret = dict_get_int32(dict, "count", &tr_count);
- if (tr_count)
+ ret = dict_get_int32_sizen(dict, "count", &tr_count);
+ if (!ret && tr_count)
cli_out("number of transitions: %d", tr_count);
else
cli_err("No transitions");
for (i = 0; i < tr_count; i++) {
- snprintf(key, sizeof(key), "log%d-old-state", i);
- ret = dict_get_str(dict, key, &old_state);
+ keylen = snprintf(key, sizeof(key), "log%d-old-state", i);
+ ret = dict_get_strn(dict, key, keylen, &old_state);
if (ret)
goto out;
- snprintf(key, sizeof(key), "log%d-event", i);
- ret = dict_get_str(dict, key, &event);
+ keylen = snprintf(key, sizeof(key), "log%d-event", i);
+ ret = dict_get_strn(dict, key, keylen, &event);
if (ret)
goto out;
- snprintf(key, sizeof(key), "log%d-new-state", i);
- ret = dict_get_str(dict, key, &new_state);
+ keylen = snprintf(key, sizeof(key), "log%d-new-state", i);
+ ret = dict_get_strn(dict, key, keylen, &new_state);
if (ret)
goto out;
- snprintf(key, sizeof(key), "log%d-time", i);
- ret = dict_get_str(dict, key, &time);
+ keylen = snprintf(key, sizeof(key), "log%d-time", i);
+ ret = dict_get_strn(dict, key, keylen, &time);
if (ret)
goto out;
cli_out(
@@ -5559,7 +4550,7 @@ out:
return ret;
}
-int32_t
+static int32_t
gf_cli_fsm_log(call_frame_t *frame, xlator_t *this, void *data)
{
int ret = -1;
@@ -5579,12 +4570,12 @@ gf_cli_fsm_log(call_frame_t *frame, xlator_t *this, void *data)
(xdrproc_t)xdr_gf1_cli_fsm_log_req);
out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
return ret;
}
-int
+static int
gf_cli_gsync_config_command(dict_t *dict)
{
runner_t runner = {
@@ -5599,7 +4590,7 @@ gf_cli_gsync_config_command(dict_t *dict)
int ret = -1;
char conf_path[PATH_MAX] = "";
- if (dict_get_str(dict, "subop", &subop) != 0)
+ if (dict_get_str_sizen(dict, "subop", &subop) != 0)
return -1;
if (strcmp(subop, "get") != 0 && strcmp(subop, "get-all") != 0) {
@@ -5607,16 +4598,16 @@ gf_cli_gsync_config_command(dict_t *dict)
return 0;
}
- if (dict_get_str(dict, "glusterd_workdir", &gwd) != 0 ||
- dict_get_str(dict, "slave", &slave) != 0)
+ if (dict_get_str_sizen(dict, "glusterd_workdir", &gwd) != 0 ||
+ dict_get_str_sizen(dict, "slave", &slave) != 0)
return -1;
- if (dict_get_str(dict, "master", &master) != 0)
+ if (dict_get_str_sizen(dict, "master", &master) != 0)
master = NULL;
- if (dict_get_str(dict, "op_name", &op_name) != 0)
+ if (dict_get_str_sizen(dict, "op_name", &op_name) != 0)
op_name = NULL;
- ret = dict_get_str(dict, "conf_path", &confpath);
+ ret = dict_get_str_sizen(dict, "conf_path", &confpath);
if (ret || !confpath) {
ret = snprintf(conf_path, sizeof(conf_path) - 1,
"%s/" GEOREP "/gsyncd_template.conf", gwd);
@@ -5638,7 +4629,7 @@ gf_cli_gsync_config_command(dict_t *dict)
return runner_run(&runner);
}
-int
+static int
gf_cli_print_status(char **title_values, gf_gsync_status_t **sts_vals,
int *spacing, int gsync_count, int number_of_fields,
int is_detail)
@@ -5667,7 +4658,7 @@ gf_cli_print_status(char **title_values, gf_gsync_status_t **sts_vals,
total_spacing += 4; /* For the spacing between the fields */
/* char pointers for each field */
- output_values = GF_CALLOC(number_of_fields, sizeof(char *),
+ output_values = GF_MALLOC(number_of_fields * sizeof(char *),
gf_common_mt_char);
if (!output_values) {
ret = -1;
@@ -5682,12 +4673,6 @@ gf_cli_print_status(char **title_values, gf_gsync_status_t **sts_vals,
}
}
- hyphens = GF_CALLOC(total_spacing + 1, sizeof(char), gf_common_mt_char);
- if (!hyphens) {
- ret = -1;
- goto out;
- }
-
cli_out(" ");
/* setting the title "NODE", "MASTER", etc. from title_values[]
@@ -5710,6 +4695,12 @@ gf_cli_print_status(char **title_values, gf_gsync_status_t **sts_vals,
output_values[10], output_values[11], output_values[12],
output_values[13], output_values[14], output_values[15]);
+ hyphens = GF_MALLOC((total_spacing + 1) * sizeof(char), gf_common_mt_char);
+ if (!hyphens) {
+ ret = -1;
+ goto out;
+ }
+
/* setting and printing the hyphens */
memset(hyphens, '-', total_spacing);
hyphens[total_spacing] = '\0';
@@ -5774,7 +4765,7 @@ gf_gsync_status_t_comparator(const void *p, const void *q)
return strcmp(slavekey1, slavekey2);
}
-int
+static int
gf_cli_read_status_data(dict_t *dict, gf_gsync_status_t **sts_vals,
int *spacing, int gsync_count, int number_of_fields)
{
@@ -5814,7 +4805,7 @@ out:
return ret;
}
-int
+static int
gf_cli_gsync_status_output(dict_t *dict, gf_boolean_t is_detail)
{
int gsync_count = 0;
@@ -5825,47 +4816,43 @@ gf_cli_gsync_status_output(dict_t *dict, gf_boolean_t is_detail)
char errmsg[1024] = "";
char *master = NULL;
char *slave = NULL;
- char *title_values[] = {"MASTER NODE",
- "MASTER VOL",
- "MASTER BRICK",
- "SLAVE USER",
- "SLAVE",
- "SLAVE NODE",
- "STATUS",
- "CRAWL STATUS",
- "LAST_SYNCED",
- "ENTRY",
- "DATA",
- "META",
- "FAILURES",
- "CHECKPOINT TIME",
- "CHECKPOINT COMPLETED",
- "CHECKPOINT COMPLETION TIME"};
+ static char *title_values[] = {"MASTER NODE",
+ "MASTER VOL",
+ "MASTER BRICK",
+ "SLAVE USER",
+ "SLAVE",
+ "SLAVE NODE",
+ "STATUS",
+ "CRAWL STATUS",
+ "LAST_SYNCED",
+ "ENTRY",
+ "DATA",
+ "META",
+ "FAILURES",
+ "CHECKPOINT TIME",
+ "CHECKPOINT COMPLETED",
+ "CHECKPOINT COMPLETION TIME"};
gf_gsync_status_t **sts_vals = NULL;
/* Checks if any session is active or not */
- ret = dict_get_int32(dict, "gsync-count", &gsync_count);
+ ret = dict_get_int32_sizen(dict, "gsync-count", &gsync_count);
if (ret) {
- ret = dict_get_str(dict, "master", &master);
+ ret = dict_get_str_sizen(dict, "master", &master);
- ret = dict_get_str(dict, "slave", &slave);
+ ret = dict_get_str_sizen(dict, "slave", &slave);
if (master) {
if (slave)
snprintf(errmsg, sizeof(errmsg),
- "No active "
- "geo-replication sessions between %s"
+ "No active geo-replication sessions between %s"
" and %s",
master, slave);
else
snprintf(errmsg, sizeof(errmsg),
- "No active "
- "geo-replication sessions for %s",
- master);
+ "No active geo-replication sessions for %s", master);
} else
snprintf(errmsg, sizeof(errmsg),
- "No active "
- "geo-replication sessions");
+ "No active geo-replication sessions");
gf_log("cli", GF_LOG_INFO, "%s", errmsg);
cli_out("%s", errmsg);
@@ -5885,14 +4872,6 @@ gf_cli_gsync_status_output(dict_t *dict, gf_boolean_t is_detail)
ret = -1;
goto out;
}
- for (i = 0; i < gsync_count; i++) {
- sts_vals[i] = GF_CALLOC(1, sizeof(gf_gsync_status_t),
- gf_common_mt_char);
- if (!sts_vals[i]) {
- ret = -1;
- goto out;
- }
- }
ret = gf_cli_read_status_data(dict, sts_vals, spacing, gsync_count,
num_of_fields);
@@ -5921,13 +4900,13 @@ write_contents_to_common_pem_file(dict_t *dict, int output_count)
char *workdir = NULL;
char common_pem_file[PATH_MAX] = "";
char *output = NULL;
- char output_name[PATH_MAX] = "";
+ char output_name[32] = "";
int bytes_written = 0;
int fd = -1;
int ret = -1;
int i = -1;
- ret = dict_get_str(dict, "glusterd_workdir", &workdir);
+ ret = dict_get_str_sizen(dict, "glusterd_workdir", &workdir);
if (ret || !workdir) {
gf_log("", GF_LOG_ERROR, "Unable to fetch workdir");
ret = -1;
@@ -5941,17 +4920,15 @@ write_contents_to_common_pem_file(dict_t *dict, int output_count)
fd = open(common_pem_file, O_WRONLY | O_CREAT, 0600);
if (fd == -1) {
- gf_log("", GF_LOG_ERROR,
- "Failed to open %s"
- " Error : %s",
+ gf_log("", GF_LOG_ERROR, "Failed to open %s Error : %s",
common_pem_file, strerror(errno));
ret = -1;
goto out;
}
for (i = 1; i <= output_count; i++) {
- snprintf(output_name, sizeof(output_name), "output_%d", i);
- ret = dict_get_str(dict, output_name, &output);
+ ret = snprintf(output_name, sizeof(output_name), "output_%d", i);
+ ret = dict_get_strn(dict, output_name, ret, &output);
if (ret) {
gf_log("", GF_LOG_ERROR, "Failed to get %s.", output_name);
cli_out("Unable to fetch output.");
@@ -5959,9 +4936,7 @@ write_contents_to_common_pem_file(dict_t *dict, int output_count)
if (output) {
bytes_written = sys_write(fd, output, strlen(output));
if (bytes_written != strlen(output)) {
- gf_log("", GF_LOG_ERROR,
- "Failed to write "
- "to %s",
+ gf_log("", GF_LOG_ERROR, "Failed to write to %s",
common_pem_file);
ret = -1;
goto out;
@@ -5983,11 +4958,11 @@ out:
if (fd >= 0)
sys_close(fd);
- gf_log("", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("", GF_LOG_DEBUG, RETURNING, ret);
return ret;
}
-int
+static int
gf_cli_sys_exec_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -5996,7 +4971,7 @@ gf_cli_sys_exec_cbk(struct rpc_req *req, struct iovec *iov, int count,
int i = -1;
char *output = NULL;
char *command = NULL;
- char output_name[PATH_MAX] = "";
+ char output_name[32] = "";
gf_cli_rsp rsp = {
0,
};
@@ -6011,7 +4986,7 @@ gf_cli_sys_exec_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ XDR_DECODE_FAIL);
goto out;
}
@@ -6033,14 +5008,14 @@ gf_cli_sys_exec_cbk(struct rpc_req *req, struct iovec *iov, int count,
goto out;
}
- ret = dict_get_int32(dict, "output_count", &output_count);
+ ret = dict_get_int32_sizen(dict, "output_count", &output_count);
if (ret) {
cli_out("Command executed successfully.");
ret = 0;
goto out;
}
- ret = dict_get_str(dict, "command", &command);
+ ret = dict_get_str_sizen(dict, "command", &command);
if (ret) {
gf_log("", GF_LOG_ERROR, "Unable to get command from dict");
goto out;
@@ -6053,8 +5028,8 @@ gf_cli_sys_exec_cbk(struct rpc_req *req, struct iovec *iov, int count,
}
for (i = 1; i <= output_count; i++) {
- snprintf(output_name, sizeof(output_name), "output_%d", i);
- ret = dict_get_str(dict, output_name, &output);
+ ret = snprintf(output_name, sizeof(output_name), "output_%d", i);
+ ret = dict_get_strn(dict, output_name, ret, &output);
if (ret) {
gf_log("", GF_LOG_ERROR, "Failed to get %s.", output_name);
cli_out("Unable to fetch output.");
@@ -6074,7 +5049,7 @@ out:
return ret;
}
-int
+static int
gf_cli_copy_file_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -6093,7 +5068,7 @@ gf_cli_copy_file_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ XDR_DECODE_FAIL);
goto out;
}
@@ -6125,7 +5100,7 @@ out:
return ret;
}
-int
+static int
gf_cli_gsync_set_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -6149,7 +5124,7 @@ gf_cli_gsync_set_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ XDR_DECODE_FAIL);
goto out;
}
@@ -6169,7 +5144,7 @@ gf_cli_gsync_set_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = cli_xml_output_vol_gsync(dict, rsp.op_ret, rsp.op_errno,
rsp.op_errstr);
if (ret)
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto out;
}
@@ -6180,11 +5155,11 @@ gf_cli_gsync_set_cbk(struct rpc_req *req, struct iovec *iov, int count,
goto out;
}
- ret = dict_get_str(dict, "gsync-status", &gsync_status);
+ ret = dict_get_str_sizen(dict, "gsync-status", &gsync_status);
if (!ret)
cli_out("%s", gsync_status);
- ret = dict_get_int32(dict, "type", &type);
+ ret = dict_get_int32_sizen(dict, "type", &type);
if (ret) {
gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
"failed to get type");
@@ -6194,29 +5169,25 @@ gf_cli_gsync_set_cbk(struct rpc_req *req, struct iovec *iov, int count,
switch (type) {
case GF_GSYNC_OPTION_TYPE_START:
case GF_GSYNC_OPTION_TYPE_STOP:
- if (dict_get_str(dict, "master", &master) != 0)
+ if (dict_get_str_sizen(dict, "master", &master) != 0)
master = "???";
- if (dict_get_str(dict, "slave", &slave) != 0)
+ if (dict_get_str_sizen(dict, "slave", &slave) != 0)
slave = "???";
cli_out(
- "%s " GEOREP
- " session between %s & %s"
- " has been successful",
+ "%s " GEOREP " session between %s & %s has been successful",
type == GF_GSYNC_OPTION_TYPE_START ? "Starting" : "Stopping",
master, slave);
break;
case GF_GSYNC_OPTION_TYPE_PAUSE:
case GF_GSYNC_OPTION_TYPE_RESUME:
- if (dict_get_str(dict, "master", &master) != 0)
+ if (dict_get_str_sizen(dict, "master", &master) != 0)
master = "???";
- if (dict_get_str(dict, "slave", &slave) != 0)
+ if (dict_get_str_sizen(dict, "slave", &slave) != 0)
slave = "???";
- cli_out("%s " GEOREP
- " session between %s & %s"
- " has been successful",
+ cli_out("%s " GEOREP " session between %s & %s has been successful",
type == GF_GSYNC_OPTION_TYPE_PAUSE ? "Pausing" : "Resuming",
master, slave);
break;
@@ -6232,24 +5203,22 @@ gf_cli_gsync_set_cbk(struct rpc_req *req, struct iovec *iov, int count,
break;
case GF_GSYNC_OPTION_TYPE_DELETE:
- if (dict_get_str(dict, "master", &master) != 0)
+ if (dict_get_str_sizen(dict, "master", &master) != 0)
master = "???";
- if (dict_get_str(dict, "slave", &slave) != 0)
+ if (dict_get_str_sizen(dict, "slave", &slave) != 0)
slave = "???";
cli_out("Deleting " GEOREP
- " session between %s & %s"
- " has been successful",
+ " session between %s & %s has been successful",
master, slave);
break;
case GF_GSYNC_OPTION_TYPE_CREATE:
- if (dict_get_str(dict, "master", &master) != 0)
+ if (dict_get_str_sizen(dict, "master", &master) != 0)
master = "???";
- if (dict_get_str(dict, "slave", &slave) != 0)
+ if (dict_get_str_sizen(dict, "slave", &slave) != 0)
slave = "???";
cli_out("Creating " GEOREP
- " session between %s & %s"
- " has been successful",
+ " session between %s & %s has been successful",
master, slave);
break;
@@ -6265,7 +5234,7 @@ out:
return ret;
}
-int32_t
+static int32_t
gf_cli_sys_exec(call_frame_t *frame, xlator_t *this, void *data)
{
int ret = 0;
@@ -6274,23 +5243,22 @@ gf_cli_sys_exec(call_frame_t *frame, xlator_t *this, void *data)
0,
}};
- if (!frame || !this || !data) {
- ret = -1;
- gf_log("cli", GF_LOG_ERROR, "Invalid data");
- goto out;
- }
-
dict = data;
ret = cli_to_glusterd(&req, frame, gf_cli_sys_exec_cbk,
(xdrproc_t)xdr_gf_cli_req, dict, GLUSTER_CLI_SYS_EXEC,
this, cli_rpc_prog, NULL);
-out:
+ if (ret)
+ if (!frame || !this || !data) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid data");
+ }
+
GF_FREE(req.dict.dict_val);
return ret;
}
-int32_t
+static int32_t
gf_cli_copy_file(call_frame_t *frame, xlator_t *this, void *data)
{
int ret = 0;
@@ -6299,23 +5267,22 @@ gf_cli_copy_file(call_frame_t *frame, xlator_t *this, void *data)
0,
}};
- if (!frame || !this || !data) {
- ret = -1;
- gf_log("cli", GF_LOG_ERROR, "Invalid data");
- goto out;
- }
-
dict = data;
ret = cli_to_glusterd(&req, frame, gf_cli_copy_file_cbk,
(xdrproc_t)xdr_gf_cli_req, dict,
GLUSTER_CLI_COPY_FILE, this, cli_rpc_prog, NULL);
-out:
+ if (ret)
+ if (!frame || !this || !data) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid data");
+ }
+
GF_FREE(req.dict.dict_val);
return ret;
}
-int32_t
+static int32_t
gf_cli_gsync_set(call_frame_t *frame, xlator_t *this, void *data)
{
int ret = 0;
@@ -6324,18 +5291,11 @@ gf_cli_gsync_set(call_frame_t *frame, xlator_t *this, void *data)
0,
}};
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
-
dict = data;
ret = cli_to_glusterd(&req, frame, gf_cli_gsync_set_cbk,
(xdrproc_t)xdr_gf_cli_req, dict,
GLUSTER_CLI_GSYNC_SET, this, cli_rpc_prog, NULL);
-
-out:
GF_FREE(req.dict.dict_val);
return ret;
@@ -6346,20 +5306,18 @@ cli_profile_info_percentage_cmp(void *a, void *b)
{
cli_profile_info_t *ia = NULL;
cli_profile_info_t *ib = NULL;
- int ret = 0;
ia = a;
ib = b;
if (ia->percentage_avg_latency < ib->percentage_avg_latency)
- ret = -1;
+ return -1;
else if (ia->percentage_avg_latency > ib->percentage_avg_latency)
- ret = 1;
- else
- ret = 0;
- return ret;
+ return 1;
+
+ return 0;
}
-void
+static void
cmd_profile_volume_brick_out(dict_t *dict, int count, int interval)
{
char key[256] = {0};
@@ -6383,7 +5341,8 @@ cmd_profile_volume_brick_out(dict_t *dict, int count, int interval)
double total_percentage_latency = 0;
for (i = 0; i < 32; i++) {
- snprintf(key, sizeof(key), "%d-%d-read-%d", count, interval, (1 << i));
+ snprintf(key, sizeof(key), "%d-%d-read-%" PRIu32, count, interval,
+ (1U << i));
ret = dict_get_uint64(dict, key, &rb_counts[i]);
if (ret) {
gf_log("cli", GF_LOG_DEBUG, "failed to get %s from dict", key);
@@ -6391,7 +5350,8 @@ cmd_profile_volume_brick_out(dict_t *dict, int count, int interval)
}
for (i = 0; i < 32; i++) {
- snprintf(key, sizeof(key), "%d-%d-write-%d", count, interval, (1 << i));
+ snprintf(key, sizeof(key), "%d-%d-write-%" PRIu32, count, interval,
+ (1U << i));
ret = dict_get_uint64(dict, key, &wb_counts[i]);
if (ret) {
gf_log("cli", GF_LOG_DEBUG, "failed to get %s from dict", key);
@@ -6477,7 +5437,8 @@ cmd_profile_volume_brick_out(dict_t *dict, int count, int interval)
if ((rb_counts[i] == 0) && (wb_counts[i] == 0))
continue;
per_line++;
- snprintf(output + index, sizeof(output) - index, "%19db+ ", (1 << i));
+ snprintf(output + index, sizeof(output) - index, "%19" PRIu32 "b+ ",
+ (1U << i));
if (rb_counts[i]) {
snprintf(read_blocks + index, sizeof(read_blocks) - index,
"%21" PRId64 " ", rb_counts[i]);
@@ -6554,7 +5515,7 @@ cmd_profile_volume_brick_out(dict_t *dict, int count, int interval)
cli_out(" ");
}
-int32_t
+static int32_t
gf_cli_profile_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -6564,7 +5525,8 @@ gf_cli_profile_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
int ret = -1;
dict_t *dict = NULL;
gf1_cli_stats_op op = GF_CLI_STATS_NONE;
- char key[256] = {0};
+ char key[64] = {0};
+ int len;
int interval = 0;
int i = 1;
int32_t brick_count = 0;
@@ -6586,7 +5548,7 @@ gf_cli_profile_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ XDR_DECODE_FAIL);
goto out;
}
@@ -6600,7 +5562,7 @@ gf_cli_profile_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
if (ret) {
- gf_log("", GF_LOG_ERROR, "Unable to allocate memory");
+ gf_log("", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
goto out;
}
@@ -6608,15 +5570,15 @@ gf_cli_profile_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = cli_xml_output_vol_profile(dict, rsp.op_ret, rsp.op_errno,
rsp.op_errstr);
if (ret)
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto out;
}
- ret = dict_get_str(dict, "volname", &volname);
+ ret = dict_get_int32_sizen(dict, "op", (int32_t *)&op);
if (ret)
goto out;
- ret = dict_get_int32(dict, "op", (int32_t *)&op);
+ ret = dict_get_str_sizen(dict, "volname", &volname);
if (ret)
goto out;
@@ -6651,11 +5613,7 @@ gf_cli_profile_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
goto out;
}
- ret = dict_get_int32(dict, "info-op", (int32_t *)&info_op);
- if (ret)
- goto out;
-
- ret = dict_get_int32(dict, "count", &brick_count);
+ ret = dict_get_int32_sizen(dict, "count", &brick_count);
if (ret)
goto out;
@@ -6664,9 +5622,13 @@ gf_cli_profile_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
goto out;
}
+ ret = dict_get_int32_sizen(dict, "info-op", (int32_t *)&info_op);
+ if (ret)
+ goto out;
+
while (i <= brick_count) {
- snprintf(key, sizeof(key), "%d-brick", i);
- ret = dict_get_str(dict, key, &brick);
+ len = snprintf(key, sizeof(key), "%d-brick", i);
+ ret = dict_get_strn(dict, key, len, &brick);
if (ret) {
gf_log("cli", GF_LOG_ERROR, "Couldn't get brick name");
goto out;
@@ -6675,28 +5637,28 @@ gf_cli_profile_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = dict_get_str_boolean(dict, "nfs", _gf_false);
if (ret)
- snprintf(str, sizeof(str), "NFS Server : %s", brick);
+ len = snprintf(str, sizeof(str), "NFS Server : %s", brick);
else
- snprintf(str, sizeof(str), "Brick: %s", brick);
+ len = snprintf(str, sizeof(str), "Brick: %s", brick);
cli_out("%s", str);
- memset(str, '-', strlen(str));
+ memset(str, '-', len);
cli_out("%s", str);
if (GF_CLI_INFO_CLEAR == info_op) {
- snprintf(key, sizeof(key), "%d-stats-cleared", i);
- ret = dict_get_int32(dict, key, &stats_cleared);
+ len = snprintf(key, sizeof(key), "%d-stats-cleared", i);
+ ret = dict_get_int32n(dict, key, len, &stats_cleared);
if (ret)
goto out;
cli_out(stats_cleared ? "Cleared stats."
: "Failed to clear stats.");
} else {
- snprintf(key, sizeof(key), "%d-cumulative", i);
- ret = dict_get_int32(dict, key, &interval);
+ len = snprintf(key, sizeof(key), "%d-cumulative", i);
+ ret = dict_get_int32n(dict, key, len, &interval);
if (ret == 0)
cmd_profile_volume_brick_out(dict, i, interval);
- snprintf(key, sizeof(key), "%d-interval", i);
- ret = dict_get_int32(dict, key, &interval);
+ len = snprintf(key, sizeof(key), "%d-interval", i);
+ ret = dict_get_int32n(dict, key, len, &interval);
if (ret == 0)
cmd_profile_volume_brick_out(dict, i, interval);
}
@@ -6712,7 +5674,7 @@ out:
return ret;
}
-int32_t
+static int32_t
gf_cli_profile_volume(call_frame_t *frame, xlator_t *this, void *data)
{
int ret = -1;
@@ -6725,22 +5687,18 @@ gf_cli_profile_volume(call_frame_t *frame, xlator_t *this, void *data)
GF_ASSERT(this);
GF_ASSERT(data);
- if (!frame || !this || !data)
- goto out;
dict = data;
ret = cli_to_glusterd(&req, frame, gf_cli_profile_volume_cbk,
(xdrproc_t)xdr_gf_cli_req, dict,
GLUSTER_CLI_PROFILE_VOLUME, this, cli_rpc_prog, NULL);
-
-out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
GF_FREE(req.dict.dict_val);
return ret;
}
-int32_t
+static int32_t
gf_cli_top_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -6751,6 +5709,7 @@ gf_cli_top_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
dict_t *dict = NULL;
gf1_cli_stats_op op = GF_CLI_STATS_NONE;
char key[256] = {0};
+ int keylen;
int i = 0;
int32_t brick_count = 0;
char brick[1024];
@@ -6766,7 +5725,7 @@ gf_cli_top_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
double time = 0;
int32_t time_sec = 0;
long int time_usec = 0;
- char timestr[256] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
char *openfd_str = NULL;
@@ -6784,7 +5743,7 @@ gf_cli_top_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ XDR_DECODE_FAIL);
goto out;
}
@@ -6806,11 +5765,11 @@ gf_cli_top_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
if (ret) {
- gf_log("", GF_LOG_ERROR, "Unable to allocate memory");
+ gf_log("", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
goto out;
}
- ret = dict_get_int32(dict, "op", (int32_t *)&op);
+ ret = dict_get_int32_sizen(dict, "op", (int32_t *)&op);
if (op != GF_CLI_STATS_TOP) {
ret = 0;
@@ -6821,16 +5780,16 @@ gf_cli_top_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = cli_xml_output_vol_top(dict, rsp.op_ret, rsp.op_errno,
rsp.op_errstr);
if (ret) {
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
}
goto out;
}
- ret = dict_get_int32(dict, "count", &brick_count);
+ ret = dict_get_int32_sizen(dict, "count", &brick_count);
if (ret)
goto out;
- snprintf(key, sizeof(key), "%d-top-op", 1);
- ret = dict_get_int32(dict, key, (int32_t *)&top_op);
+ keylen = snprintf(key, sizeof(key), "%d-top-op", 1);
+ ret = dict_get_int32n(dict, key, keylen, (int32_t *)&top_op);
if (ret)
goto out;
@@ -6838,16 +5797,16 @@ gf_cli_top_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
while (i < brick_count) {
i++;
- snprintf(brick, sizeof(brick), "%d-brick", i);
- ret = dict_get_str(dict, brick, &bricks);
+ keylen = snprintf(brick, sizeof(brick), "%d-brick", i);
+ ret = dict_get_strn(dict, brick, keylen, &bricks);
if (ret)
goto out;
nfs = dict_get_str_boolean(dict, "nfs", _gf_false);
if (clear_stats) {
- snprintf(key, sizeof(key), "%d-stats-cleared", i);
- ret = dict_get_int32(dict, key, &stats_cleared);
+ keylen = snprintf(key, sizeof(key), "%d-stats-cleared", i);
+ ret = dict_get_int32n(dict, key, keylen, &stats_cleared);
if (ret)
goto out;
cli_out(stats_cleared ? "Cleared stats for %s %s"
@@ -6861,8 +5820,8 @@ gf_cli_top_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
else
cli_out("Brick: %s", bricks);
- snprintf(key, sizeof(key), "%d-members", i);
- ret = dict_get_int32(dict, key, &members);
+ keylen = snprintf(key, sizeof(key), "%d-members", i);
+ ret = dict_get_int32n(dict, key, keylen, &members);
switch (top_op) {
case GF_CLI_TOP_OPEN:
@@ -6874,13 +5833,12 @@ gf_cli_top_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = dict_get_uint64(dict, key, &max_nr_open);
if (ret)
goto out;
- snprintf(key, sizeof(key), "%d-max-openfd-time", i);
- ret = dict_get_str(dict, key, &openfd_str);
+ keylen = snprintf(key, sizeof(key), "%d-max-openfd-time", i);
+ ret = dict_get_strn(dict, key, keylen, &openfd_str);
if (ret)
goto out;
- cli_out("Current open fds: %" PRIu64
- ", Max open"
- " fds: %" PRIu64 ", Max openfd time: %s",
+ cli_out("Current open fds: %" PRIu64 ", Max open fds: %" PRIu64
+ ", Max openfd time: %s",
nr_open, max_nr_open, openfd_str);
case GF_CLI_TOP_READ:
case GF_CLI_TOP_WRITE:
@@ -6918,8 +5876,8 @@ gf_cli_top_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
}
for (j = 1; j <= members; j++) {
- snprintf(key, sizeof(key), "%d-filename-%d", i, j);
- ret = dict_get_str(dict, key, &filename);
+ keylen = snprintf(key, sizeof(key), "%d-filename-%d", i, j);
+ ret = dict_get_strn(dict, key, keylen, &filename);
if (ret)
break;
snprintf(key, sizeof(key), "%d-value-%d", i, j);
@@ -6928,12 +5886,12 @@ gf_cli_top_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
goto out;
if (top_op == GF_CLI_TOP_READ_PERF ||
top_op == GF_CLI_TOP_WRITE_PERF) {
- snprintf(key, sizeof(key), "%d-time-sec-%d", i, j);
- ret = dict_get_int32(dict, key, (int32_t *)&time_sec);
+ keylen = snprintf(key, sizeof(key), "%d-time-sec-%d", i, j);
+ ret = dict_get_int32n(dict, key, keylen, (int32_t *)&time_sec);
if (ret)
goto out;
- snprintf(key, sizeof(key), "%d-time-usec-%d", i, j);
- ret = dict_get_int32(dict, key, (int32_t *)&time_usec);
+ keylen = snprintf(key, sizeof(key), "%d-time-usec-%d", i, j);
+ ret = dict_get_int32n(dict, key, keylen, (int32_t *)&time_usec);
if (ret)
goto out;
gf_time_fmt(timestr, sizeof timestr, time_sec, gf_timefmt_FT);
@@ -6967,7 +5925,7 @@ out:
return ret;
}
-int32_t
+static int32_t
gf_cli_top_volume(call_frame_t *frame, xlator_t *this, void *data)
{
int ret = -1;
@@ -6980,21 +5938,17 @@ gf_cli_top_volume(call_frame_t *frame, xlator_t *this, void *data)
GF_ASSERT(this);
GF_ASSERT(data);
- if (!frame || !this || !data)
- goto out;
dict = data;
ret = cli_to_glusterd(&req, frame, gf_cli_top_volume_cbk,
(xdrproc_t)xdr_gf_cli_req, dict,
GLUSTER_CLI_PROFILE_VOLUME, this, cli_rpc_prog, NULL);
-
-out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
GF_FREE(req.dict.dict_val);
return ret;
}
-int
+static int
gf_cli_getwd_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -7012,7 +5966,7 @@ gf_cli_getwd_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf1_cli_getwd_rsp);
if (ret < 0) {
gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ XDR_DECODE_FAIL);
goto out;
}
@@ -7037,7 +5991,7 @@ out:
return ret;
}
-int32_t
+static int32_t
gf_cli_getwd(call_frame_t *frame, xlator_t *this, void *data)
{
int ret = -1;
@@ -7056,12 +6010,12 @@ gf_cli_getwd(call_frame_t *frame, xlator_t *this, void *data)
(xdrproc_t)xdr_gf1_cli_getwd_req);
out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
return ret;
}
-void
+static void
cli_print_volume_status_mempool(dict_t *dict, char *prefix)
{
int ret = -1;
@@ -7074,16 +6028,18 @@ cli_print_volume_status_mempool(dict_t *dict, char *prefix)
int32_t maxalloc = 0;
uint64_t pool_misses = 0;
int32_t maxstdalloc = 0;
- char key[1024] = {
+ char key[128] = {
+ /* prefix is really small 'brick%d' really */
0,
};
+ int keylen;
int i = 0;
GF_ASSERT(dict);
GF_ASSERT(prefix);
- snprintf(key, sizeof(key), "%s.mempool-count", prefix);
- ret = dict_get_int32(dict, key, &mempool_count);
+ keylen = snprintf(key, sizeof(key), "%s.mempool-count", prefix);
+ ret = dict_get_int32n(dict, key, keylen, &mempool_count);
if (ret)
goto out;
@@ -7096,18 +6052,18 @@ cli_print_volume_status_mempool(dict_t *dict, char *prefix)
"------------");
for (i = 0; i < mempool_count; i++) {
- snprintf(key, sizeof(key), "%s.pool%d.name", prefix, i);
- ret = dict_get_str(dict, key, &name);
+ keylen = snprintf(key, sizeof(key), "%s.pool%d.name", prefix, i);
+ ret = dict_get_strn(dict, key, keylen, &name);
if (ret)
goto out;
- snprintf(key, sizeof(key), "%s.pool%d.hotcount", prefix, i);
- ret = dict_get_int32(dict, key, &hotcount);
+ keylen = snprintf(key, sizeof(key), "%s.pool%d.hotcount", prefix, i);
+ ret = dict_get_int32n(dict, key, keylen, &hotcount);
if (ret)
goto out;
- snprintf(key, sizeof(key), "%s.pool%d.coldcount", prefix, i);
- ret = dict_get_int32(dict, key, &coldcount);
+ keylen = snprintf(key, sizeof(key), "%s.pool%d.coldcount", prefix, i);
+ ret = dict_get_int32n(dict, key, keylen, &coldcount);
if (ret)
goto out;
@@ -7121,13 +6077,14 @@ cli_print_volume_status_mempool(dict_t *dict, char *prefix)
if (ret)
goto out;
- snprintf(key, sizeof(key), "%s.pool%d.max_alloc", prefix, i);
- ret = dict_get_int32(dict, key, &maxalloc);
+ keylen = snprintf(key, sizeof(key), "%s.pool%d.max_alloc", prefix, i);
+ ret = dict_get_int32n(dict, key, keylen, &maxalloc);
if (ret)
goto out;
- snprintf(key, sizeof(key), "%s.pool%d.max-stdalloc", prefix, i);
- ret = dict_get_int32(dict, key, &maxstdalloc);
+ keylen = snprintf(key, sizeof(key), "%s.pool%d.max-stdalloc", prefix,
+ i);
+ ret = dict_get_int32n(dict, key, keylen, &maxstdalloc);
if (ret)
goto out;
@@ -7146,7 +6103,7 @@ out:
return;
}
-void
+static void
cli_print_volume_status_mem(dict_t *dict, gf_boolean_t notbrick)
{
int ret = -1;
@@ -7154,7 +6111,7 @@ cli_print_volume_status_mem(dict_t *dict, gf_boolean_t notbrick)
char *hostname = NULL;
char *path = NULL;
int online = -1;
- char key[1024] = {
+ char key[64] = {
0,
};
int brick_index_max = -1;
@@ -7165,15 +6122,15 @@ cli_print_volume_status_mem(dict_t *dict, gf_boolean_t notbrick)
GF_ASSERT(dict);
- ret = dict_get_str(dict, "volname", &volname);
+ ret = dict_get_str_sizen(dict, "volname", &volname);
if (ret)
goto out;
cli_out("Memory status for volume : %s", volname);
- ret = dict_get_int32(dict, "brick-index-max", &brick_index_max);
+ ret = dict_get_int32_sizen(dict, "brick-index-max", &brick_index_max);
if (ret)
goto out;
- ret = dict_get_int32(dict, "other-count", &other_count);
+ ret = dict_get_int32_sizen(dict, "other-count", &other_count);
if (ret)
goto out;
@@ -7278,14 +6235,14 @@ out:
return;
}
-void
+static void
cli_print_volume_status_client_list(dict_t *dict, gf_boolean_t notbrick)
{
int ret = -1;
char *volname = NULL;
int client_count = 0;
int current_count = 0;
- char key[1024] = {
+ char key[64] = {
0,
};
int i = 0;
@@ -7293,7 +6250,6 @@ cli_print_volume_status_client_list(dict_t *dict, gf_boolean_t notbrick)
char *name = NULL;
gf_boolean_t is_fuse_done = _gf_false;
gf_boolean_t is_gfapi_done = _gf_false;
- gf_boolean_t is_tierd_done = _gf_false;
gf_boolean_t is_rebalance_done = _gf_false;
gf_boolean_t is_glustershd_done = _gf_false;
gf_boolean_t is_quotad_done = _gf_false;
@@ -7301,12 +6257,12 @@ cli_print_volume_status_client_list(dict_t *dict, gf_boolean_t notbrick)
GF_ASSERT(dict);
- ret = dict_get_str(dict, "volname", &volname);
+ ret = dict_get_str_sizen(dict, "volname", &volname);
if (ret)
goto out;
cli_out("Client connections for volume %s", volname);
- ret = dict_get_int32(dict, "client-count", &client_count);
+ ret = dict_get_int32_sizen(dict, "client-count", &client_count);
if (ret)
goto out;
@@ -7320,7 +6276,7 @@ cli_print_volume_status_client_list(dict_t *dict, gf_boolean_t notbrick)
if (!strncmp(name, "fuse", 4)) {
if (!is_fuse_done) {
is_fuse_done = _gf_true;
- ret = dict_get_int32(dict, "fuse-count", &current_count);
+ ret = dict_get_int32_sizen(dict, "fuse-count", &current_count);
if (ret)
goto out;
total = total + current_count;
@@ -7330,17 +6286,7 @@ cli_print_volume_status_client_list(dict_t *dict, gf_boolean_t notbrick)
} else if (!strncmp(name, "gfapi", 5)) {
if (!is_gfapi_done) {
is_gfapi_done = _gf_true;
- ret = dict_get_int32(dict, "gfapi-count", &current_count);
- if (ret)
- goto out;
- total = total + current_count;
- goto print;
- }
- continue;
- } else if (!strcmp(name, "tierd")) {
- if (!is_tierd_done) {
- is_tierd_done = _gf_true;
- ret = dict_get_int32(dict, "tierd-count", &current_count);
+ ret = dict_get_int32_sizen(dict, "gfapi-count", &current_count);
if (ret)
goto out;
total = total + current_count;
@@ -7350,7 +6296,8 @@ cli_print_volume_status_client_list(dict_t *dict, gf_boolean_t notbrick)
} else if (!strcmp(name, "rebalance")) {
if (!is_rebalance_done) {
is_rebalance_done = _gf_true;
- ret = dict_get_int32(dict, "rebalance-count", &current_count);
+ ret = dict_get_int32_sizen(dict, "rebalance-count",
+ &current_count);
if (ret)
goto out;
total = total + current_count;
@@ -7360,7 +6307,8 @@ cli_print_volume_status_client_list(dict_t *dict, gf_boolean_t notbrick)
} else if (!strcmp(name, "glustershd")) {
if (!is_glustershd_done) {
is_glustershd_done = _gf_true;
- ret = dict_get_int32(dict, "glustershd-count", &current_count);
+ ret = dict_get_int32_sizen(dict, "glustershd-count",
+ &current_count);
if (ret)
goto out;
total = total + current_count;
@@ -7370,7 +6318,8 @@ cli_print_volume_status_client_list(dict_t *dict, gf_boolean_t notbrick)
} else if (!strcmp(name, "quotad")) {
if (!is_quotad_done) {
is_quotad_done = _gf_true;
- ret = dict_get_int32(dict, "quotad-count", &current_count);
+ ret = dict_get_int32_sizen(dict, "quotad-count",
+ &current_count);
if (ret)
goto out;
total = total + current_count;
@@ -7380,7 +6329,7 @@ cli_print_volume_status_client_list(dict_t *dict, gf_boolean_t notbrick)
} else if (!strcmp(name, "snapd")) {
if (!is_snapd_done) {
is_snapd_done = _gf_true;
- ret = dict_get_int32(dict, "snapd-count", &current_count);
+ ret = dict_get_int32_sizen(dict, "snapd-count", &current_count);
if (ret)
goto out;
total = total + current_count;
@@ -7399,7 +6348,7 @@ out:
return;
}
-void
+static void
cli_print_volume_status_clients(dict_t *dict, gf_boolean_t notbrick)
{
int ret = -1;
@@ -7415,7 +6364,7 @@ cli_print_volume_status_clients(dict_t *dict, gf_boolean_t notbrick)
uint64_t bytesread = 0;
uint64_t byteswrite = 0;
uint32_t opversion = 0;
- char key[1024] = {
+ char key[128] = {
0,
};
int i = 0;
@@ -7423,15 +6372,15 @@ cli_print_volume_status_clients(dict_t *dict, gf_boolean_t notbrick)
GF_ASSERT(dict);
- ret = dict_get_str(dict, "volname", &volname);
+ ret = dict_get_str_sizen(dict, "volname", &volname);
if (ret)
goto out;
cli_out("Client connections for volume %s", volname);
- ret = dict_get_int32(dict, "brick-index-max", &brick_index_max);
+ ret = dict_get_int32_sizen(dict, "brick-index-max", &brick_index_max);
if (ret)
goto out;
- ret = dict_get_int32(dict, "other-count", &other_count);
+ ret = dict_get_int32_sizen(dict, "other-count", &other_count);
if (ret)
goto out;
@@ -7504,7 +6453,8 @@ out:
return;
}
-void
+#ifdef DEBUG /* this function is only used in debug */
+static void
cli_print_volume_status_inode_entry(dict_t *dict, char *prefix)
{
int ret = -1;
@@ -7573,8 +6523,9 @@ cli_print_volume_status_inode_entry(dict_t *dict, char *prefix)
out:
return;
}
+#endif
-void
+static void
cli_print_volume_status_itables(dict_t *dict, char *prefix)
{
int ret = -1;
@@ -7584,15 +6535,25 @@ cli_print_volume_status_itables(dict_t *dict, char *prefix)
uint32_t active_size = 0;
uint32_t lru_size = 0;
uint32_t purge_size = 0;
+ uint32_t lru_limit = 0;
+#ifdef DEBUG
int i = 0;
-
+#endif
GF_ASSERT(dict);
GF_ASSERT(prefix);
+ snprintf(key, sizeof(key), "%s.lru_limit", prefix);
+ ret = dict_get_uint32(dict, key, &lru_limit);
+ if (ret)
+ goto out;
+ cli_out("LRU limit : %u", lru_limit);
+
snprintf(key, sizeof(key), "%s.active_size", prefix);
ret = dict_get_uint32(dict, key, &active_size);
if (ret)
goto out;
+
+#ifdef DEBUG
if (active_size != 0) {
cli_out("Active inodes:");
cli_out("%-40s %14s %14s %9s", "GFID", "Lookups", "Ref", "IA type");
@@ -7603,11 +6564,17 @@ cli_print_volume_status_itables(dict_t *dict, char *prefix)
cli_print_volume_status_inode_entry(dict, key);
}
cli_out(" ");
+#else
+ cli_out("Active Inodes : %u", active_size);
+
+#endif
snprintf(key, sizeof(key), "%s.lru_size", prefix);
ret = dict_get_uint32(dict, key, &lru_size);
if (ret)
goto out;
+
+#ifdef DEBUG
if (lru_size != 0) {
cli_out("LRU inodes:");
cli_out("%-40s %14s %14s %9s", "GFID", "Lookups", "Ref", "IA type");
@@ -7618,11 +6585,15 @@ cli_print_volume_status_itables(dict_t *dict, char *prefix)
cli_print_volume_status_inode_entry(dict, key);
}
cli_out(" ");
+#else
+ cli_out("LRU Inodes : %u", lru_size);
+#endif
snprintf(key, sizeof(key), "%s.purge_size", prefix);
ret = dict_get_uint32(dict, key, &purge_size);
if (ret)
goto out;
+#ifdef DEBUG
if (purge_size != 0) {
cli_out("Purged inodes:");
cli_out("%-40s %14s %14s %9s", "GFID", "Lookups", "Ref", "IA type");
@@ -7632,12 +6603,15 @@ cli_print_volume_status_itables(dict_t *dict, char *prefix)
snprintf(key, sizeof(key), "%s.purge%d", prefix, i);
cli_print_volume_status_inode_entry(dict, key);
}
+#else
+ cli_out("Purge Inodes : %u", purge_size);
+#endif
out:
return;
}
-void
+static void
cli_print_volume_status_inode(dict_t *dict, gf_boolean_t notbrick)
{
int ret = -1;
@@ -7649,7 +6623,7 @@ cli_print_volume_status_inode(dict_t *dict, gf_boolean_t notbrick)
char *path = NULL;
int online = -1;
int conn_count = 0;
- char key[1024] = {
+ char key[64] = {
0,
};
int i = 0;
@@ -7657,15 +6631,15 @@ cli_print_volume_status_inode(dict_t *dict, gf_boolean_t notbrick)
GF_ASSERT(dict);
- ret = dict_get_str(dict, "volname", &volname);
+ ret = dict_get_str_sizen(dict, "volname", &volname);
if (ret)
goto out;
cli_out("Inode tables for volume %s", volname);
- ret = dict_get_int32(dict, "brick-index-max", &brick_index_max);
+ ret = dict_get_int32_sizen(dict, "brick-index-max", &brick_index_max);
if (ret)
goto out;
- ret = dict_get_int32(dict, "other-count", &other_count);
+ ret = dict_get_int32_sizen(dict, "other-count", &other_count);
if (ret)
goto out;
@@ -7722,7 +6696,7 @@ void
cli_print_volume_status_fdtable(dict_t *dict, char *prefix)
{
int ret = -1;
- char key[1024] = {
+ char key[256] = {
0,
};
int refcount = 0;
@@ -7790,7 +6764,7 @@ out:
return;
}
-void
+static void
cli_print_volume_status_fd(dict_t *dict, gf_boolean_t notbrick)
{
int ret = -1;
@@ -7802,7 +6776,7 @@ cli_print_volume_status_fd(dict_t *dict, gf_boolean_t notbrick)
char *path = NULL;
int online = -1;
int conn_count = 0;
- char key[1024] = {
+ char key[64] = {
0,
};
int i = 0;
@@ -7810,15 +6784,15 @@ cli_print_volume_status_fd(dict_t *dict, gf_boolean_t notbrick)
GF_ASSERT(dict);
- ret = dict_get_str(dict, "volname", &volname);
+ ret = dict_get_str_sizen(dict, "volname", &volname);
if (ret)
goto out;
cli_out("FD tables for volume %s", volname);
- ret = dict_get_int32(dict, "brick-index-max", &brick_index_max);
+ ret = dict_get_int32_sizen(dict, "brick-index-max", &brick_index_max);
if (ret)
goto out;
- ret = dict_get_int32(dict, "other-count", &other_count);
+ ret = dict_get_int32_sizen(dict, "other-count", &other_count);
if (ret)
goto out;
@@ -7871,7 +6845,7 @@ out:
return;
}
-void
+static void
cli_print_volume_status_call_frame(dict_t *dict, char *prefix)
{
int ret = -1;
@@ -7935,11 +6909,11 @@ cli_print_volume_status_call_frame(dict_t *dict, char *prefix)
cli_out(" Unwind To = %s", unwind_to);
}
-void
+static void
cli_print_volume_status_call_stack(dict_t *dict, char *prefix)
{
int ret = -1;
- char key[1024] = {
+ char key[256] = {
0,
};
int uid = 0;
@@ -7950,7 +6924,7 @@ cli_print_volume_status_call_stack(dict_t *dict, char *prefix)
int count = 0;
int i = 0;
- if (!dict || !prefix)
+ if (!prefix)
return;
snprintf(key, sizeof(key), "%s.uid", prefix);
@@ -8002,7 +6976,7 @@ cli_print_volume_status_call_stack(dict_t *dict, char *prefix)
cli_out(" ");
}
-void
+static void
cli_print_volume_status_callpool(dict_t *dict, gf_boolean_t notbrick)
{
int ret = -1;
@@ -8014,7 +6988,7 @@ cli_print_volume_status_callpool(dict_t *dict, gf_boolean_t notbrick)
char *path = NULL;
int online = -1;
int call_count = 0;
- char key[1024] = {
+ char key[64] = {
0,
};
int i = 0;
@@ -8022,15 +6996,15 @@ cli_print_volume_status_callpool(dict_t *dict, gf_boolean_t notbrick)
GF_ASSERT(dict);
- ret = dict_get_str(dict, "volname", &volname);
+ ret = dict_get_str_sizen(dict, "volname", &volname);
if (ret)
goto out;
cli_out("Pending calls for volume %s", volname);
- ret = dict_get_int32(dict, "brick-index-max", &brick_index_max);
+ ret = dict_get_int32_sizen(dict, "brick-index-max", &brick_index_max);
if (ret)
goto out;
- ret = dict_get_int32(dict, "other-count", &other_count);
+ ret = dict_get_int32_sizen(dict, "other-count", &other_count);
if (ret)
goto out;
@@ -8107,16 +7081,16 @@ cli_print_volume_status_tasks(dict_t *dict)
};
char *brick = NULL;
- ret = dict_get_str(dict, "volname", &volname);
- if (ret)
- goto out;
-
- ret = dict_get_int32(dict, "tasks", &task_count);
+ ret = dict_get_int32_sizen(dict, "tasks", &task_count);
if (ret) {
gf_log("cli", GF_LOG_ERROR, "Failed to get tasks count");
return;
}
+ ret = dict_get_str_sizen(dict, "volname", &volname);
+ if (ret)
+ goto out;
+
cli_out("Task Status of Volume %s", volname);
cli_print_line(CLI_BRICK_STATUS_LINE_LEN);
@@ -8185,7 +7159,7 @@ gf_cli_status_cbk(struct rpc_req *req, struct iovec *iov, int count,
int pid = -1;
uint32_t cmd = 0;
gf_boolean_t notbrick = _gf_false;
- char key[1024] = {
+ char key[64] = {
0,
};
char *hostname = NULL;
@@ -8210,7 +7184,7 @@ gf_cli_status_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ XDR_DECODE_FAIL);
goto out;
}
@@ -8232,8 +7206,7 @@ gf_cli_status_cbk(struct rpc_req *req, struct iovec *iov, int count,
snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
else
snprintf(msg, sizeof(msg),
- "Unable to obtain volume "
- "status information.");
+ "Unable to obtain volume status information.");
if (global_state->mode & GLUSTER_MODE_XML) {
if (!local->all)
@@ -8280,33 +7253,25 @@ gf_cli_status_cbk(struct rpc_req *req, struct iovec *iov, int count,
goto out;
}
- if ((cmd & GF_CLI_STATUS_NFS) || (cmd & GF_CLI_STATUS_SHD) ||
- (cmd & GF_CLI_STATUS_QUOTAD) || (cmd & GF_CLI_STATUS_SNAPD) ||
- (cmd & GF_CLI_STATUS_BITD) || (cmd & GF_CLI_STATUS_SCRUB) ||
- (cmd & GF_CLI_STATUS_TIERD))
- notbrick = _gf_true;
-
if (global_state->mode & GLUSTER_MODE_XML) {
if (!local->all) {
ret = cli_xml_output_vol_status_begin(local, rsp.op_ret,
rsp.op_errno, rsp.op_errstr);
if (ret) {
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto xml_end;
}
}
if (cmd & GF_CLI_STATUS_TASKS) {
ret = cli_xml_output_vol_status_tasks_detail(local, dict);
if (ret) {
- gf_log("cli", GF_LOG_ERROR,
- "Error outputting "
- "to xml");
+ gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
goto xml_end;
}
} else {
ret = cli_xml_output_vol_status(local, dict);
if (ret) {
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto xml_end;
}
}
@@ -8315,18 +7280,17 @@ gf_cli_status_cbk(struct rpc_req *req, struct iovec *iov, int count,
if (!local->all) {
ret = cli_xml_output_vol_status_end(local);
if (ret) {
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
}
}
goto out;
}
- status.brick = GF_MALLOC(PATH_MAX + 256, gf_common_mt_strdup);
- if (!status.brick) {
- errno = ENOMEM;
- ret = -1;
- goto out;
- }
+ if ((cmd & GF_CLI_STATUS_NFS) || (cmd & GF_CLI_STATUS_SHD) ||
+ (cmd & GF_CLI_STATUS_QUOTAD) || (cmd & GF_CLI_STATUS_SNAPD) ||
+ (cmd & GF_CLI_STATUS_BITD) || (cmd & GF_CLI_STATUS_SCRUB))
+ notbrick = _gf_true;
+
switch (cmd & GF_CLI_STATUS_MASK) {
case GF_CLI_STATUS_MEM:
cli_print_volume_status_mem(dict, notbrick);
@@ -8360,25 +7324,25 @@ gf_cli_status_cbk(struct rpc_req *req, struct iovec *iov, int count,
break;
}
- ret = dict_get_str(dict, "volname", &volname);
+ ret = dict_get_str_sizen(dict, "volname", &volname);
if (ret)
goto out;
- ret = dict_get_int32(dict, "brick-index-max", &brick_index_max);
+ ret = dict_get_int32_sizen(dict, "brick-index-max", &brick_index_max);
if (ret)
goto out;
- ret = dict_get_int32(dict, "other-count", &other_count);
+ ret = dict_get_int32_sizen(dict, "other-count", &other_count);
if (ret)
goto out;
index_max = brick_index_max + other_count;
- ret = dict_get_int32(dict, "type", &type);
+ ret = dict_get_int32_sizen(dict, "type", &type);
if (ret)
goto out;
- ret = dict_get_int32(dict, "hot_brick_count", &hot_brick_count);
+ ret = dict_get_int32_sizen(dict, "hot_brick_count", &hot_brick_count);
if (ret)
goto out;
@@ -8389,13 +7353,15 @@ gf_cli_status_cbk(struct rpc_req *req, struct iovec *iov, int count,
"Gluster process", "TCP Port", "RDMA Port", "Online", "Pid");
cli_print_line(CLI_BRICK_STATUS_LINE_LEN);
}
- if (type == GF_CLUSTER_TYPE_TIER) {
- cli_out("Hot Bricks:");
+
+ status.brick = GF_MALLOC(PATH_MAX + 256, gf_common_mt_strdup);
+ if (!status.brick) {
+ errno = ENOMEM;
+ ret = -1;
+ goto out;
}
+
for (i = 0; i <= index_max; i++) {
- if (type == GF_CLUSTER_TYPE_TIER && i == hot_brick_count) {
- cli_out("Cold Bricks:");
- }
status.rdma_port = 0;
snprintf(key, sizeof(key), "brick%d.hostname", i);
@@ -8411,14 +7377,13 @@ gf_cli_status_cbk(struct rpc_req *req, struct iovec *iov, int count,
/* Brick/not-brick is handled separately here as all
* types of nodes are contained in the default output
*/
- memset(status.brick, 0, PATH_MAX + 255);
+ status.brick[0] = '\0';
if (!strcmp(hostname, "NFS Server") ||
!strcmp(hostname, "Self-heal Daemon") ||
!strcmp(hostname, "Quota Daemon") ||
!strcmp(hostname, "Snapshot Daemon") ||
!strcmp(hostname, "Scrubber Daemon") ||
- !strcmp(hostname, "Bitrot Daemon") ||
- !strcmp(hostname, "Tier Daemon"))
+ !strcmp(hostname, "Bitrot Daemon"))
snprintf(status.brick, PATH_MAX + 255, "%s on %s", hostname, path);
else {
snprintf(key, sizeof(key), "brick%d.rdma_port", i);
@@ -8460,6 +7425,9 @@ gf_cli_status_cbk(struct rpc_req *req, struct iovec *iov, int count,
} else {
cli_print_brick_status(&status);
}
+
+ /* Allocatated memory using gf_asprintf*/
+ GF_FREE(status.pid_str);
}
cli_out(" ");
@@ -8481,7 +7449,7 @@ out:
return ret;
}
-int32_t
+static int32_t
gf_cli_status_volume(call_frame_t *frame, xlator_t *this, void *data)
{
gf_cli_req req = {{
@@ -8490,21 +7458,17 @@ gf_cli_status_volume(call_frame_t *frame, xlator_t *this, void *data)
int ret = -1;
dict_t *dict = NULL;
- if (!frame || !this || !data)
- goto out;
-
dict = data;
ret = cli_to_glusterd(&req, frame, gf_cli_status_cbk,
(xdrproc_t)xdr_gf_cli_req, dict,
GLUSTER_CLI_STATUS_VOLUME, this, cli_rpc_prog, NULL);
-out:
- gf_log("cli", GF_LOG_DEBUG, "Returning: %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
GF_FREE(req.dict.dict_val);
return ret;
}
-int
+static int
gf_cli_status_volume_all(call_frame_t *frame, xlator_t *this, void *data)
{
int i = 0;
@@ -8540,7 +7504,7 @@ gf_cli_status_volume_all(call_frame_t *frame, xlator_t *this, void *data)
if (ret)
goto out;
- ret = dict_get_int32((dict_t *)vol_dict, "vol_count", &vol_count);
+ ret = dict_get_int32_sizen((dict_t *)vol_dict, "vol_count", &vol_count);
if (ret) {
cli_err("Failed to get names of volumes");
goto out;
@@ -8554,7 +7518,7 @@ gf_cli_status_volume_all(call_frame_t *frame, xlator_t *this, void *data)
// TODO: Pass proper op_* values
ret = cli_xml_output_vol_status_begin(local, 0, 0, NULL);
if (ret) {
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto xml_end;
}
}
@@ -8570,12 +7534,12 @@ gf_cli_status_volume_all(call_frame_t *frame, xlator_t *this, void *data)
if (!dict)
goto out;
- snprintf(key, sizeof(key), "vol%d", i);
- ret = dict_get_str(vol_dict, key, &volname);
+ ret = snprintf(key, sizeof(key), "vol%d", i);
+ ret = dict_get_strn(vol_dict, key, ret, &volname);
if (ret)
goto out;
- ret = dict_set_str(dict, "volname", volname);
+ ret = dict_set_str_sizen(dict, "volname", volname);
if (ret)
goto out;
@@ -8632,7 +7596,7 @@ gf_cli_mount_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf1_cli_mount_rsp);
if (ret < 0) {
gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ XDR_DECODE_FAIL);
goto out;
}
@@ -8656,7 +7620,7 @@ out:
return ret;
}
-int32_t
+static int32_t
gf_cli_mount(call_frame_t *frame, xlator_t *this, void *data)
{
gf1_cli_mount_req req = {
@@ -8686,7 +7650,8 @@ gf_cli_mount(call_frame_t *frame, xlator_t *this, void *data)
(xdrproc_t)xdr_gf1_cli_mount_req);
out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ GF_FREE(req.dict.dict_val);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
return ret;
}
@@ -8708,7 +7673,7 @@ gf_cli_umount_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf1_cli_umount_rsp);
if (ret < 0) {
gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ XDR_DECODE_FAIL);
goto out;
}
@@ -8726,7 +7691,7 @@ out:
return ret;
}
-int32_t
+static int32_t
gf_cli_umount(call_frame_t *frame, xlator_t *this, void *data)
{
gf1_cli_umount_req req = {
@@ -8740,9 +7705,9 @@ gf_cli_umount(call_frame_t *frame, xlator_t *this, void *data)
dict = data;
- ret = dict_get_str(dict, "path", &req.path);
+ ret = dict_get_str_sizen(dict, "path", &req.path);
if (ret == 0)
- ret = dict_get_int32(dict, "lazy", &req.lazy);
+ ret = dict_get_int32_sizen(dict, "lazy", &req.lazy);
if (ret) {
ret = -1;
@@ -8754,11 +7719,11 @@ gf_cli_umount(call_frame_t *frame, xlator_t *this, void *data)
(xdrproc_t)xdr_gf1_cli_umount_req);
out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
return ret;
}
-void
+static void
cmd_heal_volume_statistics_out(dict_t *dict, int brick)
{
uint64_t num_entries = 0;
@@ -8839,18 +7804,18 @@ out:
return;
}
-void
+static void
cmd_heal_volume_brick_out(dict_t *dict, int brick)
{
uint64_t num_entries = 0;
int ret = 0;
- char key[256] = {0};
+ char key[64] = {0};
char *hostname = NULL;
char *path = NULL;
char *status = NULL;
uint64_t i = 0;
uint32_t time = 0;
- char timestr[32] = {0};
+ char timestr[GF_TIMESTR_SIZE] = {0};
char *shd_status = NULL;
snprintf(key, sizeof key, "%d-hostname", brick);
@@ -8865,7 +7830,7 @@ cmd_heal_volume_brick_out(dict_t *dict, int brick)
snprintf(key, sizeof key, "%d-status", brick);
ret = dict_get_str(dict, key, &status);
- if (status && strlen(status))
+ if (status && status[0] != '\0')
cli_out("Status: %s", status);
snprintf(key, sizeof key, "%d-shd-status", brick);
@@ -8901,12 +7866,12 @@ out:
return;
}
-void
+static void
cmd_heal_volume_statistics_heal_count_out(dict_t *dict, int brick)
{
uint64_t num_entries = 0;
int ret = 0;
- char key[256] = {0};
+ char key[64] = {0};
char *hostname = NULL;
char *path = NULL;
char *status = NULL;
@@ -8943,18 +7908,16 @@ out:
return;
}
-int
+static int
gf_is_cli_heal_get_command(gf_xl_afr_op_t heal_op)
{
/* If the command is get command value is 1 otherwise 0, for
invalid commands -1 */
- int get_cmds[GF_SHD_OP_HEAL_DISABLE + 1] = {
+ static int get_cmds[GF_SHD_OP_HEAL_DISABLE + 1] = {
[GF_SHD_OP_INVALID] = -1,
[GF_SHD_OP_HEAL_INDEX] = 0,
[GF_SHD_OP_HEAL_FULL] = 0,
[GF_SHD_OP_INDEX_SUMMARY] = 1,
- [GF_SHD_OP_HEALED_FILES] = 1,
- [GF_SHD_OP_HEAL_FAILED_FILES] = 1,
[GF_SHD_OP_SPLIT_BRAIN_FILES] = 1,
[GF_SHD_OP_STATISTICS] = 1,
[GF_SHD_OP_STATISTICS_HEAL_COUNT] = 1,
@@ -8983,9 +7946,9 @@ gf_cli_heal_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
int brick_count = 0;
int i = 0;
gf_xl_afr_op_t heal_op = GF_SHD_OP_INVALID;
- char *operation = NULL;
- char *substr = NULL;
- char *heal_op_str = NULL;
+ const char *operation = NULL;
+ const char *substr = NULL;
+ const char *heal_op_str = NULL;
GF_ASSERT(myframe);
@@ -9001,25 +7964,23 @@ gf_cli_heal_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log(frame->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
goto out;
}
- ret = dict_get_int32(local->dict, "heal-op", (int32_t *)&heal_op);
+ ret = dict_get_int32_sizen(local->dict, "heal-op", (int32_t *)&heal_op);
// TODO: Proper XML output
//#if (HAVE_LIB_XML)
// if (global_state->mode & GLUSTER_MODE_XML) {
// ret = cli_xml_output_dict ("volHeal", dict, rsp.op_ret,
// rsp.op_errno, rsp.op_errstr);
// if (ret)
- // gf_log ("cli", GF_LOG_ERROR,
- // "Error outputting to xml");
+ // gf_log ("cli", GF_LOG_ERROR, XML_ERROR);
// goto out;
// }
//#endif
- ret = dict_get_str(local->dict, "volname", &volname);
+ ret = dict_get_str_sizen(local->dict, "volname", &volname);
if (ret) {
gf_log(frame->this->name, GF_LOG_ERROR, "failed to get volname");
goto out;
@@ -9033,26 +7994,16 @@ gf_cli_heal_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
case GF_SHD_OP_HEAL_INDEX:
operation = "Launching heal operation ";
heal_op_str = "to perform index self heal";
- substr =
- "\nUse heal info commands to check"
- " status.";
+ substr = "\nUse heal info commands to check status.";
break;
case GF_SHD_OP_HEAL_FULL:
operation = "Launching heal operation ";
heal_op_str = "to perform full self heal";
- substr =
- "\nUse heal info commands to check"
- " status.";
+ substr = "\nUse heal info commands to check status.";
break;
case GF_SHD_OP_INDEX_SUMMARY:
heal_op_str = "list of entries to be healed";
break;
- case GF_SHD_OP_HEALED_FILES:
- heal_op_str = "list of healed entries";
- break;
- case GF_SHD_OP_HEAL_FAILED_FILES:
- heal_op_str = "list of heal failed entries";
- break;
case GF_SHD_OP_SPLIT_BRAIN_FILES:
heal_op_str = "list of split brain entries";
break;
@@ -9065,12 +8016,14 @@ gf_cli_heal_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
case GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
heal_op_str = "count of entries to be healed per replica";
break;
- /* The below 4 cases are never hit; they're coded only to make
- * compiler warnings go away.*/
case GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE:
case GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME:
case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK:
case GF_SHD_OP_HEAL_SUMMARY:
+ case GF_SHD_OP_HEALED_FILES:
+ case GF_SHD_OP_HEAL_FAILED_FILES:
+ /* These cases are never hit; they're coded just to silence the
+ * compiler warnings.*/
break;
case GF_SHD_OP_INVALID:
@@ -9120,11 +8073,11 @@ gf_cli_heal_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
if (ret) {
- gf_log("", GF_LOG_ERROR, "Unable to allocate memory");
+ gf_log("", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
goto out;
}
- ret = dict_get_int32(dict, "count", &brick_count);
+ ret = dict_get_int32_sizen(dict, "count", &brick_count);
if (ret)
goto out;
@@ -9145,8 +8098,6 @@ gf_cli_heal_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
cmd_heal_volume_statistics_heal_count_out(dict, i);
break;
case GF_SHD_OP_INDEX_SUMMARY:
- case GF_SHD_OP_HEALED_FILES:
- case GF_SHD_OP_HEAL_FAILED_FILES:
case GF_SHD_OP_SPLIT_BRAIN_FILES:
for (i = 0; i < brick_count; i++)
cmd_heal_volume_brick_out(dict, i);
@@ -9165,7 +8116,7 @@ out:
return ret;
}
-int32_t
+static int32_t
gf_cli_heal_volume(call_frame_t *frame, xlator_t *this, void *data)
{
gf_cli_req req = {{
@@ -9174,26 +8125,20 @@ gf_cli_heal_volume(call_frame_t *frame, xlator_t *this, void *data)
int ret = 0;
dict_t *dict = NULL;
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
-
dict = data;
ret = cli_to_glusterd(&req, frame, gf_cli_heal_volume_cbk,
(xdrproc_t)xdr_gf_cli_req, dict,
GLUSTER_CLI_HEAL_VOLUME, this, cli_rpc_prog, NULL);
-out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
GF_FREE(req.dict.dict_val);
return ret;
}
-int32_t
+static int32_t
gf_cli_statedump_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -9201,9 +8146,7 @@ gf_cli_statedump_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
0,
};
int ret = -1;
- char msg[1024] = {
- 0,
- };
+ char msg[1024] = "Volume statedump successful";
GF_ASSERT(myframe);
@@ -9213,20 +8156,18 @@ gf_cli_statedump_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ XDR_DECODE_FAIL);
goto out;
}
gf_log("cli", GF_LOG_DEBUG, "Received response to statedump");
if (rsp.op_ret)
snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
- else
- snprintf(msg, sizeof(msg), "Volume statedump successful");
if (global_state->mode & GLUSTER_MODE_XML) {
ret = cli_xml_output_str("volStatedump", msg, rsp.op_ret, rsp.op_errno,
rsp.op_errstr);
if (ret)
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto out;
}
@@ -9242,7 +8183,7 @@ out:
return ret;
}
-int32_t
+static int32_t
gf_cli_statedump_volume(call_frame_t *frame, xlator_t *this, void *data)
{
gf_cli_req req = {{
@@ -9251,23 +8192,18 @@ gf_cli_statedump_volume(call_frame_t *frame, xlator_t *this, void *data)
dict_t *options = NULL;
int ret = -1;
- if (!frame || !this || !data)
- goto out;
-
options = data;
ret = cli_to_glusterd(
&req, frame, gf_cli_statedump_volume_cbk, (xdrproc_t)xdr_gf_cli_req,
options, GLUSTER_CLI_STATEDUMP_VOLUME, this, cli_rpc_prog, NULL);
-
-out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
GF_FREE(req.dict.dict_val);
return ret;
}
-int32_t
+static int32_t
gf_cli_list_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -9292,7 +8228,7 @@ gf_cli_list_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ XDR_DECODE_FAIL);
goto out;
}
@@ -9304,7 +8240,7 @@ gf_cli_list_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
if (ret) {
- gf_log("cli", GF_LOG_ERROR, "Unable to allocate memory");
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
goto out;
}
@@ -9312,14 +8248,14 @@ gf_cli_list_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = cli_xml_output_vol_list(dict, rsp.op_ret, rsp.op_errno,
rsp.op_errstr);
if (ret)
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto out;
}
if (rsp.op_ret)
cli_err("%s", rsp.op_errstr);
else {
- ret = dict_get_int32(dict, "count", &vol_count);
+ ret = dict_get_int32_sizen(dict, "count", &vol_count);
if (ret)
goto out;
@@ -9328,8 +8264,8 @@ gf_cli_list_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
goto out;
}
for (i = 0; i < vol_count; i++) {
- snprintf(key, sizeof(key), "volume%d", i);
- ret = dict_get_str(dict, key, &volname);
+ ret = snprintf(key, sizeof(key), "volume%d", i);
+ ret = dict_get_strn(dict, key, ret, &volname);
if (ret)
goto out;
cli_out("%s", volname);
@@ -9341,10 +8277,13 @@ gf_cli_list_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
out:
cli_cmd_broadcast_response(ret);
gf_free_xdr_cli_rsp(rsp);
+
+ if (dict)
+ dict_unref(dict);
return ret;
}
-int32_t
+static int32_t
gf_cli_list_volume(call_frame_t *frame, xlator_t *this, void *data)
{
int ret = -1;
@@ -9352,19 +8291,14 @@ gf_cli_list_volume(call_frame_t *frame, xlator_t *this, void *data)
0,
}};
- if (!frame || !this)
- goto out;
-
ret = cli_cmd_submit(NULL, &req, frame, cli_rpc_prog,
GLUSTER_CLI_LIST_VOLUME, NULL, this,
gf_cli_list_volume_cbk, (xdrproc_t)xdr_gf_cli_req);
-
-out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
return ret;
}
-int32_t
+static int32_t
gf_cli_clearlocks_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -9373,7 +8307,6 @@ gf_cli_clearlocks_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
};
int ret = -1;
char *lk_summary = NULL;
- char *volname = NULL;
dict_t *dict = NULL;
GF_ASSERT(myframe);
@@ -9384,7 +8317,7 @@ gf_cli_clearlocks_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ XDR_DECODE_FAIL);
goto out;
}
gf_log("cli", GF_LOG_DEBUG, "Received response to clear-locks");
@@ -9410,24 +8343,14 @@ gf_cli_clearlocks_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
if (ret) {
- gf_log("cli", GF_LOG_ERROR,
- "Unable to serialize response dictionary");
- goto out;
- }
-
- ret = dict_get_str(dict, "volname", &volname);
- if (ret) {
- gf_log("cli", GF_LOG_ERROR,
- "Unable to get volname "
- "from dictionary");
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
goto out;
}
ret = dict_get_str(dict, "lk-summary", &lk_summary);
if (ret) {
gf_log("cli", GF_LOG_ERROR,
- "Unable to get lock "
- "summary from dictionary");
+ "Unable to get lock summary from dictionary");
goto out;
}
cli_out("Volume clear-locks successful");
@@ -9444,7 +8367,7 @@ out:
return ret;
}
-int32_t
+static int32_t
gf_cli_clearlocks_volume(call_frame_t *frame, xlator_t *this, void *data)
{
gf_cli_req req = {{
@@ -9453,22 +8376,18 @@ gf_cli_clearlocks_volume(call_frame_t *frame, xlator_t *this, void *data)
dict_t *options = NULL;
int ret = -1;
- if (!frame || !this || !data)
- goto out;
-
options = data;
ret = cli_to_glusterd(
&req, frame, gf_cli_clearlocks_volume_cbk, (xdrproc_t)xdr_gf_cli_req,
options, GLUSTER_CLI_CLRLOCKS_VOLUME, this, cli_rpc_prog, NULL);
-out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
GF_FREE(req.dict.dict_val);
return ret;
}
-int32_t
+static int32_t
cli_snapshot_remove_reply(gf_cli_rsp *rsp, dict_t *dict, call_frame_t *frame)
{
int32_t ret = -1;
@@ -9482,7 +8401,7 @@ cli_snapshot_remove_reply(gf_cli_rsp *rsp, dict_t *dict, call_frame_t *frame)
local = frame->local;
- ret = dict_get_int32(dict, "sub-cmd", &delete_cmd);
+ ret = dict_get_int32_sizen(dict, "sub-cmd", &delete_cmd);
if (ret) {
gf_log("cli", GF_LOG_ERROR, "Could not get sub-cmd");
goto end;
@@ -9494,8 +8413,7 @@ cli_snapshot_remove_reply(gf_cli_rsp *rsp, dict_t *dict, call_frame_t *frame)
rsp->op_errno, rsp->op_errstr);
if (ret) {
gf_log("cli", GF_LOG_ERROR,
- "Failed to create "
- "xml output for delete");
+ "Failed to create xml output for delete");
goto end;
}
}
@@ -9531,8 +8449,7 @@ cli_snapshot_remove_reply(gf_cli_rsp *rsp, dict_t *dict, call_frame_t *frame)
ret = cli_xml_snapshot_delete(local, dict, rsp);
if (ret) {
gf_log("cli", GF_LOG_ERROR,
- "Failed to create "
- "xml output for snapshot delete command");
+ "Failed to create xml output for snapshot delete command");
goto out;
}
/* Error out in case of the op already failed */
@@ -9541,7 +8458,7 @@ cli_snapshot_remove_reply(gf_cli_rsp *rsp, dict_t *dict, call_frame_t *frame)
goto out;
}
} else {
- ret = dict_get_str(dict, "snapname", &snap_name);
+ ret = dict_get_str_sizen(dict, "snapname", &snap_name);
if (ret) {
gf_log("cli", GF_LOG_ERROR, "Failed to get snapname");
goto out;
@@ -9560,7 +8477,7 @@ end:
return ret;
}
-int
+static int
cli_snapshot_config_display(dict_t *dict, gf_cli_rsp *rsp)
{
char buf[PATH_MAX] = "";
@@ -9586,26 +8503,19 @@ cli_snapshot_config_display(dict_t *dict, gf_cli_rsp *rsp)
goto out;
}
- ret = dict_get_int32(dict, "config-command", &config_command);
+ ret = dict_get_int32_sizen(dict, "config-command", &config_command);
if (ret) {
gf_log("cli", GF_LOG_ERROR, "Could not fetch config type");
goto out;
}
- ret = dict_get_str(dict, "volname", &volname);
- /* Ignore the error, as volname is optional */
-
- if (!volname) {
- volname = "System";
- }
-
ret = dict_get_uint64(dict, "snap-max-hard-limit", &hard_limit);
/* Ignore the error, as the key specified is optional */
ret = dict_get_uint64(dict, "snap-max-soft-limit", &soft_limit);
- ret = dict_get_str(dict, "auto-delete", &auto_delete);
+ ret = dict_get_str_sizen(dict, "auto-delete", &auto_delete);
- ret = dict_get_str(dict, "snap-activate-on-create", &snap_activate);
+ ret = dict_get_str_sizen(dict, "snap-activate-on-create", &snap_activate);
if (!hard_limit && !soft_limit &&
config_command != GF_SNAP_CONFIG_DISPLAY && !auto_delete &&
@@ -9615,13 +8525,19 @@ cli_snapshot_config_display(dict_t *dict, gf_cli_rsp *rsp)
goto out;
}
+ ret = dict_get_str_sizen(dict, "volname", &volname);
+ /* Ignore the error, as volname is optional */
+
+ if (!volname) {
+ volname = "System";
+ }
+
switch (config_command) {
case GF_SNAP_CONFIG_TYPE_SET:
if (hard_limit && soft_limit) {
cli_out(
"snapshot config: snap-max-hard-limit "
- "& snap-max-soft-limit for system set "
- "successfully");
+ "& snap-max-soft-limit for system set successfully");
} else if (hard_limit) {
cli_out(
"snapshot config: snap-max-hard-limit "
@@ -9633,13 +8549,9 @@ cli_snapshot_config_display(dict_t *dict, gf_cli_rsp *rsp)
"for %s set successfully",
volname);
} else if (auto_delete) {
- cli_out(
- "snapshot config: auto-delete "
- "successfully set");
+ cli_out("snapshot config: auto-delete successfully set");
} else if (snap_activate) {
- cli_out(
- "snapshot config: activate-on-create "
- "successfully set");
+ cli_out("snapshot config: activate-on-create successfully set");
}
break;
@@ -9648,9 +8560,7 @@ cli_snapshot_config_display(dict_t *dict, gf_cli_rsp *rsp)
ret = dict_get_uint64(dict, "snap-max-hard-limit", &value);
if (ret) {
gf_log("cli", GF_LOG_ERROR,
- "Could not fetch "
- "snap_max_hard_limit for %s",
- volname);
+ "Could not fetch snap_max_hard_limit for %s", volname);
ret = -1;
goto out;
}
@@ -9659,9 +8569,7 @@ cli_snapshot_config_display(dict_t *dict, gf_cli_rsp *rsp)
ret = dict_get_uint64(dict, "snap-max-soft-limit", &soft_limit);
if (ret) {
gf_log("cli", GF_LOG_ERROR,
- "Could not fetch "
- "snap-max-soft-limit for %s",
- volname);
+ "Could not fetch snap-max-soft-limit for %s", volname);
ret = -1;
goto out;
}
@@ -9681,13 +8589,11 @@ cli_snapshot_config_display(dict_t *dict, gf_cli_rsp *rsp)
}
for (i = 0; i < voldisplaycount; i++) {
- snprintf(buf, sizeof(buf), "volume%" PRIu64 "-volname", i);
- ret = dict_get_str(dict, buf, &volname);
+ ret = snprintf(buf, sizeof(buf), "volume%" PRIu64 "-volname",
+ i);
+ ret = dict_get_strn(dict, buf, ret, &volname);
if (ret) {
- gf_log("cli", GF_LOG_ERROR,
- "Could not fetch "
- " %s",
- buf);
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch %s", buf);
ret = -1;
goto out;
}
@@ -9697,10 +8603,7 @@ cli_snapshot_config_display(dict_t *dict, gf_cli_rsp *rsp)
"volume%" PRIu64 "-snap-max-hard-limit", i);
ret = dict_get_uint64(dict, buf, &value);
if (ret) {
- gf_log("cli", GF_LOG_ERROR,
- "Could not fetch "
- " %s",
- buf);
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch %s", buf);
ret = -1;
goto out;
}
@@ -9712,8 +8615,7 @@ cli_snapshot_config_display(dict_t *dict, gf_cli_rsp *rsp)
if (ret) {
gf_log("cli", GF_LOG_ERROR,
"Could not fetch"
- " effective snap_max_hard_limit for "
- "%s",
+ " effective snap_max_hard_limit for %s",
volname);
ret = -1;
goto out;
@@ -9724,10 +8626,7 @@ cli_snapshot_config_display(dict_t *dict, gf_cli_rsp *rsp)
"volume%" PRIu64 "-snap-max-soft-limit", i);
ret = dict_get_uint64(dict, buf, &value);
if (ret) {
- gf_log("cli", GF_LOG_ERROR,
- "Could not fetch "
- " %s",
- buf);
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch %s", buf);
ret = -1;
goto out;
}
@@ -9752,7 +8651,7 @@ out:
* arg - 0, dict : Response Dictionary.
* arg - 1, prefix str : snaplist.snap{0..}.vol{0..}.*
*/
-int
+static int
cli_get_each_volinfo_in_snap(dict_t *dict, char *keyprefix,
gf_boolean_t snap_driven)
{
@@ -9839,7 +8738,7 @@ out:
* arg - 0, dict : Response dictionary.
* arg - 1, prefix_str : snaplist.snap{0..}.*
*/
-int
+static int
cli_get_volinfo_in_snap(dict_t *dict, char *keyprefix)
{
char key[PATH_MAX] = "";
@@ -9864,8 +8763,7 @@ cli_get_volinfo_in_snap(dict_t *dict, char *keyprefix)
ret = cli_get_each_volinfo_in_snap(dict, key, _gf_true);
if (ret) {
gf_log("cli", GF_LOG_ERROR,
- "Could not list "
- "details of volume in a snap");
+ "Could not list details of volume in a snap");
goto out;
}
cli_out(" ");
@@ -9875,7 +8773,7 @@ out:
return ret;
}
-int
+static int
cli_get_each_snap_info(dict_t *dict, char *prefix_str, gf_boolean_t snap_driven)
{
char key_buffer[PATH_MAX] = "";
@@ -9942,9 +8840,7 @@ cli_get_each_snap_info(dict_t *dict, char *prefix_str, gf_boolean_t snap_driven)
cli_out("%-12s", "Snap Volumes:\n");
ret = cli_get_volinfo_in_snap(dict, prefix_str);
if (ret) {
- gf_log("cli", GF_LOG_ERROR,
- "Unable to list details "
- "of the snaps");
+ gf_log("cli", GF_LOG_ERROR, "Unable to list details of the snaps");
goto out;
}
}
@@ -9955,17 +8851,17 @@ out:
/* This is a generic function to print snap related information.
* arg - 0, dict : Response Dictionary
*/
-int
+static int
cli_call_snapshot_info(dict_t *dict, gf_boolean_t bool_snap_driven)
{
int snap_count = 0;
- char key[PATH_MAX] = "";
+ char key[32] = "";
int ret = -1;
int i = 0;
GF_ASSERT(dict);
- ret = dict_get_int32(dict, "snapcount", &snap_count);
+ ret = dict_get_int32_sizen(dict, "snapcount", &snap_count);
if (ret) {
gf_log("cli", GF_LOG_ERROR, "Unable to get snapcount");
goto out;
@@ -9990,33 +8886,33 @@ out:
return ret;
}
-int
+static int
cli_get_snaps_in_volume(dict_t *dict)
{
int ret = -1;
int i = 0;
int count = 0;
int avail = 0;
- char key[PATH_MAX] = "";
+ char key[32] = "";
char *get_buffer = NULL;
GF_ASSERT(dict);
- ret = dict_get_str(dict, "origin-volname", &get_buffer);
+ ret = dict_get_str_sizen(dict, "origin-volname", &get_buffer);
if (ret) {
gf_log("cli", GF_LOG_ERROR, "Could not fetch origin-volname");
goto out;
}
cli_out(INDENT_MAIN_HEAD "%s", "Volume Name", ":", get_buffer);
- ret = dict_get_int32(dict, "snapcount", &avail);
+ ret = dict_get_int32_sizen(dict, "snapcount", &avail);
if (ret) {
gf_log("cli", GF_LOG_ERROR, "Could not fetch snapcount");
goto out;
}
cli_out(INDENT_MAIN_HEAD "%d", "Snaps Taken", ":", avail);
- ret = dict_get_int32(dict, "snaps-available", &count);
+ ret = dict_get_int32_sizen(dict, "snaps-available", &count);
if (ret) {
gf_log("cli", GF_LOG_ERROR, "Could not fetch snaps-available");
goto out;
@@ -10038,8 +8934,7 @@ cli_get_snaps_in_volume(dict_t *dict)
ret = cli_get_each_volinfo_in_snap(dict, key, _gf_false);
if (ret) {
gf_log("cli", GF_LOG_ERROR,
- "Could not get volume "
- "related information");
+ "Could not get volume related information");
goto out;
}
@@ -10049,18 +8944,18 @@ out:
return ret;
}
-int
+static int
cli_snapshot_list(dict_t *dict)
{
int snapcount = 0;
- char key[PATH_MAX] = "";
+ char key[32] = "";
int ret = -1;
int i = 0;
char *get_buffer = NULL;
GF_ASSERT(dict);
- ret = dict_get_int32(dict, "snapcount", &snapcount);
+ ret = dict_get_int32_sizen(dict, "snapcount", &snapcount);
if (ret) {
gf_log("cli", GF_LOG_ERROR, "Could not fetch snap count");
goto out;
@@ -10076,7 +8971,7 @@ cli_snapshot_list(dict_t *dict)
goto out;
}
- ret = dict_get_str(dict, key, &get_buffer);
+ ret = dict_get_strn(dict, key, ret, &get_buffer);
if (ret) {
gf_log("cli", GF_LOG_ERROR, "Could not get %s ", key);
goto out;
@@ -10088,7 +8983,7 @@ out:
return ret;
}
-int
+static int
cli_get_snap_volume_status(dict_t *dict, char *key_prefix)
{
int ret = -1;
@@ -10189,11 +9084,11 @@ out:
return ret;
}
-int
+static int
cli_get_single_snap_status(dict_t *dict, char *keyprefix)
{
int ret = -1;
- char key[PATH_MAX] = "";
+ char key[64] = ""; /* keyprefix is ""status.snap0" */
int i = 0;
int volcount = 0;
char *get_buffer = NULL;
@@ -10252,7 +9147,7 @@ out:
return ret;
}
-int32_t
+static int32_t
cli_populate_req_dict_for_delete(dict_t *snap_dict, dict_t *dict, size_t index)
{
int32_t ret = -1;
@@ -10262,11 +9157,10 @@ cli_populate_req_dict_for_delete(dict_t *snap_dict, dict_t *dict, size_t index)
GF_ASSERT(snap_dict);
GF_ASSERT(dict);
- ret = dict_set_int32(snap_dict, "sub-cmd", GF_SNAP_DELETE_TYPE_ITER);
+ ret = dict_set_int32_sizen(snap_dict, "sub-cmd", GF_SNAP_DELETE_TYPE_ITER);
if (ret) {
gf_log("cli", GF_LOG_ERROR,
- "Could not save command "
- "type in snap dictionary");
+ "Could not save command type in snap dictionary");
goto out;
}
@@ -10287,7 +9181,7 @@ cli_populate_req_dict_for_delete(dict_t *snap_dict, dict_t *dict, size_t index)
goto out;
}
- ret = dict_set_int32(snap_dict, "type", GF_SNAP_OPTION_TYPE_DELETE);
+ ret = dict_set_int32_sizen(snap_dict, "type", GF_SNAP_OPTION_TYPE_DELETE);
if (ret) {
gf_log("cli", GF_LOG_ERROR, "Failed to save command type");
goto out;
@@ -10302,7 +9196,7 @@ out:
return ret;
}
-int
+static int
cli_populate_req_dict_for_status(dict_t *snap_dict, dict_t *dict, int index)
{
int ret = -1;
@@ -10314,9 +9208,7 @@ cli_populate_req_dict_for_status(dict_t *snap_dict, dict_t *dict, int index)
ret = dict_set_uint32(snap_dict, "sub-cmd", GF_SNAP_STATUS_TYPE_ITER);
if (ret) {
- gf_log("cli", GF_LOG_ERROR,
- "Could not save command "
- "type in snap dict");
+ gf_log("cli", GF_LOG_ERROR, "Could not save command type in snap dict");
goto out;
}
@@ -10325,21 +9217,19 @@ cli_populate_req_dict_for_status(dict_t *snap_dict, dict_t *dict, int index)
goto out;
}
- ret = dict_get_str(dict, key, &buffer);
+ ret = dict_get_strn(dict, key, ret, &buffer);
if (ret) {
gf_log("cli", GF_LOG_ERROR, "Could not get snapname");
goto out;
}
- ret = dict_set_str(snap_dict, "snapname", buffer);
+ ret = dict_set_str_sizen(snap_dict, "snapname", buffer);
if (ret) {
- gf_log("cli", GF_LOG_ERROR,
- "Could not save snapname "
- "in snap dict");
+ gf_log("cli", GF_LOG_ERROR, "Could not save snapname in snap dict");
goto out;
}
- ret = dict_set_int32(snap_dict, "type", GF_SNAP_OPTION_TYPE_STATUS);
+ ret = dict_set_int32_sizen(snap_dict, "type", GF_SNAP_OPTION_TYPE_STATUS);
if (ret) {
gf_log("cli", GF_LOG_ERROR, "Could not save command type");
goto out;
@@ -10351,7 +9241,7 @@ cli_populate_req_dict_for_status(dict_t *snap_dict, dict_t *dict, int index)
goto out;
}
- ret = dict_set_int32(snap_dict, "hold_vol_locks", _gf_false);
+ ret = dict_set_int32_sizen(snap_dict, "hold_vol_locks", _gf_false);
if (ret) {
gf_log("cli", GF_LOG_ERROR, "Setting volume lock flag failed");
goto out;
@@ -10361,10 +9251,9 @@ out:
return ret;
}
-int
+static int
cli_snapshot_status(dict_t *dict, gf_cli_rsp *rsp, call_frame_t *frame)
{
- char key[PATH_MAX] = "";
int ret = -1;
int status_cmd = -1;
cli_local_t *local = NULL;
@@ -10385,8 +9274,7 @@ cli_snapshot_status(dict_t *dict, gf_cli_rsp *rsp, call_frame_t *frame)
rsp->op_errstr);
if (ret) {
gf_log("cli", GF_LOG_ERROR,
- "Failed to set "
- "op_errstr in local dictionary");
+ "Failed to set op_errstr in local dictionary");
goto out;
}
}
@@ -10394,7 +9282,7 @@ cli_snapshot_status(dict_t *dict, gf_cli_rsp *rsp, call_frame_t *frame)
goto out;
}
- ret = dict_get_int32(dict, "sub-cmd", &status_cmd);
+ ret = dict_get_int32_sizen(dict, "sub-cmd", &status_cmd);
if (ret) {
gf_log("cli", GF_LOG_ERROR, "Could not fetch status type");
goto out;
@@ -10406,35 +9294,24 @@ cli_snapshot_status(dict_t *dict, gf_cli_rsp *rsp, call_frame_t *frame)
goto out;
}
- ret = snprintf(key, sizeof(key), "status.snap0");
- if (ret < 0) {
- goto out;
- }
-
if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_snapshot_status_single_snap(local, dict, key);
+ ret = cli_xml_snapshot_status_single_snap(local, dict, "status.snap0");
if (ret) {
gf_log("cli", GF_LOG_ERROR,
- "Failed to create "
- "xml output for snapshot status");
- goto out;
+ "Failed to create xml output for snapshot status");
}
} else {
- ret = cli_get_single_snap_status(dict, key);
+ ret = cli_get_single_snap_status(dict, "status.snap0");
if (ret) {
- gf_log("cli", GF_LOG_ERROR,
- "Could not fetch "
- "status of snap");
- goto out;
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch status of snap");
}
}
- ret = 0;
out:
return ret;
}
-int
+static int
gf_cli_generate_snapshot_event(gf_cli_rsp *rsp, dict_t *dict, int32_t type,
char *snap_name, char *volname, char *snap_uuid,
char *clone_name)
@@ -10484,8 +9361,7 @@ gf_cli_generate_snapshot_event(gf_cli_rsp *rsp, dict_t *dict, int32_t type,
}
gf_event(EVENT_SNAPSHOT_CREATED,
- "snapshot_name=%s;"
- "volume_name=%s;snapshot_uuid=%s",
+ "snapshot_name=%s;volume_name=%s;snapshot_uuid=%s",
snap_name, volname, snap_uuid);
ret = 0;
@@ -10512,9 +9388,7 @@ gf_cli_generate_snapshot_event(gf_cli_rsp *rsp, dict_t *dict, int32_t type,
}
gf_event(EVENT_SNAPSHOT_ACTIVATED,
- "snapshot_name=%s;"
- "snapshot_uuid=%s",
- snap_name, snap_uuid);
+ "snapshot_name=%s;snapshot_uuid=%s", snap_name, snap_uuid);
ret = 0;
break;
@@ -10540,9 +9414,7 @@ gf_cli_generate_snapshot_event(gf_cli_rsp *rsp, dict_t *dict, int32_t type,
}
gf_event(EVENT_SNAPSHOT_DEACTIVATED,
- "snapshot_name=%s;"
- "snapshot_uuid=%s",
- snap_name, snap_uuid);
+ "snapshot_name=%s;snapshot_uuid=%s", snap_name, snap_uuid);
ret = 0;
break;
@@ -10575,15 +9447,14 @@ gf_cli_generate_snapshot_event(gf_cli_rsp *rsp, dict_t *dict, int32_t type,
}
gf_event(EVENT_SNAPSHOT_RESTORED,
- "snapshot_name=%s;"
- "snapshot_uuid=%s;volume_name=%s",
+ "snapshot_name=%s;snapshot_uuid=%s;volume_name=%s",
snap_name, snap_uuid, volname);
ret = 0;
break;
case GF_SNAP_OPTION_TYPE_DELETE:
- ret = dict_get_int32(dict, "sub-cmd", &delete_cmd);
+ ret = dict_get_int32_sizen(dict, "sub-cmd", &delete_cmd);
if (ret) {
gf_log("cli", GF_LOG_ERROR, "Could not get sub-cmd");
goto out;
@@ -10619,9 +9490,7 @@ gf_cli_generate_snapshot_event(gf_cli_rsp *rsp, dict_t *dict, int32_t type,
}
gf_event(EVENT_SNAPSHOT_DELETED,
- "snapshot_name=%s;"
- "snapshot_uuid=%s",
- snap_name, snap_uuid);
+ "snapshot_name=%s;snapshot_uuid=%s", snap_name, snap_uuid);
ret = 0;
break;
@@ -10639,9 +9508,8 @@ gf_cli_generate_snapshot_event(gf_cli_rsp *rsp, dict_t *dict, int32_t type,
if (rsp->op_ret != 0) {
gf_event(EVENT_SNAPSHOT_CLONE_FAILED,
- "snapshot_name=%s;clone_name=%s;"
- "error=%s",
- snap_name, clone_name,
+ "snapshot_name=%s;clone_name=%s;error=%s", snap_name,
+ clone_name,
rsp->op_errstr ? rsp->op_errstr
: "Please check log file for details");
ret = 0;
@@ -10655,9 +9523,8 @@ gf_cli_generate_snapshot_event(gf_cli_rsp *rsp, dict_t *dict, int32_t type,
}
gf_event(EVENT_SNAPSHOT_CLONED,
- "snapshot_name=%s;"
- "clone_name=%s;clone_uuid=%s",
- snap_name, clone_name, snap_uuid);
+ "snapshot_name=%s;clone_name=%s;clone_uuid=%s", snap_name,
+ clone_name, snap_uuid);
ret = 0;
break;
@@ -10671,7 +9538,7 @@ gf_cli_generate_snapshot_event(gf_cli_rsp *rsp, dict_t *dict, int32_t type,
break;
}
- ret = dict_get_int32(dict, "config-command", &config_command);
+ ret = dict_get_int32_sizen(dict, "config-command", &config_command);
if (ret) {
gf_log("cli", GF_LOG_ERROR, "Could not fetch config type");
goto out;
@@ -10685,8 +9552,9 @@ gf_cli_generate_snapshot_event(gf_cli_rsp *rsp, dict_t *dict, int32_t type,
/* These are optional parameters therefore ignore the error */
ret = dict_get_uint64(dict, "snap-max-hard-limit", &hard_limit);
ret = dict_get_uint64(dict, "snap-max-soft-limit", &soft_limit);
- ret = dict_get_str(dict, "auto-delete", &auto_delete);
- ret = dict_get_str(dict, "snap-activate-on-create", &snap_activate);
+ ret = dict_get_str_sizen(dict, "auto-delete", &auto_delete);
+ ret = dict_get_str_sizen(dict, "snap-activate-on-create",
+ &snap_activate);
if (!hard_limit && !soft_limit && !auto_delete && !snap_activate) {
ret = -1;
@@ -10698,9 +9566,6 @@ gf_cli_generate_snapshot_event(gf_cli_rsp *rsp, dict_t *dict, int32_t type,
goto out;
}
- volname = NULL;
- ret = dict_get_str(dict, "volname", &volname);
-
if (hard_limit || soft_limit) {
snprintf(option, sizeof(option), "%s=%" PRIu64,
hard_limit ? "hard_limit" : "soft_limit",
@@ -10711,6 +9576,9 @@ gf_cli_generate_snapshot_event(gf_cli_rsp *rsp, dict_t *dict, int32_t type,
auto_delete ? auto_delete : snap_activate);
}
+ volname = NULL;
+ ret = dict_get_str_sizen(dict, "volname", &volname);
+
snprintf(msg, sizeof(msg), "config_type=%s;%s",
volname ? "volume_config" : "system_config", option);
@@ -10734,7 +9602,7 @@ out:
* Fetch necessary data from dict at one place instead of *
* repeating the same code again and again. *
*/
-int
+static int
gf_cli_snapshot_get_data_from_dict(dict_t *dict, char **snap_name,
char **volname, char **snap_uuid,
int8_t *soft_limit_flag, char **clone_name)
@@ -10744,21 +9612,21 @@ gf_cli_snapshot_get_data_from_dict(dict_t *dict, char **snap_name,
GF_VALIDATE_OR_GOTO("cli", dict, out);
if (snap_name) {
- ret = dict_get_str(dict, "snapname", snap_name);
+ ret = dict_get_str_sizen(dict, "snapname", snap_name);
if (ret) {
gf_log("cli", GF_LOG_DEBUG, "failed to get snapname from dict");
}
}
if (volname) {
- ret = dict_get_str(dict, "volname1", volname);
+ ret = dict_get_str_sizen(dict, "volname1", volname);
if (ret) {
gf_log("cli", GF_LOG_DEBUG, "failed to get volname1 from dict");
}
}
if (snap_uuid) {
- ret = dict_get_str(dict, "snapuuid", snap_uuid);
+ ret = dict_get_str_sizen(dict, "snapuuid", snap_uuid);
if (ret) {
gf_log("cli", GF_LOG_DEBUG, "failed to get snapuuid from dict");
}
@@ -10773,7 +9641,7 @@ gf_cli_snapshot_get_data_from_dict(dict_t *dict, char **snap_name,
}
if (clone_name) {
- ret = dict_get_str(dict, "clonename", clone_name);
+ ret = dict_get_str_sizen(dict, "clonename", clone_name);
if (ret) {
gf_log("cli", GF_LOG_DEBUG, "failed to get clonename from dict");
}
@@ -10784,7 +9652,7 @@ out:
return ret;
}
-int
+static int
gf_cli_snapshot_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -10812,8 +9680,7 @@ gf_cli_snapshot_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log(frame->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
goto out;
}
@@ -10829,7 +9696,7 @@ gf_cli_snapshot_cbk(struct rpc_req *req, struct iovec *iov, int count,
if (ret)
goto out;
- ret = dict_get_int32(dict, "type", &type);
+ ret = dict_get_int32_sizen(dict, "type", &type);
if (ret) {
gf_log(frame->this->name, GF_LOG_ERROR, "failed to get type");
goto out;
@@ -10858,7 +9725,7 @@ gf_cli_snapshot_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = cli_xml_output_snapshot(type, dict, rsp.op_ret, rsp.op_errno,
rsp.op_errstr);
if (ret) {
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
}
goto out;
@@ -10886,10 +9753,8 @@ gf_cli_snapshot_cbk(struct rpc_req *req, struct iovec *iov, int count,
goto out;
}
- cli_out(
- "snapshot create: success: Snap %s created "
- "successfully",
- snap_name);
+ cli_out("snapshot create: success: Snap %s created successfully",
+ snap_name);
if (soft_limit_flag == 1) {
cli_out(
@@ -10920,10 +9785,8 @@ gf_cli_snapshot_cbk(struct rpc_req *req, struct iovec *iov, int count,
goto out;
}
- cli_out(
- "snapshot clone: success: Clone %s created "
- "successfully",
- clone_name);
+ cli_out("snapshot clone: success: Clone %s created successfully",
+ clone_name);
ret = 0;
break;
@@ -10942,10 +9805,8 @@ gf_cli_snapshot_cbk(struct rpc_req *req, struct iovec *iov, int count,
goto out;
}
- cli_out(
- "Snapshot restore: %s: Snap restored "
- "successfully",
- snap_name);
+ cli_out("Snapshot restore: %s: Snap restored successfully",
+ snap_name);
ret = 0;
break;
@@ -10963,10 +9824,8 @@ gf_cli_snapshot_cbk(struct rpc_req *req, struct iovec *iov, int count,
goto out;
}
- cli_out(
- "Snapshot activate: %s: Snap activated "
- "successfully",
- snap_name);
+ cli_out("Snapshot activate: %s: Snap activated successfully",
+ snap_name);
ret = 0;
break;
@@ -10985,10 +9844,8 @@ gf_cli_snapshot_cbk(struct rpc_req *req, struct iovec *iov, int count,
goto out;
}
- cli_out(
- "Snapshot deactivate: %s: Snap deactivated "
- "successfully",
- snap_name);
+ cli_out("Snapshot deactivate: %s: Snap deactivated successfully",
+ snap_name);
ret = 0;
break;
@@ -11021,8 +9878,7 @@ gf_cli_snapshot_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = cli_snapshot_config_display(dict, &rsp);
if (ret) {
gf_log("cli", GF_LOG_ERROR,
- "Failed to display "
- "snapshot config output.");
+ "Failed to display snapshot config output.");
goto out;
}
break;
@@ -11038,9 +9894,7 @@ gf_cli_snapshot_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = cli_snapshot_list(dict);
if (ret) {
- gf_log("cli", GF_LOG_ERROR,
- "Failed to display "
- "snapshot list");
+ gf_log("cli", GF_LOG_ERROR, "Failed to display snapshot list");
goto out;
}
break;
@@ -11057,8 +9911,7 @@ gf_cli_snapshot_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = cli_snapshot_status(dict, &rsp, frame);
if (ret) {
gf_log("cli", GF_LOG_ERROR,
- "Failed to display "
- "snapshot status output.");
+ "Failed to display snapshot status output.");
goto out;
}
break;
@@ -11102,11 +9955,9 @@ gf_cli_snapshot_for_delete(call_frame_t *frame, xlator_t *this, void *data)
local = frame->local;
- ret = dict_get_int32(local->dict, "sub-cmd", &cmd);
+ ret = dict_get_int32_sizen(local->dict, "sub-cmd", &cmd);
if (ret) {
- gf_log("cli", GF_LOG_ERROR,
- "Failed to get "
- "sub-cmd");
+ gf_log("cli", GF_LOG_ERROR, "Failed to get sub-cmd");
goto out;
}
@@ -11116,11 +9967,9 @@ gf_cli_snapshot_for_delete(call_frame_t *frame, xlator_t *this, void *data)
goto out;
}
- ret = dict_get_int32(local->dict, "snapcount", &snapcount);
+ ret = dict_get_int32_sizen(local->dict, "snapcount", &snapcount);
if (ret) {
- gf_log("cli", GF_LOG_ERROR,
- "Could not get "
- "snapcount");
+ gf_log("cli", GF_LOG_ERROR, "Could not get snapcount");
goto out;
}
@@ -11130,8 +9979,7 @@ gf_cli_snapshot_for_delete(call_frame_t *frame, xlator_t *this, void *data)
local->writer, (xmlChar *)"snapCount", "%d", snapcount);
if (ret) {
gf_log("cli", GF_LOG_ERROR,
- "Failed to write "
- "xml element \"snapCount\"");
+ "Failed to write xml element \"snapCount\"");
goto out;
}
#endif /* HAVE_LIB_XML */
@@ -11142,22 +9990,19 @@ gf_cli_snapshot_for_delete(call_frame_t *frame, xlator_t *this, void *data)
if (cmd == GF_SNAP_DELETE_TYPE_ALL) {
snprintf(question, sizeof(question),
- "System contains %d "
- "snapshot(s).\nDo you still "
+ "System contains %d snapshot(s).\nDo you still "
"want to continue and delete them? ",
snapcount);
} else {
- ret = dict_get_str(local->dict, "volname", &volname);
+ ret = dict_get_str_sizen(local->dict, "volname", &volname);
if (ret) {
gf_log("cli", GF_LOG_ERROR,
- "Failed to fetch "
- "volname from local dictionary");
+ "Failed to fetch volname from local dictionary");
goto out;
}
snprintf(question, sizeof(question),
- "Volume (%s) contains "
- "%d snapshot(s).\nDo you still want to "
+ "Volume (%s) contains %d snapshot(s).\nDo you still want to "
"continue and delete them? ",
volname, snapcount);
}
@@ -11166,8 +10011,7 @@ gf_cli_snapshot_for_delete(call_frame_t *frame, xlator_t *this, void *data)
if (GF_ANSWER_NO == answer) {
ret = 0;
gf_log("cli", GF_LOG_DEBUG,
- "User cancelled "
- "snapshot delete operation for snap delete");
+ "User cancelled snapshot delete operation for snap delete");
goto out;
}
@@ -11181,8 +10025,7 @@ gf_cli_snapshot_for_delete(call_frame_t *frame, xlator_t *this, void *data)
ret = cli_populate_req_dict_for_delete(snap_dict, local->dict, i);
if (ret) {
gf_log("cli", GF_LOG_ERROR,
- "Could not "
- "populate snap request dictionary");
+ "Could not populate snap request dictionary");
goto out;
}
@@ -11194,8 +10037,7 @@ gf_cli_snapshot_for_delete(call_frame_t *frame, xlator_t *this, void *data)
* snapshots is failed
*/
gf_log("cli", GF_LOG_ERROR,
- "cli_to_glusterd "
- "for snapshot delete failed");
+ "cli_to_glusterd for snapshot delete failed");
goto out;
}
dict_unref(snap_dict);
@@ -11210,7 +10052,7 @@ out:
return ret;
}
-int32_t
+static int32_t
gf_cli_snapshot_for_status(call_frame_t *frame, xlator_t *this, void *data)
{
gf_cli_req req = {{
@@ -11230,7 +10072,7 @@ gf_cli_snapshot_for_status(call_frame_t *frame, xlator_t *this, void *data)
local = frame->local;
- ret = dict_get_int32(local->dict, "sub-cmd", &cmd);
+ ret = dict_get_int32_sizen(local->dict, "sub-cmd", &cmd);
if (ret) {
gf_log("cli", GF_LOG_ERROR, "Failed to get sub-cmd");
goto out;
@@ -11246,7 +10088,7 @@ gf_cli_snapshot_for_status(call_frame_t *frame, xlator_t *this, void *data)
goto out;
}
- ret = dict_get_int32(local->dict, "status.snapcount", &snapcount);
+ ret = dict_get_int32_sizen(local->dict, "status.snapcount", &snapcount);
if (ret) {
gf_log("cli", GF_LOG_ERROR, "Could not get snapcount");
goto out;
@@ -11266,8 +10108,7 @@ gf_cli_snapshot_for_status(call_frame_t *frame, xlator_t *this, void *data)
ret = cli_populate_req_dict_for_status(snap_dict, local->dict, i);
if (ret) {
gf_log("cli", GF_LOG_ERROR,
- "Could not "
- "populate snap request dictionary");
+ "Could not populate snap request dictionary");
goto out;
}
@@ -11304,7 +10145,7 @@ out:
return ret;
}
-int32_t
+static int32_t
gf_cli_snapshot(call_frame_t *frame, xlator_t *this, void *data)
{
gf_cli_req req = {{
@@ -11317,24 +10158,18 @@ gf_cli_snapshot(call_frame_t *frame, xlator_t *this, void *data)
char *err_str = NULL;
int type = -1;
- if (!frame || !this || !data)
- goto out;
-
- if (!frame->local)
+ if (!frame || !frame->local || !this || !data)
goto out;
local = frame->local;
options = data;
- ret = dict_get_int32(local->dict, "type", &type);
-
if (global_state->mode & GLUSTER_MODE_XML) {
ret = cli_xml_snapshot_begin_composite_op(local);
if (ret) {
gf_log("cli", GF_LOG_ERROR,
- "Failed to begin "
- "snapshot xml composite op");
+ "Failed to begin snapshot xml composite op");
goto out;
}
}
@@ -11343,18 +10178,17 @@ gf_cli_snapshot(call_frame_t *frame, xlator_t *this, void *data)
(xdrproc_t)xdr_gf_cli_req, options, GLUSTER_CLI_SNAP,
this, cli_rpc_prog, NULL);
if (ret) {
- gf_log("cli", GF_LOG_ERROR,
- "cli_to_glusterd for "
- "snapshot failed");
+ gf_log("cli", GF_LOG_ERROR, "cli_to_glusterd for snapshot failed");
goto xmlend;
}
+ ret = dict_get_int32_sizen(local->dict, "type", &type);
+
if (GF_SNAP_OPTION_TYPE_STATUS == type) {
ret = gf_cli_snapshot_for_status(frame, this, data);
if (ret) {
gf_log("cli", GF_LOG_ERROR,
- "cli to glusterd "
- "for snapshot status command failed");
+ "cli to glusterd for snapshot status command failed");
}
goto xmlend;
@@ -11364,8 +10198,7 @@ gf_cli_snapshot(call_frame_t *frame, xlator_t *this, void *data)
ret = gf_cli_snapshot_for_delete(frame, this, data);
if (ret) {
gf_log("cli", GF_LOG_ERROR,
- "cli to glusterd "
- "for snapshot delete command failed");
+ "cli to glusterd for snapshot delete command failed");
}
goto xmlend;
@@ -11378,25 +10211,23 @@ xmlend:
ret = cli_xml_snapshot_end_composite_op(local);
if (ret) {
gf_log("cli", GF_LOG_ERROR,
- "Failed to end "
- "snapshot xml composite op");
+ "Failed to end snapshot xml composite op");
goto out;
}
}
out:
if (ret && local && GF_SNAP_OPTION_TYPE_STATUS == type) {
- tmp_ret = dict_get_str(local->dict, "op_err_str", &err_str);
+ tmp_ret = dict_get_str_sizen(local->dict, "op_err_str", &err_str);
if (tmp_ret || !err_str) {
cli_err("Snapshot Status : failed: %s",
- "Please "
- "check log file for details");
+ "Please check log file for details");
} else {
cli_err("Snapshot Status : failed: %s", err_str);
- dict_del(local->dict, "op_err_str");
+ dict_del_sizen(local->dict, "op_err_str");
}
}
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
GF_FREE(req.dict.dict_val);
@@ -11407,7 +10238,7 @@ out:
return ret;
}
-int32_t
+static int32_t
gf_cli_barrier_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -11424,7 +10255,7 @@ gf_cli_barrier_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ XDR_DECODE_FAIL);
goto out;
}
gf_log("cli", GF_LOG_DEBUG, "Received response to barrier");
@@ -11446,7 +10277,7 @@ out:
return ret;
}
-int
+static int
gf_cli_barrier_volume(call_frame_t *frame, xlator_t *this, void *data)
{
gf_cli_req req = {{
@@ -11455,22 +10286,18 @@ gf_cli_barrier_volume(call_frame_t *frame, xlator_t *this, void *data)
dict_t *options = NULL;
int ret = -1;
- if (!frame || !this || !data)
- goto out;
-
options = data;
ret = cli_to_glusterd(&req, frame, gf_cli_barrier_volume_cbk,
(xdrproc_t)xdr_gf_cli_req, options,
GLUSTER_CLI_BARRIER_VOLUME, this, cli_rpc_prog, NULL);
-out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
GF_FREE(req.dict.dict_val);
return ret;
}
-int32_t
+static int32_t
gf_cli_get_vol_opt_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -11497,27 +10324,23 @@ gf_cli_get_vol_opt_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ XDR_DECODE_FAIL);
goto out;
}
gf_log("cli", GF_LOG_DEBUG, "Received response to get volume option");
if (rsp.op_ret) {
if (strcmp(rsp.op_errstr, ""))
- snprintf(msg, sizeof(msg),
- "volume get option: "
- "failed: %s",
+ snprintf(msg, sizeof(msg), "volume get option: failed: %s",
rsp.op_errstr);
else
- snprintf(msg, sizeof(msg),
- "volume get option: "
- "failed");
+ snprintf(msg, sizeof(msg), "volume get option: failed");
if (global_state->mode & GLUSTER_MODE_XML) {
ret = cli_xml_output_str("volGetopts", msg, rsp.op_ret,
rsp.op_errno, rsp.op_errstr);
if (ret) {
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
}
} else {
cli_err("%s", msg);
@@ -11534,7 +10357,7 @@ gf_cli_get_vol_opt_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
if (ret) {
- gf_log("cli", GF_LOG_ERROR, "Failed rsp_dict unserialization");
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
goto out;
}
@@ -11542,32 +10365,26 @@ gf_cli_get_vol_opt_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = cli_xml_output_vol_getopts(dict, rsp.op_ret, rsp.op_errno,
rsp.op_errstr);
if (ret) {
- gf_log("cli", GF_LOG_ERROR,
- "xml output generation "
- "failed");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
ret = 0;
}
goto out;
}
- ret = dict_get_str(dict, "warning", &value);
+ ret = dict_get_str_sizen(dict, "warning", &value);
if (!ret) {
cli_out("%s", value);
}
- ret = dict_get_int32(dict, "count", &count);
+ ret = dict_get_int32_sizen(dict, "count", &count);
if (ret) {
gf_log("cli", GF_LOG_ERROR,
- "Failed to retrieve count "
- "from the dictionary");
+ "Failed to retrieve count from the dictionary");
goto out;
}
if (count <= 0) {
- gf_log("cli", GF_LOG_ERROR,
- "Value of count :%d is "
- "invalid",
- count);
+ gf_log("cli", GF_LOG_ERROR, "Value of count :%d is invalid", count);
ret = -1;
goto out;
}
@@ -11575,23 +10392,18 @@ gf_cli_get_vol_opt_cbk(struct rpc_req *req, struct iovec *iov, int count,
cli_out("%-40s%-40s", "Option", "Value");
cli_out("%-40s%-40s", "------", "-----");
for (i = 1; i <= count; i++) {
- snprintf(dict_key, sizeof dict_key, "key%d", i);
- ret = dict_get_str(dict, dict_key, &key);
+ ret = snprintf(dict_key, sizeof dict_key, "key%d", i);
+ ret = dict_get_strn(dict, dict_key, ret, &key);
if (ret) {
gf_log("cli", GF_LOG_ERROR,
- "Failed to"
- " retrieve %s from the "
- "dictionary",
- dict_key);
+ "Failed to retrieve %s from the dictionary", dict_key);
goto out;
}
- snprintf(dict_key, sizeof dict_key, "value%d", i);
- ret = dict_get_str(dict, dict_key, &value);
+ ret = snprintf(dict_key, sizeof dict_key, "value%d", i);
+ ret = dict_get_strn(dict, dict_key, ret, &value);
if (ret) {
gf_log("cli", GF_LOG_ERROR,
- "Failed to "
- "retrieve key value for %s from"
- "the dictionary",
+ "Failed to retrieve key value for %s from the dictionary",
dict_key);
goto out;
}
@@ -11613,7 +10425,7 @@ out_nolog:
return ret;
}
-int
+static int
gf_cli_get_vol_opt(call_frame_t *frame, xlator_t *this, void *data)
{
gf_cli_req req = {{
@@ -11622,22 +10434,18 @@ gf_cli_get_vol_opt(call_frame_t *frame, xlator_t *this, void *data)
dict_t *options = NULL;
int ret = -1;
- if (!frame || !this || !data)
- goto out;
-
options = data;
ret = cli_to_glusterd(&req, frame, gf_cli_get_vol_opt_cbk,
(xdrproc_t)xdr_gf_cli_req, options,
GLUSTER_CLI_GET_VOL_OPT, this, cli_rpc_prog, NULL);
-out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
GF_FREE(req.dict.dict_val);
return ret;
}
-int
+static int
add_cli_cmd_timeout_to_dict(dict_t *dict)
{
int ret = 0;
@@ -11645,15 +10453,13 @@ add_cli_cmd_timeout_to_dict(dict_t *dict)
if (cli_default_conn_timeout > 120) {
ret = dict_set_uint32(dict, "timeout", cli_default_conn_timeout);
if (ret) {
- gf_log("cli", GF_LOG_INFO,
- "Failed to save"
- "timeout to dict");
+ gf_log("cli", GF_LOG_INFO, "Failed to save timeout to dict");
}
}
return ret;
}
-int
+static int
cli_to_glusterd(gf_cli_req *req, call_frame_t *frame, fop_cbk_fn_t cbkfn,
xdrproc_t xdrproc, dict_t *dict, int procnum, xlator_t *this,
rpc_clnt_prog_t *prog, struct iobref *iobref)
@@ -11665,12 +10471,7 @@ cli_to_glusterd(gf_cli_req *req, call_frame_t *frame, fop_cbk_fn_t cbkfn,
const char **words = NULL;
cli_local_t *local = NULL;
- if (!this || !frame || !dict) {
- ret = -1;
- goto out;
- }
-
- if (!frame->local) {
+ if (!this || !frame || !frame->local || !dict) {
ret = -1;
goto out;
}
@@ -11687,12 +10488,12 @@ cli_to_glusterd(gf_cli_req *req, call_frame_t *frame, fop_cbk_fn_t cbkfn,
while (words[i])
len += strlen(words[i++]) + 1;
- cmd = GF_CALLOC(1, len, gf_common_mt_char);
-
+ cmd = GF_MALLOC(len + 1, gf_common_mt_char);
if (!cmd) {
ret = -1;
goto out;
}
+ cmd[0] = '\0';
for (i = 0; words[i]; i++) {
strncat(cmd, words[i], len - 1);
@@ -11700,9 +10501,7 @@ cli_to_glusterd(gf_cli_req *req, call_frame_t *frame, fop_cbk_fn_t cbkfn,
strncat(cmd, " ", len - 1);
}
- cmd[len - 1] = '\0';
-
- ret = dict_set_dynstr(dict, "cmd-str", cmd);
+ ret = dict_set_dynstr_sizen(dict, "cmd-str", cmd);
if (ret)
goto out;
@@ -11723,14 +10522,14 @@ out:
return ret;
}
-int
+static int
gf_cli_print_bitrot_scrub_status(dict_t *dict)
{
int i = 1;
int j = 0;
int ret = -1;
int count = 0;
- char key[256] = {
+ char key[64] = {
0,
};
char *volname = NULL;
@@ -11753,51 +10552,42 @@ gf_cli_print_bitrot_scrub_status(dict_t *dict)
int8_t scrub_running = 0;
char *scrub_state_op = NULL;
- ret = dict_get_str(dict, "volname", &volname);
+ ret = dict_get_int32_sizen(dict, "count", &count);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "failed to get count value from dictionary");
+ goto out;
+ }
+
+ ret = dict_get_str_sizen(dict, "volname", &volname);
if (ret)
gf_log("cli", GF_LOG_TRACE, "failed to get volume name");
- ret = dict_get_str(dict, "features.scrub", &state_scrub);
+ ret = dict_get_str_sizen(dict, "features.scrub", &state_scrub);
if (ret)
gf_log("cli", GF_LOG_TRACE, "failed to get scrub state value");
- ret = dict_get_str(dict, "features.scrub-throttle", &scrub_impact);
+ ret = dict_get_str_sizen(dict, "features.scrub-throttle", &scrub_impact);
if (ret)
- gf_log("cli", GF_LOG_TRACE,
- "failed to get scrub impact "
- "value");
+ gf_log("cli", GF_LOG_TRACE, "failed to get scrub impact value");
- ret = dict_get_str(dict, "features.scrub-freq", &scrub_freq);
+ ret = dict_get_str_sizen(dict, "features.scrub-freq", &scrub_freq);
if (ret)
gf_log("cli", GF_LOG_TRACE, "failed to get scrub -freq value");
- ret = dict_get_str(dict, "bitrot_log_file", &bitrot_log_file);
+ ret = dict_get_str_sizen(dict, "bitrot_log_file", &bitrot_log_file);
if (ret)
- gf_log("cli", GF_LOG_TRACE,
- "failed to get bitrot log file "
- "location");
+ gf_log("cli", GF_LOG_TRACE, "failed to get bitrot log file location");
- ret = dict_get_str(dict, "scrub_log_file", &scrub_log_file);
+ ret = dict_get_str_sizen(dict, "scrub_log_file", &scrub_log_file);
if (ret)
- gf_log("cli", GF_LOG_TRACE,
- "failed to get scrubber log file "
- "location");
-
- ret = dict_get_int32(dict, "count", &count);
- if (ret) {
- gf_log("cli", GF_LOG_ERROR,
- "count not get count value from"
- " dictionary");
- goto out;
- }
+ gf_log("cli", GF_LOG_TRACE, "failed to get scrubber log file location");
for (i = 1; i <= count; i++) {
- snprintf(key, 256, "scrub-running-%d", i);
+ snprintf(key, sizeof(key), "scrub-running-%d", i);
ret = dict_get_int8(dict, key, &scrub_running);
if (ret)
- gf_log("cli", GF_LOG_TRACE,
- "failed to get scrubbed "
- "files");
+ gf_log("cli", GF_LOG_TRACE, "failed to get scrubbed files");
if (scrub_running)
break;
}
@@ -11824,56 +10614,41 @@ gf_cli_print_bitrot_scrub_status(dict_t *dict)
node_name = NULL;
last_scrub = NULL;
scrub_time = 0;
- days = 0;
- hours = 0;
- minutes = 0;
- seconds = 0;
error_count = 0;
scrub_files = 0;
unsigned_files = 0;
- snprintf(key, 256, "node-name-%d", i);
+ snprintf(key, sizeof(key), "node-name-%d", i);
ret = dict_get_str(dict, key, &node_name);
if (ret)
gf_log("cli", GF_LOG_TRACE, "failed to get node-name");
- snprintf(key, 256, "scrubbed-files-%d", i);
+ snprintf(key, sizeof(key), "scrubbed-files-%d", i);
ret = dict_get_uint64(dict, key, &scrub_files);
if (ret)
- gf_log("cli", GF_LOG_TRACE,
- "failed to get scrubbed "
- "files");
+ gf_log("cli", GF_LOG_TRACE, "failed to get scrubbed files");
- snprintf(key, 256, "unsigned-files-%d", i);
+ snprintf(key, sizeof(key), "unsigned-files-%d", i);
ret = dict_get_uint64(dict, key, &unsigned_files);
if (ret)
- gf_log("cli", GF_LOG_TRACE,
- "failed to get unsigned "
- "files");
+ gf_log("cli", GF_LOG_TRACE, "failed to get unsigned files");
- snprintf(key, 256, "scrub-duration-%d", i);
+ snprintf(key, sizeof(key), "scrub-duration-%d", i);
ret = dict_get_uint64(dict, key, &scrub_time);
if (ret)
- gf_log("cli", GF_LOG_TRACE,
- "failed to get last scrub "
- "duration");
+ gf_log("cli", GF_LOG_TRACE, "failed to get last scrub duration");
- snprintf(key, 256, "last-scrub-time-%d", i);
+ snprintf(key, sizeof(key), "last-scrub-time-%d", i);
ret = dict_get_str(dict, key, &last_scrub);
if (ret)
- gf_log("cli", GF_LOG_TRACE,
- "failed to get last scrub"
- " time");
- snprintf(key, 256, "error-count-%d", i);
+ gf_log("cli", GF_LOG_TRACE, "failed to get last scrub time");
+ snprintf(key, sizeof(key), "error-count-%d", i);
ret = dict_get_uint64(dict, key, &error_count);
if (ret)
- gf_log("cli", GF_LOG_TRACE,
- "failed to get error "
- "count");
+ gf_log("cli", GF_LOG_TRACE, "failed to get error count");
cli_out("\n%s\n",
- "=========================================="
- "===============");
+ "=========================================================");
cli_out("%s: %s\n", "Node", node_name);
@@ -11902,7 +10677,7 @@ gf_cli_print_bitrot_scrub_status(dict_t *dict)
cli_out("%s:\n", "Corrupted object's [GFID]");
/* Printing list of bad file's (Corrupted object's)*/
for (j = 0; j < error_count; j++) {
- snprintf(key, 256, "quarantine-%d-%d", j, i);
+ snprintf(key, sizeof(key), "quarantine-%d-%d", j, i);
ret = dict_get_str(dict, key, &bad_file_str);
if (!ret) {
cli_out("%s\n", bad_file_str);
@@ -11911,15 +10686,14 @@ gf_cli_print_bitrot_scrub_status(dict_t *dict)
}
}
cli_out("%s\n",
- "=========================================="
- "===============");
+ "=========================================================");
out:
GF_FREE(scrub_state_op);
return 0;
}
-int
+static int
gf_cli_bitrot_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
@@ -11943,7 +10717,7 @@ gf_cli_bitrot_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
- "Failed to decode xdr response");
+ XDR_DECODE_FAIL);
goto out;
}
@@ -11972,81 +10746,67 @@ gf_cli_bitrot_cbk(struct rpc_req *req, struct iovec *iov, int count,
ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
if (ret) {
- gf_log("cli", GF_LOG_ERROR,
- "failed to unserialize "
- "req-buffer to dictionary");
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
goto out;
}
}
gf_log("cli", GF_LOG_DEBUG, "Received resp to bit rot command");
- ret = dict_get_int32(dict, "type", &type);
+ ret = dict_get_int32_sizen(dict, "type", &type);
if (ret) {
gf_log("cli", GF_LOG_ERROR, "Failed to get command type");
goto out;
}
+ if ((type == GF_BITROT_CMD_SCRUB_STATUS) &&
+ !(global_state->mode & GLUSTER_MODE_XML)) {
+ ret = gf_cli_print_bitrot_scrub_status(dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to print bitrot scrub status");
+ }
+ goto out;
+ }
+
/* Ignoring the error, as using dict val for cli output only */
- ret = dict_get_str(dict, "scrub-value", &scrub_cmd);
+ ret = dict_get_str_sizen(dict, "volname", &volname);
if (ret)
- gf_log("cli", GF_LOG_TRACE, "Failed to get scrub command");
+ gf_log("cli", GF_LOG_TRACE, "failed to get volume name");
- ret = dict_get_str(dict, "volname", &volname);
+ ret = dict_get_str_sizen(dict, "scrub-value", &scrub_cmd);
if (ret)
- gf_log("cli", GF_LOG_TRACE, "failed to get volume name");
+ gf_log("cli", GF_LOG_TRACE, "Failed to get scrub command");
- ret = dict_get_str(dict, "cmd-str", &cmd_str);
+ ret = dict_get_str_sizen(dict, "cmd-str", &cmd_str);
if (ret)
gf_log("cli", GF_LOG_TRACE, "failed to get command string");
if (cmd_str)
cmd_op = strrchr(cmd_str, ' ') + 1;
- if ((type == GF_BITROT_CMD_SCRUB_STATUS) &&
- !(global_state->mode & GLUSTER_MODE_XML)) {
- ret = gf_cli_print_bitrot_scrub_status(dict);
- if (ret) {
- gf_log("cli", GF_LOG_ERROR,
- "Failed to print bitrot "
- "scrub status");
- }
- goto out;
- }
-
switch (type) {
case GF_BITROT_OPTION_TYPE_ENABLE:
- cli_out(
- "volume bitrot: success bitrot enabled "
- "for volume %s",
- volname);
+ cli_out("volume bitrot: success bitrot enabled for volume %s",
+ volname);
ret = 0;
goto out;
case GF_BITROT_OPTION_TYPE_DISABLE:
- cli_out(
- "volume bitrot: success bitrot disabled "
- "for volume %s",
- volname);
+ cli_out("volume bitrot: success bitrot disabled for volume %s",
+ volname);
ret = 0;
goto out;
case GF_BITROT_CMD_SCRUB_ONDEMAND:
- cli_out(
- "volume bitrot: scrubber started ondemand "
- "for volume %s",
- volname);
+ cli_out("volume bitrot: scrubber started ondemand for volume %s",
+ volname);
ret = 0;
goto out;
case GF_BITROT_OPTION_TYPE_SCRUB:
if (!strncmp("pause", scrub_cmd, sizeof("pause")))
- cli_out(
- "volume bitrot: scrubber paused "
- "for volume %s",
- volname);
+ cli_out("volume bitrot: scrubber paused for volume %s",
+ volname);
if (!strncmp("resume", scrub_cmd, sizeof("resume")))
- cli_out(
- "volume bitrot: scrubber resumed "
- "for volume %s",
- volname);
+ cli_out("volume bitrot: scrubber resumed for volume %s",
+ volname);
ret = 0;
goto out;
case GF_BITROT_OPTION_TYPE_SCRUB_FREQ:
@@ -12070,7 +10830,7 @@ xml_output:
ret = cli_xml_output_vol_profile(dict, rsp.op_ret, rsp.op_errno,
rsp.op_errstr);
if (ret)
- gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto out;
}
@@ -12088,7 +10848,7 @@ out:
return ret;
}
-int32_t
+static int32_t
gf_cli_bitrot(call_frame_t *frame, xlator_t *this, void *data)
{
gf_cli_req req = {{
@@ -12097,30 +10857,25 @@ gf_cli_bitrot(call_frame_t *frame, xlator_t *this, void *data)
dict_t *options = NULL;
int ret = -1;
- if (!frame || !this || !data)
- goto out;
-
options = data;
ret = cli_to_glusterd(&req, frame, gf_cli_bitrot_cbk,
(xdrproc_t)xdr_gf_cli_req, options,
GLUSTER_CLI_BITROT, this, cli_rpc_prog, NULL);
if (ret) {
- gf_log("cli", GF_LOG_ERROR,
- "cli_to_glusterd for "
- "bitrot failed");
+ gf_log("cli", GF_LOG_ERROR, "cli_to_glusterd for bitrot failed");
goto out;
}
out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
GF_FREE(req.dict.dict_val);
return ret;
}
-struct rpc_clnt_procedure gluster_cli_actors[GLUSTER_CLI_MAXVALUE] = {
+static struct rpc_clnt_procedure gluster_cli_actors[GLUSTER_CLI_MAXVALUE] = {
[GLUSTER_CLI_NULL] = {"NULL", NULL},
[GLUSTER_CLI_PROBE] = {"PROBE_QUERY", gf_cli_probe},
[GLUSTER_CLI_DEPROBE] = {"DEPROBE_QUERY", gf_cli_deprobe},
@@ -12166,12 +10921,10 @@ struct rpc_clnt_procedure gluster_cli_actors[GLUSTER_CLI_MAXVALUE] = {
[GLUSTER_CLI_BARRIER_VOLUME] = {"BARRIER VOLUME", gf_cli_barrier_volume},
[GLUSTER_CLI_GET_VOL_OPT] = {"GET_VOL_OPT", gf_cli_get_vol_opt},
[GLUSTER_CLI_BITROT] = {"BITROT", gf_cli_bitrot},
- [GLUSTER_CLI_ATTACH_TIER] = {"ATTACH_TIER", gf_cli_attach_tier},
- [GLUSTER_CLI_TIER] = {"TIER", gf_cli_tier},
[GLUSTER_CLI_GET_STATE] = {"GET_STATE", gf_cli_get_state},
[GLUSTER_CLI_RESET_BRICK] = {"RESET_BRICK", gf_cli_reset_brick},
- [GLUSTER_CLI_REMOVE_TIER_BRICK] = {"DETACH_TIER", gf_cli_remove_tier_brick},
- [GLUSTER_CLI_ADD_TIER_BRICK] = {"ADD_TIER_BRICK", gf_cli_add_tier_brick}};
+ [GLUSTER_CLI_GANESHA] = {"GANESHA", gf_cli_ganesha},
+};
struct rpc_clnt_program cli_prog = {
.progname = "Gluster CLI",
@@ -12181,7 +10934,7 @@ struct rpc_clnt_program cli_prog = {
.proctable = gluster_cli_actors,
};
-struct rpc_clnt_procedure cli_quotad_procs[GF_AGGREGATOR_MAXVALUE] = {
+static struct rpc_clnt_procedure cli_quotad_procs[GF_AGGREGATOR_MAXVALUE] = {
[GF_AGGREGATOR_NULL] = {"NULL", NULL},
[GF_AGGREGATOR_LOOKUP] = {"LOOKUP", NULL},
[GF_AGGREGATOR_GETLIMIT] = {"GETLIMIT", cli_quotad_getlimit},
diff --git a/cli/src/cli-xml-output.c b/cli/src/cli-xml-output.c
index fa79066cb3a..069de75801c 100644
--- a/cli/src/cli-xml-output.c
+++ b/cli/src/cli-xml-output.c
@@ -10,10 +10,10 @@
#include <stdlib.h>
#include "cli.h"
#include "cli1-xdr.h"
-#include "run.h"
-#include "compat.h"
-#include "syscall.h"
-#include "upcall-utils.h"
+#include <glusterfs/run.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/upcall-utils.h>
enum gf_task_types { GF_TASK_TYPE_REBALANCE, GF_TASK_TYPE_REMOVE_BRICK };
@@ -64,7 +64,7 @@ cli_begin_xml_output(xmlTextWriterPtr *writer, xmlDocPtr *doc)
int ret = -1;
*writer = xmlNewTextWriterDoc(doc, 0);
- if (writer == NULL) {
+ if (*writer == NULL) {
ret = -1;
goto out;
}
@@ -1526,7 +1526,6 @@ cli_xml_output_vol_status(cli_local_t *local, dict_t *dict)
gf_boolean_t node_present = _gf_true;
int i;
int type = -1;
- int hot_brick_count = -1;
/* <volume> */
ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"volume");
@@ -1564,23 +1563,7 @@ cli_xml_output_vol_status(cli_local_t *local, dict_t *dict)
if (ret)
goto out;
- if (type == GF_CLUSTER_TYPE_TIER) {
- ret = dict_get_int32(dict, "hot_brick_count", &hot_brick_count);
- if (ret)
- goto out;
-
- ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"hotBricks");
- XML_RET_CHECK_AND_GOTO(ret, out);
- }
for (i = 0; i <= index_max; i++) {
- if (type == GF_CLUSTER_TYPE_TIER && i == hot_brick_count) {
- /* </hotBricks>*/
- ret = xmlTextWriterEndElement(local->writer);
- XML_RET_CHECK_AND_GOTO(ret, out);
- ret = xmlTextWriterStartElement(local->writer,
- (xmlChar *)"coldBricks");
- XML_RET_CHECK_AND_GOTO(ret, out);
- }
ret = cli_xml_output_vol_status_common(local->writer, dict, i, &online,
&node_present);
if (ret) {
@@ -1646,12 +1629,6 @@ cli_xml_output_vol_status(cli_local_t *local, dict_t *dict)
/* </node> was opened in cli_xml_output_vol_status_common()*/
ret = xmlTextWriterEndElement(local->writer);
XML_RET_CHECK_AND_GOTO(ret, out);
-
- /* </coldBricks>*/
- if (type == GF_CLUSTER_TYPE_TIER && i == brick_index_max) {
- ret = xmlTextWriterEndElement(local->writer);
- XML_RET_CHECK_AND_GOTO(ret, out);
- }
}
/* Tasks are only present when a normal volume status call is done on a
@@ -1684,15 +1661,15 @@ cli_xml_output_vol_top_rw_perf(xmlTextWriterPtr writer, dict_t *dict,
int ret = -1;
char *filename = NULL;
uint64_t throughput = 0;
- long int time_sec = 0;
- long int time_usec = 0;
- char timestr[256] = {
+ struct timeval tv = {
+ 0,
+ };
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
char key[1024] = {
0,
};
- int len;
/* <file> */
ret = xmlTextWriterStartElement(writer, (xmlChar *)"file");
@@ -1715,19 +1692,16 @@ cli_xml_output_vol_top_rw_perf(xmlTextWriterPtr writer, dict_t *dict,
XML_RET_CHECK_AND_GOTO(ret, out);
snprintf(key, sizeof(key), "%d-time-sec-%d", brick_index, member_index);
- ret = dict_get_int32(dict, key, (int32_t *)&time_sec);
+ ret = dict_get_int32(dict, key, (int32_t *)&tv.tv_sec);
if (ret)
goto out;
snprintf(key, sizeof(key), "%d-time-usec-%d", brick_index, member_index);
- ret = dict_get_int32(dict, key, (int32_t *)&time_usec);
+ ret = dict_get_int32(dict, key, (int32_t *)&tv.tv_usec);
if (ret)
goto out;
- gf_time_fmt(timestr, sizeof timestr, time_sec, gf_timefmt_FT);
- len = strlen(timestr);
- snprintf(timestr + len, sizeof(timestr) - len, ".%" GF_PRI_SUSECONDS,
- time_usec);
+ gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT);
ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"time", "%s",
timestr);
XML_RET_CHECK_AND_GOTO(ret, out);
@@ -1976,11 +1950,11 @@ cli_xml_output_vol_profile_stats(xmlTextWriterPtr writer, dict_t *dict,
XML_RET_CHECK_AND_GOTO(ret, out);
ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"size",
- "%" PRIu32, (1 << i));
+ "%" PRIu32, (1U << i));
XML_RET_CHECK_AND_GOTO(ret, out);
- snprintf(key, sizeof(key), "%d-%d-read-%d", brick_index, interval,
- (1 << i));
+ snprintf(key, sizeof(key), "%d-%d-read-%" PRIu32, brick_index, interval,
+ (1U << i));
ret = dict_get_uint64(dict, key, &read_count);
if (ret)
read_count = 0;
@@ -1988,8 +1962,8 @@ cli_xml_output_vol_profile_stats(xmlTextWriterPtr writer, dict_t *dict,
"%" PRIu64, read_count);
XML_RET_CHECK_AND_GOTO(ret, out);
- snprintf(key, sizeof(key), "%d-%d-write-%d", brick_index, interval,
- (1 << i));
+ snprintf(key, sizeof(key), "%d-%d-write-%" PRIu32, brick_index,
+ interval, (1U << i));
ret = dict_get_uint64(dict, key, &write_count);
if (ret)
write_count = 0;
@@ -2471,24 +2445,6 @@ cli_xml_output_vol_info(cli_local_t *local, dict_t *dict)
int j = 1;
char *caps __attribute__((unused)) = NULL;
int k __attribute__((unused)) = 0;
- int index = 1;
- int tier_vol_type = 0;
- /* hot dist count is always zero so need for it to be
- * included in the array.*/
- int hot_dist_count = 0;
- values c = 0;
- char *keys[MAX] = {
- [COLD_BRICK_COUNT] = "volume%d.cold_brick_count",
- [COLD_TYPE] = "volume%d.cold_type",
- [COLD_DIST_COUNT] = "volume%d.cold_dist_count",
- [COLD_REPLICA_COUNT] = "volume%d.cold_replica_count",
- [COLD_ARBITER_COUNT] = "volume%d.cold_arbiter_count",
- [COLD_DISPERSE_COUNT] = "volume%d.cold_disperse_count",
- [COLD_REDUNDANCY_COUNT] = "volume%d.cold_redundancy_count",
- [HOT_BRICK_COUNT] = "volume%d.hot_brick_count",
- [HOT_TYPE] = "volume%d.hot_type",
- [HOT_REPLICA_COUNT] = "volume%d.hot_replica_count"};
- int value[MAX] = {};
ret = dict_get_int32(dict, "count", &count);
if (ret)
@@ -2548,8 +2504,9 @@ cli_xml_output_vol_info(cli_local_t *local, dict_t *dict)
ret = dict_get_int32(dict, key, &dist_count);
if (ret)
goto out;
- ret = xmlTextWriterWriteFormatElement(
- local->writer, (xmlChar *)"distCount", "%d", dist_count);
+ ret = xmlTextWriterWriteFormatElement(local->writer,
+ (xmlChar *)"distCount", "%d",
+ (brick_count / dist_count));
XML_RET_CHECK_AND_GOTO(ret, out);
snprintf(key, sizeof(key), "volume%d.stripe_count", i);
@@ -2618,297 +2575,53 @@ cli_xml_output_vol_info(cli_local_t *local, dict_t *dict)
local->writer, (xmlChar *)"transport", "%d", transport);
XML_RET_CHECK_AND_GOTO(ret, out);
-#ifdef HAVE_BD_XLATOR
- /* <xlators> */
- ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"xlators");
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- for (k = 0;; k++) {
- snprintf(key, sizeof(key), "volume%d.xlator%d", i, k);
- ret = dict_get_str(dict, key, &caps);
- if (ret)
- break;
-
- /* <xlator> */
- ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"xlator");
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- ret = xmlTextWriterWriteFormatElement(
- local->writer, (xmlChar *)"name", "%s", caps);
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- /* <capabilities> */
- ret = xmlTextWriterStartElement(local->writer,
- (xmlChar *)"capabilities");
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- j = 0;
- for (j = 0;; j++) {
- snprintf(key, sizeof(key), "volume%d.xlator%d.caps%d", i, k, j);
- ret = dict_get_str(dict, key, &caps);
- if (ret)
- break;
- ret = xmlTextWriterWriteFormatElement(
- local->writer, (xmlChar *)"capability", "%s", caps);
- XML_RET_CHECK_AND_GOTO(ret, out);
- }
- /* </capabilities> */
- ret = xmlTextWriterEndElement(local->writer);
- XML_RET_CHECK_AND_GOTO(ret, out);
- /* </xlator> */
- ret = xmlTextWriterEndElement(local->writer);
- XML_RET_CHECK_AND_GOTO(ret, out);
- }
- ret = xmlTextWriterFullEndElement(local->writer);
- XML_RET_CHECK_AND_GOTO(ret, out);
- /* </xlators> */
-#endif
j = 1;
/* <bricks> */
ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"bricks");
XML_RET_CHECK_AND_GOTO(ret, out);
- if (type == GF_CLUSTER_TYPE_TIER) {
- /*the values for hot stripe, disperse and redundancy
- * should not be looped in here as they are zero
- * always */
- for (c = COLD_BRICK_COUNT; c < MAX; c++) {
- snprintf(key, 256, keys[c], i);
- ret = dict_get_int32(dict, key, &value[c]);
- if (ret)
- goto out;
- }
-
- hot_dist_count = (value[HOT_REPLICA_COUNT]
- ? value[HOT_REPLICA_COUNT]
- : 1);
-
- tier_vol_type = get_vol_type(value[HOT_TYPE], hot_dist_count,
- value[HOT_BRICK_COUNT]);
-
- if ((value[HOT_TYPE] != GF_CLUSTER_TYPE_TIER) &&
- (value[HOT_TYPE] > 0) &&
- (hot_dist_count < value[HOT_BRICK_COUNT]))
- tier_vol_type = value[HOT_TYPE] + GF_CLUSTER_TYPE_MAX - 1;
-
- ret = xmlTextWriterStartElement(local->writer,
- (xmlChar *)"hotBricks");
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- ret = xmlTextWriterWriteFormatElement(
- local->writer, (xmlChar *)"hotBrickType", "%s",
- vol_type_str[tier_vol_type]);
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- ret = xmlTextWriterWriteFormatElement(
- local->writer, (xmlChar *)"hotreplicaCount", "%d",
- value[HOT_REPLICA_COUNT]);
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- ret = xmlTextWriterWriteFormatElement(local->writer,
- (xmlChar *)"hotbrickCount",
- "%d", value[HOT_BRICK_COUNT]);
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- if (value[HOT_TYPE] == GF_CLUSTER_TYPE_NONE ||
- value[HOT_TYPE] == GF_CLUSTER_TYPE_TIER) {
- ret = xmlTextWriterWriteFormatElement(
- local->writer, (xmlChar *)"numberOfBricks", "%d",
- value[HOT_BRICK_COUNT]);
- } else {
- ret = xmlTextWriterWriteFormatElement(
- local->writer, (xmlChar *)"numberOfBricks", "%d x %d = %d",
- (value[HOT_BRICK_COUNT] / hot_dist_count), hot_dist_count,
- value[HOT_BRICK_COUNT]);
- }
+ while (j <= brick_count) {
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"brick");
XML_RET_CHECK_AND_GOTO(ret, out);
- while (index <= value[HOT_BRICK_COUNT]) {
- snprintf(key, 1024, "volume%d.brick%d", i, index);
- ret = dict_get_str(dict, key, &brick);
- if (ret)
- goto out;
-
- ret = xmlTextWriterStartElement(local->writer,
- (xmlChar *)"brick");
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- snprintf(key, sizeof(key), "volume%d.brick%d.uuid", i, index);
- ret = dict_get_str(dict, key, &uuid);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatAttribute(
- local->writer, (xmlChar *)"uuid", "%s", uuid);
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- ret = xmlTextWriterWriteFormatString(local->writer, "%s",
- brick);
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- ret = xmlTextWriterWriteFormatElement(
- local->writer, (xmlChar *)"name", "%s", brick);
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- ret = xmlTextWriterWriteFormatElement(
- local->writer, (xmlChar *)"hostUuid", "%s", uuid);
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- ret = xmlTextWriterEndElement(local->writer);
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- index++;
- }
- ret = xmlTextWriterEndElement(local->writer);
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- tier_vol_type = get_vol_type(value[COLD_TYPE],
- value[COLD_DIST_COUNT],
- value[COLD_BRICK_COUNT]);
-
- ret = xmlTextWriterStartElement(local->writer,
- (xmlChar *)"coldBricks");
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- ret = xmlTextWriterWriteFormatElement(
- local->writer, (xmlChar *)"coldBrickType", "%s",
- vol_type_str[tier_vol_type]);
+ snprintf(key, sizeof(key), "volume%d.brick%d.uuid", i, j);
+ ret = dict_get_str(dict, key, &uuid);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatAttribute(
+ local->writer, (xmlChar *)"uuid", "%s", uuid);
XML_RET_CHECK_AND_GOTO(ret, out);
- ret = xmlTextWriterWriteFormatElement(
- local->writer, (xmlChar *)"coldreplicaCount", "%d",
- value[COLD_REPLICA_COUNT]);
+ snprintf(key, sizeof(key), "volume%d.brick%d", i, j);
+ ret = dict_get_str(dict, key, &brick);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatString(local->writer, "%s", brick);
XML_RET_CHECK_AND_GOTO(ret, out);
ret = xmlTextWriterWriteFormatElement(
- local->writer, (xmlChar *)"coldarbiterCount", "%d",
- value[COLD_ARBITER_COUNT]);
+ local->writer, (xmlChar *)"name", "%s", brick);
XML_RET_CHECK_AND_GOTO(ret, out);
ret = xmlTextWriterWriteFormatElement(
- local->writer, (xmlChar *)"coldbrickCount", "%d",
- value[COLD_BRICK_COUNT]);
+ local->writer, (xmlChar *)"hostUuid", "%s", uuid);
XML_RET_CHECK_AND_GOTO(ret, out);
+ snprintf(key, sizeof(key), "volume%d.brick%d.isArbiter", i, j);
+ if (dict_get(dict, key))
+ isArbiter = 1;
+ else
+ isArbiter = 0;
ret = xmlTextWriterWriteFormatElement(
- local->writer, (xmlChar *)"colddisperseCount", "%d",
- value[COLD_DISPERSE_COUNT]);
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- if (value[COLD_TYPE] == GF_CLUSTER_TYPE_NONE ||
- value[COLD_TYPE] == GF_CLUSTER_TYPE_TIER) {
- ret = xmlTextWriterWriteFormatElement(
- local->writer, (xmlChar *)"numberOfBricks", "%d",
- value[COLD_BRICK_COUNT]);
- } else if (value[COLD_TYPE] == GF_CLUSTER_TYPE_DISPERSE) {
- ret = xmlTextWriterWriteFormatElement(
- local->writer, (xmlChar *)"numberOfBricks",
- " %d x (%d + %d) = %d",
- (value[COLD_BRICK_COUNT] / value[COLD_DIST_COUNT]),
- value[COLD_DISPERSE_COUNT] - value[COLD_REDUNDANCY_COUNT],
- value[COLD_REDUNDANCY_COUNT], value[COLD_BRICK_COUNT]);
- } else {
- ret = xmlTextWriterWriteFormatElement(
- local->writer, (xmlChar *)"numberOfBricks", "%d x %d = %d",
- (value[COLD_BRICK_COUNT] / value[COLD_DIST_COUNT]),
- value[COLD_DIST_COUNT], value[COLD_BRICK_COUNT]);
- }
+ local->writer, (xmlChar *)"isArbiter", "%d", isArbiter);
XML_RET_CHECK_AND_GOTO(ret, out);
- index = value[HOT_BRICK_COUNT] + 1;
-
- while (index <= brick_count) {
- snprintf(key, 1024, "volume%d.brick%d", i, index);
- ret = dict_get_str(dict, key, &brick);
- if (ret)
- goto out;
-
- ret = xmlTextWriterStartElement(local->writer,
- (xmlChar *)"brick");
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- snprintf(key, sizeof(key), "volume%d.brick%d.uuid", i, index);
- ret = dict_get_str(dict, key, &uuid);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatAttribute(
- local->writer, (xmlChar *)"uuid", "%s", uuid);
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- ret = xmlTextWriterWriteFormatString(local->writer, "%s",
- brick);
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- ret = xmlTextWriterWriteFormatElement(
- local->writer, (xmlChar *)"name", "%s", brick);
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- ret = xmlTextWriterWriteFormatElement(
- local->writer, (xmlChar *)"hostUuid", "%s", uuid);
- XML_RET_CHECK_AND_GOTO(ret, out);
- snprintf(key, sizeof(key), "volume%d.brick%d.isArbiter", i,
- index);
- if (dict_get(dict, key))
- isArbiter = 1;
- else
- isArbiter = 0;
- ret = xmlTextWriterWriteFormatElement(
- local->writer, (xmlChar *)"isArbiter", "%d", isArbiter);
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- ret = xmlTextWriterEndElement(local->writer);
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- index++;
- }
+ /* </brick> */
ret = xmlTextWriterEndElement(local->writer);
XML_RET_CHECK_AND_GOTO(ret, out);
- } else {
- while (j <= brick_count) {
- ret = xmlTextWriterStartElement(local->writer,
- (xmlChar *)"brick");
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- snprintf(key, sizeof(key), "volume%d.brick%d.uuid", i, j);
- ret = dict_get_str(dict, key, &uuid);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatAttribute(
- local->writer, (xmlChar *)"uuid", "%s", uuid);
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- snprintf(key, sizeof(key), "volume%d.brick%d", i, j);
- ret = dict_get_str(dict, key, &brick);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatString(local->writer, "%s",
- brick);
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- ret = xmlTextWriterWriteFormatElement(
- local->writer, (xmlChar *)"name", "%s", brick);
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- ret = xmlTextWriterWriteFormatElement(
- local->writer, (xmlChar *)"hostUuid", "%s", uuid);
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- snprintf(key, sizeof(key), "volume%d.brick%d.isArbiter", i, j);
- if (dict_get(dict, key))
- isArbiter = 1;
- else
- isArbiter = 0;
- ret = xmlTextWriterWriteFormatElement(
- local->writer, (xmlChar *)"isArbiter", "%d", isArbiter);
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- /* </brick> */
- ret = xmlTextWriterEndElement(local->writer);
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- j++;
- }
+ j++;
}
/* </bricks> */
ret = xmlTextWriterEndElement(local->writer);
@@ -3064,7 +2777,9 @@ cli_xml_output_peer_hostnames(xmlTextWriterPtr writer, dict_t *dict,
XML_RET_CHECK_AND_GOTO(ret, out);
for (i = 0; i < count; i++) {
- snprintf(key, sizeof(key), "%s.hostname%d", prefix, i);
+ ret = snprintf(key, sizeof(key), "%s.hostname%d", prefix, i);
+ if (ret < 0)
+ goto out;
ret = dict_get_str(dict, key, &hostname);
if (ret)
goto out;
@@ -3434,90 +3149,6 @@ out:
gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
}
-
-int
-cli_xml_output_vol_tier_status(xmlTextWriterPtr writer, dict_t *dict,
- enum gf_task_types task_type)
-{
- int ret = -1;
- int count = 0;
- char *node_name = NULL;
- char *status_str = NULL;
- uint64_t promoted = 0;
- uint64_t demoted = 0;
- int i = 1;
- char key[1024] = {
- 0,
- };
- gf_defrag_status_t status_rcd = GF_DEFRAG_STATUS_NOT_STARTED;
-
- GF_VALIDATE_OR_GOTO("cli", dict, out);
-
- ret = dict_get_int32(dict, "count", &count);
- if (ret) {
- gf_log("cli", GF_LOG_ERROR, "count not set");
- goto out;
- }
-
- ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"nodeCount", "%d",
- count);
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- while (i <= count) {
- promoted = 0;
- node_name = NULL;
- demoted = 0;
-
- ret = xmlTextWriterStartElement(writer, (xmlChar *)"node");
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- snprintf(key, sizeof(key), "node-name-%d", i);
- ret = dict_get_str(dict, key, &node_name);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"nodeName",
- "%s", node_name);
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- snprintf(key, sizeof(key), "promoted-%d", i);
- ret = dict_get_uint64(dict, key, &promoted);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"promoted"
- "Files", "%"PRIu64,
- promoted);
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- snprintf(key, sizeof(key), "demoted-%d", i);
- ret = dict_get_uint64(dict, key, &demoted);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"demoted"
- "Files", "%"PRIu64,
- demoted);
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- snprintf(key, sizeof(key), "status-%d", i);
- ret = dict_get_int32(dict, key, (int32_t *)&status_rcd);
-
- status_str = cli_vol_task_status_str[status_rcd];
-
- ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"statusStr",
- "%s", status_str);
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- ret = xmlTextWriterEndElement(writer);
- XML_RET_CHECK_AND_GOTO(ret, out);
-
- i++;
- }
-
-out:
- gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
-}
#endif
int
@@ -3552,12 +3183,6 @@ cli_xml_output_vol_rebalance(gf_cli_defrag_type op, dict_t *dict, int op_ret,
ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"op", "%d", op);
XML_RET_CHECK_AND_GOTO(ret, out);
- if (GF_DEFRAG_CMD_STATUS_TIER == op) {
- ret = cli_xml_output_vol_tier_status(writer, dict,
- GF_TASK_TYPE_REBALANCE);
- if (ret)
- goto out;
- }
if ((GF_DEFRAG_CMD_STOP == op) || (GF_DEFRAG_CMD_STATUS == op)) {
ret = cli_xml_output_vol_rebalance_status(writer, dict,
GF_TASK_TYPE_REBALANCE);
@@ -3580,10 +3205,9 @@ out:
}
int
-cli_xml_output_vol_remove_brick_detach_tier(gf_boolean_t status_op,
- dict_t *dict, int op_ret,
- int op_errno, char *op_errstr,
- const char *op)
+cli_xml_output_vol_remove_brick(gf_boolean_t status_op, dict_t *dict,
+ int op_ret, int op_errno, char *op_errstr,
+ const char *op)
{
#if (HAVE_LIB_XML)
int ret = -1;
@@ -3953,20 +3577,21 @@ cli_xml_output_vol_gsync_status(dict_t *dict, xmlTextWriterPtr writer)
char *volume_next = NULL;
char *slave = NULL;
char *slave_next = NULL;
- char *title_values[] = {"master_node", "", "master_brick", "slave_user",
- "slave", "slave_node", "status", "crawl_status",
- /* last_synced */
- "", "entry", "data", "meta", "failures",
- /* checkpoint_time */
- "", "checkpoint_completed",
- /* checkpoint_completion_time */
- "", "master_node_uuid",
- /* last_synced_utc */
- "last_synced",
- /* checkpoint_time_utc */
- "checkpoint_time",
- /* checkpoint_completion_time_utc */
- "checkpoint_completion_time"};
+ static const char *title_values[] = {
+ "master_node", "", "master_brick", "slave_user", "slave", "slave_node",
+ "status", "crawl_status",
+ /* last_synced */
+ "", "entry", "data", "meta", "failures",
+ /* checkpoint_time */
+ "", "checkpoint_completed",
+ /* checkpoint_completion_time */
+ "", "master_node_uuid",
+ /* last_synced_utc */
+ "last_synced",
+ /* checkpoint_time_utc */
+ "checkpoint_time",
+ /* checkpoint_completion_time_utc */
+ "checkpoint_completion_time"};
GF_ASSERT(dict);
@@ -4577,7 +4202,9 @@ cli_xml_snapshot_info_snap_vol(xmlTextWriterPtr writer, xmlDocPtr doc,
ret = xmlTextWriterStartElement(writer, (xmlChar *)"snapVolume");
XML_RET_CHECK_AND_GOTO(ret, out);
- snprintf(key, sizeof(key), "%s.volname", keyprefix);
+ ret = snprintf(key, sizeof(key), "%s.volname", keyprefix);
+ if (ret < 0)
+ goto out;
ret = dict_get_str(dict, key, &buffer);
if (ret) {
@@ -4589,7 +4216,9 @@ cli_xml_snapshot_info_snap_vol(xmlTextWriterPtr writer, xmlDocPtr doc,
buffer);
XML_RET_CHECK_AND_GOTO(ret, out);
- snprintf(key, sizeof(key), "%s.vol-status", keyprefix);
+ ret = snprintf(key, sizeof(key), "%s.vol-status", keyprefix);
+ if (ret < 0)
+ goto out;
ret = dict_get_str(dict, key, &buffer);
if (ret) {
@@ -4604,7 +4233,10 @@ cli_xml_snapshot_info_snap_vol(xmlTextWriterPtr writer, xmlDocPtr doc,
/* If the command is snap_driven then we need to show origin volume
* info. Else this is shown in the start of info display.*/
if (snap_driven) {
- snprintf(key, sizeof(key), "%s.", keyprefix);
+ ret = snprintf(key, sizeof(key), "%s.", keyprefix);
+ if (ret < 0)
+ goto out;
+
ret = cli_xml_snapshot_info_orig_vol(writer, doc, dict, key);
if (ret) {
gf_log("cli", GF_LOG_ERROR,
@@ -4655,7 +4287,9 @@ cli_xml_snapshot_info_per_snap(xmlTextWriterPtr writer, xmlDocPtr doc,
ret = xmlTextWriterStartElement(writer, (xmlChar *)"snapshot");
XML_RET_CHECK_AND_GOTO(ret, out);
- snprintf(key_buffer, sizeof(key_buffer), "%s.snapname", keyprefix);
+ ret = snprintf(key_buffer, sizeof(key_buffer), "%s.snapname", keyprefix);
+ if (ret < 0)
+ goto out;
ret = dict_get_str(dict, key_buffer, &buffer);
if (ret) {
@@ -4667,7 +4301,9 @@ cli_xml_snapshot_info_per_snap(xmlTextWriterPtr writer, xmlDocPtr doc,
buffer);
XML_RET_CHECK_AND_GOTO(ret, out);
- snprintf(key_buffer, sizeof(key_buffer), "%s.snap-id", keyprefix);
+ ret = snprintf(key_buffer, sizeof(key_buffer), "%s.snap-id", keyprefix);
+ if (ret < 0)
+ goto out;
ret = dict_get_str(dict, key_buffer, &buffer);
if (ret) {
@@ -4679,7 +4315,9 @@ cli_xml_snapshot_info_per_snap(xmlTextWriterPtr writer, xmlDocPtr doc,
buffer);
XML_RET_CHECK_AND_GOTO(ret, out);
- snprintf(key_buffer, sizeof(key_buffer), "%s.snap-desc", keyprefix);
+ ret = snprintf(key_buffer, sizeof(key_buffer), "%s.snap-desc", keyprefix);
+ if (ret < 0)
+ goto out;
ret = dict_get_str(dict, key_buffer, &buffer);
if (!ret) {
@@ -4691,7 +4329,9 @@ cli_xml_snapshot_info_per_snap(xmlTextWriterPtr writer, xmlDocPtr doc,
}
XML_RET_CHECK_AND_GOTO(ret, out);
- snprintf(key_buffer, sizeof(key_buffer), "%s.snap-time", keyprefix);
+ ret = snprintf(key_buffer, sizeof(key_buffer), "%s.snap-time", keyprefix);
+ if (ret < 0)
+ goto out;
ret = dict_get_str(dict, key_buffer, &buffer);
if (ret) {
@@ -4703,7 +4343,10 @@ cli_xml_snapshot_info_per_snap(xmlTextWriterPtr writer, xmlDocPtr doc,
buffer);
XML_RET_CHECK_AND_GOTO(ret, out);
- snprintf(key_buffer, sizeof(key_buffer), "%s.vol-count", keyprefix);
+ ret = snprintf(key_buffer, sizeof(key_buffer), "%s.vol-count", keyprefix);
+ if (ret < 0)
+ goto out;
+
ret = dict_get_int32(dict, key_buffer, &volcount);
if (ret) {
gf_log("cli", GF_LOG_ERROR, "Fail to get snap vol count");
@@ -4717,7 +4360,10 @@ cli_xml_snapshot_info_per_snap(xmlTextWriterPtr writer, xmlDocPtr doc,
ret = dict_get_int32(dict, key_buffer, &volcount);
/* Display info of each snapshot volume */
for (i = 1; i <= volcount; i++) {
- snprintf(key_buffer, sizeof(key_buffer), "%s.vol%d", keyprefix, i);
+ ret = snprintf(key_buffer, sizeof(key_buffer), "%s.vol%d", keyprefix,
+ i);
+ if (ret < 0)
+ goto out;
ret = cli_xml_snapshot_info_snap_vol(writer, doc, dict, key_buffer,
snap_driven);
@@ -4841,7 +4487,9 @@ cli_xml_snapshot_volume_status(xmlTextWriterPtr writer, xmlDocPtr doc,
GF_ASSERT(dict);
GF_ASSERT(keyprefix);
- snprintf(key, sizeof(key), "%s.brickcount", keyprefix);
+ ret = snprintf(key, sizeof(key), "%s.brickcount", keyprefix);
+ if (ret < 0)
+ goto out;
ret = dict_get_int32(dict, key, &brickcount);
if (ret) {
@@ -4859,7 +4507,9 @@ cli_xml_snapshot_volume_status(xmlTextWriterPtr writer, xmlDocPtr doc,
ret = xmlTextWriterStartElement(writer, (xmlChar *)"brick");
XML_RET_CHECK_AND_GOTO(ret, out);
- snprintf(key, sizeof(key), "%s.brick%d.path", keyprefix, i);
+ ret = snprintf(key, sizeof(key), "%s.brick%d.path", keyprefix, i);
+ if (ret < 0)
+ goto out;
ret = dict_get_str(dict, key, &buffer);
if (ret) {
@@ -4878,7 +4528,9 @@ cli_xml_snapshot_volume_status(xmlTextWriterPtr writer, xmlDocPtr doc,
buffer);
XML_RET_CHECK_AND_GOTO(ret, out);
- snprintf(key, sizeof(key), "%s.brick%d.vgname", keyprefix, i);
+ ret = snprintf(key, sizeof(key), "%s.brick%d.vgname", keyprefix, i);
+ if (ret < 0)
+ goto out;
ret = dict_get_str(dict, key, &buffer);
if (ret) {
@@ -4891,7 +4543,9 @@ cli_xml_snapshot_volume_status(xmlTextWriterPtr writer, xmlDocPtr doc,
XML_RET_CHECK_AND_GOTO(ret, out);
- snprintf(key, sizeof(key), "%s.brick%d.status", keyprefix, i);
+ ret = snprintf(key, sizeof(key), "%s.brick%d.status", keyprefix, i);
+ if (ret < 0)
+ goto out;
ret = dict_get_str(dict, key, &buffer);
if (ret) {
@@ -4904,7 +4558,9 @@ cli_xml_snapshot_volume_status(xmlTextWriterPtr writer, xmlDocPtr doc,
XML_RET_CHECK_AND_GOTO(ret, out);
- snprintf(key, sizeof(key), "%s.brick%d.pid", keyprefix, i);
+ ret = snprintf(key, sizeof(key), "%s.brick%d.pid", keyprefix, i);
+ if (ret < 0)
+ goto out;
ret = dict_get_int32(dict, key, &pid);
if (ret) {
@@ -4917,7 +4573,9 @@ cli_xml_snapshot_volume_status(xmlTextWriterPtr writer, xmlDocPtr doc,
XML_RET_CHECK_AND_GOTO(ret, out);
- snprintf(key, sizeof(key), "%s.brick%d.data", keyprefix, i);
+ ret = snprintf(key, sizeof(key), "%s.brick%d.data", keyprefix, i);
+ if (ret < 0)
+ goto out;
ret = dict_get_str(dict, key, &buffer);
if (ret) {
@@ -4930,7 +4588,10 @@ cli_xml_snapshot_volume_status(xmlTextWriterPtr writer, xmlDocPtr doc,
XML_RET_CHECK_AND_GOTO(ret, out);
- snprintf(key, sizeof(key), "%s.brick%d.lvsize", keyprefix, i);
+ ret = snprintf(key, sizeof(key), "%s.brick%d.lvsize", keyprefix, i);
+ if (ret < 0)
+ goto out;
+
ret = dict_get_str(dict, key, &buffer);
if (ret) {
gf_log("cli", GF_LOG_ERROR, "Unable to get LV Size");
diff --git a/cli/src/cli.c b/cli/src/cli.c
index 3716d2a89ca..a52b39c5fb8 100644
--- a/cli/src/cli.c
+++ b/cli/src/cli.c
@@ -33,35 +33,28 @@
#include <malloc.h>
#endif
-#ifdef HAVE_MALLOC_STATS
-#ifdef DEBUG
-#include <mcheck.h>
-#endif
-#endif
-
#include "cli.h"
#include "cli-quotad-client.h"
#include "cli-cmd.h"
#include "cli-mem-types.h"
-#include "xlator.h"
-#include "glusterfs.h"
-#include "compat.h"
-#include "logging.h"
-#include "dict.h"
-#include "list.h"
-#include "timer.h"
-#include "stack.h"
-#include "revision.h"
-#include "common-utils.h"
-#include "gf-event.h"
-#include "syscall.h"
-#include "call-stub.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/list.h>
+#include <glusterfs/timer.h>
+#include <glusterfs/stack.h>
+#include <glusterfs/revision.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/gf-event.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/call-stub.h>
#include <fnmatch.h>
#include "xdr-generic.h"
-extern int connected;
/* using argp for command line parsing */
const char *argp_program_version =
@@ -78,12 +71,16 @@ const char *argp_program_version =
const char *argp_program_bug_address = "<" PACKAGE_BUGREPORT ">";
struct rpc_clnt *global_quotad_rpc;
+
struct rpc_clnt *global_rpc;
rpc_clnt_prog_t *cli_rpc_prog;
extern struct rpc_clnt_program cli_prog;
+int cli_default_conn_timeout = 120;
+int cli_ten_minutes_timeout = 600;
+
static int
glusterfs_ctx_defaults_init(glusterfs_ctx_t *ctx)
{
@@ -94,60 +91,91 @@ glusterfs_ctx_defaults_init(glusterfs_ctx_t *ctx)
call_pool_t *pool = NULL;
int ret = -1;
+ if (!ctx)
+ return ret;
+
ret = xlator_mem_acct_init(THIS, cli_mt_end);
if (ret != 0) {
+ gf_log("cli", GF_LOG_ERROR, "Memory accounting init failed.");
return ret;
}
+ /* Resetting ret to -1 to so in case of failure
+ * we can relese allocated resource.
+ */
+ ret = -1;
+
ctx->process_uuid = generate_glusterfs_ctx_id();
- if (!ctx->process_uuid)
- return -1;
+ if (!ctx->process_uuid) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to generate uuid.");
+ goto out;
+ }
ctx->page_size = 128 * GF_UNIT_KB;
ctx->iobuf_pool = iobuf_pool_new();
- if (!ctx->iobuf_pool)
- return -1;
+ if (!ctx->iobuf_pool) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to create iobuf pool.");
+ goto out;
+ }
- ctx->event_pool = event_pool_new(DEFAULT_EVENT_POOL_SIZE,
- STARTING_EVENT_THREADS);
- if (!ctx->event_pool)
- return -1;
+ ctx->event_pool = gf_event_pool_new(DEFAULT_EVENT_POOL_SIZE,
+ STARTING_EVENT_THREADS);
+ if (!ctx->event_pool) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to create event pool.");
+ goto out;
+ }
pool = GF_CALLOC(1, sizeof(call_pool_t), cli_mt_call_pool_t);
- if (!pool)
- return -1;
+ if (!pool) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to create call pool.");
+ goto out;
+ }
/* frame_mem_pool size 112 * 64 */
pool->frame_mem_pool = mem_pool_new(call_frame_t, 32);
- if (!pool->frame_mem_pool)
- return -1;
+ if (!pool->frame_mem_pool) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to create frame mem pool.");
+ goto out;
+ }
/* stack_mem_pool size 256 * 128 */
pool->stack_mem_pool = mem_pool_new(call_stack_t, 16);
- if (!pool->stack_mem_pool)
- return -1;
+ if (!pool->stack_mem_pool) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to create stack mem pool.");
+ goto out;
+ }
ctx->stub_mem_pool = mem_pool_new(call_stub_t, 16);
- if (!ctx->stub_mem_pool)
- return -1;
+ if (!ctx->stub_mem_pool) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to stub mem pool.");
+ goto out;
+ }
ctx->dict_pool = mem_pool_new(dict_t, 32);
- if (!ctx->dict_pool)
- return -1;
+ if (!ctx->dict_pool) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to create dict pool.");
+ goto out;
+ }
ctx->dict_pair_pool = mem_pool_new(data_pair_t, 512);
- if (!ctx->dict_pair_pool)
- return -1;
+ if (!ctx->dict_pair_pool) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to create dict pair pool.");
+ goto out;
+ }
ctx->dict_data_pool = mem_pool_new(data_t, 512);
- if (!ctx->dict_data_pool)
- return -1;
+ if (!ctx->dict_data_pool) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to create dict data pool.");
+ goto out;
+ }
ctx->logbuf_pool = mem_pool_new(log_buf_t, 256);
- if (!ctx->logbuf_pool)
- return -1;
+ if (!ctx->logbuf_pool) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to create logbuf pool.");
+ goto out;
+ }
INIT_LIST_HEAD(&pool->all_frames);
LOCK_INIT(&pool->lock);
@@ -161,7 +189,25 @@ glusterfs_ctx_defaults_init(glusterfs_ctx_t *ctx)
lim.rlim_max = RLIM_INFINITY;
setrlimit(RLIMIT_CORE, &lim);
- return 0;
+ ret = 0;
+
+out:
+ if (ret != 0) {
+ if (pool) {
+ mem_pool_destroy(pool->frame_mem_pool);
+ mem_pool_destroy(pool->stack_mem_pool);
+ }
+ GF_FREE(pool);
+ pool = NULL;
+ GF_FREE(ctx->process_uuid);
+ mem_pool_destroy(ctx->stub_mem_pool);
+ mem_pool_destroy(ctx->dict_pool);
+ mem_pool_destroy(ctx->dict_pair_pool);
+ mem_pool_destroy(ctx->dict_data_pool);
+ mem_pool_destroy(ctx->logbuf_pool);
+ }
+
+ return ret;
}
static int
@@ -174,7 +220,6 @@ logging_init(glusterfs_ctx_t *ctx, struct cli_state *state)
/* passing ident as NULL means to use default ident for syslog */
if (gf_log_init(ctx, log_file, NULL) == -1) {
fprintf(stderr, "ERROR: failed to open logfile %s\n", log_file);
- return -1;
}
/* CLI should not have something to DEBUG after the release,
@@ -258,14 +303,14 @@ cli_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
switch (event) {
case RPC_CLNT_CONNECT: {
- cli_cmd_broadcast_connected();
+ cli_cmd_broadcast_connected(_gf_true);
gf_log(this->name, GF_LOG_TRACE, "got RPC_CLNT_CONNECT");
break;
}
case RPC_CLNT_DISCONNECT: {
+ cli_cmd_broadcast_connected(_gf_false);
gf_log(this->name, GF_LOG_TRACE, "got RPC_CLNT_DISCONNECT");
- connected = 0;
if (!global_state->prompt && global_state->await_connected) {
ret = 1;
cli_out(
@@ -320,6 +365,18 @@ cli_opt_parse(char *opt, struct cli_state *state)
if (strcmp(opt, "") == 0)
return 1;
+ if (strcmp(opt, "help") == 0) {
+ cli_out(
+ " peer help - display help for peer commands\n"
+ " volume help - display help for volume commands\n"
+ " volume bitrot help - display help for volume"
+ " bitrot commands\n"
+ " volume quota help - display help for volume"
+ " quota commands\n"
+ " snapshot help - display help for snapshot commands\n"
+ " global help - list global commands\n");
+ exit(0);
+ }
if (strcmp(opt, "version") == 0) {
cli_out("%s", argp_program_version);
@@ -379,6 +436,12 @@ cli_opt_parse(char *opt, struct cli_state *state)
return 0;
}
+ oarg = strtail(opt, "inet6");
+ if (oarg) {
+ state->address_family = "inet6";
+ return 0;
+ }
+
oarg = strtail(opt, "log-file=");
if (oarg) {
state->log_file = oarg;
@@ -454,7 +517,8 @@ parse_cmdline(int argc, char *argv[], struct cli_state *state)
continue;
ret = cli_opt_parse(opt, state);
if (ret == -1) {
- cli_out("unrecognized option --%s", opt);
+ cli_out("unrecognized option --%s\n", opt);
+ usage();
return ret;
} else if (ret == -2) {
return ret;
@@ -600,9 +664,8 @@ cli_quotad_clnt_rpc_init(void)
global_quotad_rpc = rpc;
out:
- if (ret) {
- if (rpc_opts)
- dict_unref(rpc_opts);
+ if (rpc_opts) {
+ dict_unref(rpc_opts);
}
return rpc;
}
@@ -624,6 +687,15 @@ cli_rpc_init(struct cli_state *state)
this = THIS;
cli_rpc_prog = &cli_prog;
+ options = dict_new();
+ if (!options)
+ goto out;
+
+ /* If address family specified in CLI */
+ if (state->address_family) {
+ addr_family = state->address_family;
+ }
+
/* Connect to glusterd using the specified method, giving preference
* to a unix socket connection. If nothing is specified, connect to
* the default glusterd socket.
@@ -633,7 +705,7 @@ cli_rpc_init(struct cli_state *state)
"Connecting to glusterd using "
"sockfile %s",
state->glusterd_sock);
- ret = rpc_transport_unix_options_build(&options, state->glusterd_sock,
+ ret = rpc_transport_unix_options_build(options, state->glusterd_sock,
0);
if (ret)
goto out;
@@ -643,10 +715,6 @@ cli_rpc_init(struct cli_state *state)
"%s",
state->remote_host);
- options = dict_new();
- if (!options)
- goto out;
-
ret = dict_set_str(options, "remote-host", state->remote_host);
if (ret)
goto out;
@@ -665,7 +733,7 @@ cli_rpc_init(struct cli_state *state)
gf_log("cli", GF_LOG_DEBUG,
"Connecting to glusterd using "
"default socket");
- ret = rpc_transport_unix_options_build(&options,
+ ret = rpc_transport_unix_options_build(options,
DEFAULT_GLUSTERD_SOCKFILE, 0);
if (ret)
goto out;
@@ -683,6 +751,9 @@ cli_rpc_init(struct cli_state *state)
ret = rpc_clnt_start(rpc);
out:
+ if (options)
+ dict_unref(options);
+
if (ret) {
if (rpc)
rpc_clnt_unref(rpc);
@@ -727,8 +798,7 @@ main(int argc, char *argv[])
int ret = -1;
glusterfs_ctx_t *ctx = NULL;
- mem_pools_init_early();
- mem_pools_init_late();
+ mem_pools_init();
ctx = glusterfs_ctx_new();
if (!ctx)
@@ -781,15 +851,11 @@ main(int argc, char *argv[])
if (ret)
goto out;
- ret = cli_cmd_cond_init();
- if (ret)
- goto out;
-
ret = cli_input_init(&state);
if (ret)
goto out;
- ret = event_dispatch(ctx->event_pool);
+ ret = gf_event_dispatch(ctx->event_pool);
out:
// glusterfs_ctx_destroy (ctx);
diff --git a/cli/src/cli.h b/cli/src/cli.h
index 773297e4be8..c0d933e8f8a 100644
--- a/cli/src/cli.h
+++ b/cli/src/cli.h
@@ -11,12 +11,13 @@
#define __CLI_H__
#include "rpc-clnt.h"
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "protocol-common.h"
-#include "logging.h"
-#include "quota-common-utils.h"
+#include <glusterfs/logging.h>
+#include <glusterfs/quota-common-utils.h>
#include "cli1-xdr.h"
+#include "gd-common-utils.h"
#if (HAVE_LIB_XML)
#include <libxml/encoding.h>
@@ -39,8 +40,8 @@ enum argp_option_keys {
ARGP_PORT_KEY = 'p',
};
-int cli_default_conn_timeout;
-int cli_ten_minutes_timeout;
+extern int cli_default_conn_timeout;
+extern int cli_ten_minutes_timeout;
typedef enum {
COLD_BRICK_COUNT,
@@ -136,6 +137,7 @@ struct cli_state {
gf_loglevel_t log_level;
char *glusterd_sock;
+ char *address_family;
};
struct cli_local {
@@ -187,6 +189,12 @@ typedef ssize_t (*cli_serialize_t)(struct iovec outmsg, void *args);
extern struct cli_state *global_state; /* use only in readline callback */
+extern struct rpc_clnt *global_quotad_rpc;
+
+extern struct rpc_clnt *global_rpc;
+
+extern rpc_clnt_prog_t *cli_rpc_prog;
+
typedef const char *(*cli_selector_t)(void *wcon);
char *
@@ -239,6 +247,19 @@ _cli_err(const char *fmt, ...);
\
} while (0)
+#define usage() \
+ do { \
+ cli_out( \
+ " Usage: gluster [options] <help> <peer>" \
+ " <pool> <volume>\n" \
+ " Options:\n" \
+ " --help Shows the help information\n" \
+ " --version Shows the version\n" \
+ " --print-logdir Shows the log directory\n" \
+ " --print-statedumpdir Shows the state dump directory\n"); \
+ \
+ } while (0)
+
int
cli_submit_request(struct rpc_clnt *rpc, void *req, call_frame_t *frame,
rpc_clnt_prog_t *prog, int procnum, struct iobref *iobref,
@@ -252,7 +273,8 @@ int32_t
cli_cmd_volume_reset_parse(const char **words, int wordcount, dict_t **opt);
int32_t
-cli_cmd_gsync_set_parse(const char **words, int wordcount, dict_t **opt);
+cli_cmd_gsync_set_parse(struct cli_state *state, const char **words,
+ int wordcount, dict_t **opt, char **errstr);
int32_t
cli_cmd_quota_parse(const char **words, int wordcount, dict_t **opt);
@@ -268,6 +290,10 @@ cli_cmd_volume_set_parse(struct cli_state *state, const char **words,
int wordcount, dict_t **options, char **op_errstr);
int32_t
+cli_cmd_ganesha_parse(struct cli_state *state, const char **words,
+ int wordcount, dict_t **options, char **op_errstr);
+
+int32_t
cli_cmd_get_state_parse(struct cli_state *state, const char **words,
int wordcount, dict_t **options, char **op_errstr);
@@ -276,17 +302,6 @@ cli_cmd_volume_add_brick_parse(struct cli_state *state, const char **words,
int wordcount, dict_t **options, int *type);
int32_t
-cli_cmd_volume_detach_tier_parse(const char **words, int wordcount,
- dict_t **options, int *question);
-
-int32_t
-cli_cmd_volume_tier_parse(const char **words, int wordcount, dict_t **options);
-
-int32_t
-cli_cmd_volume_old_tier_parse(const char **words, int wordcount,
- dict_t **options);
-
-int32_t
cli_cmd_volume_remove_brick_parse(struct cli_state *state, const char **words,
int wordcount, dict_t **options,
int *question, int *brick_count,
@@ -320,11 +335,14 @@ cli_local_get();
void
cli_local_wipe(cli_local_t *local);
+gf_boolean_t
+cli_cmd_connected();
+
int32_t
-cli_cmd_await_connected();
+cli_cmd_await_connected(unsigned timeout);
int32_t
-cli_cmd_broadcast_connected();
+cli_cmd_broadcast_connected(gf_boolean_t status);
int
cli_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
@@ -431,10 +449,9 @@ cli_xml_output_vol_rebalance(gf_cli_defrag_type op, dict_t *dict, int op_ret,
int op_errno, char *op_errstr);
int
-cli_xml_output_vol_remove_brick_detach_tier(gf_boolean_t status_op,
- dict_t *dict, int op_ret,
- int op_errno, char *op_errstr,
- const char *op);
+cli_xml_output_vol_remove_brick(gf_boolean_t status_op, dict_t *dict,
+ int op_ret, int op_errno, char *op_errstr,
+ const char *op);
int
cli_xml_output_vol_replace_brick(dict_t *dict, int op_ret, int op_errno,
@@ -480,9 +497,6 @@ cli_xml_output_snapshot(int cmd_type, dict_t *dict, int op_ret, int op_errno,
int
cli_xml_snapshot_status_single_snap(cli_local_t *local, dict_t *dict,
char *key);
-char *
-is_server_debug_xlator(void *myframe);
-
int32_t
cli_cmd_snapshot_parse(const char **words, int wordcount, dict_t **options,
struct cli_state *state);
diff --git a/cli/src/input.c b/cli/src/input.c
index 0583f648f95..5ac1a20edb1 100644
--- a/cli/src/input.c
+++ b/cli/src/input.c
@@ -45,6 +45,9 @@ cli_input(void *d)
state = d;
+ fprintf(stderr,
+ "Welcome to gluster prompt, type 'help' to see the available "
+ "commands.\n");
for (;;) {
printf("%s", state->prompt);
diff --git a/configure.ac b/configure.ac
index 43e8da3c2b2..e2d6fd66cec 100644
--- a/configure.ac
+++ b/configure.ac
@@ -13,7 +13,7 @@ AC_INIT([glusterfs],
AC_SUBST([PACKAGE_RELEASE],
[m4_esyscmd([build-aux/pkg-version --release])])
-AM_INIT_AUTOMAKE(tar-pax)
+AM_INIT_AUTOMAKE([tar-pax foreign])
# Removes warnings when using automake 1.14 around (...but option 'subdir-objects' is disabled )
#but libglusterfs fails to build with contrib (Then are not set up that way?)
@@ -21,15 +21,13 @@ AM_INIT_AUTOMAKE(tar-pax)
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES(yes)])
-if make --help 2>&1 | grep -q no-print-directory; then
- AM_MAKEFLAGS="$AM_MAKEFLAGS --no-print-directory";
-fi
-
AC_CONFIG_HEADERS([config.h site.h])
AC_CONFIG_FILES([Makefile
libglusterfs/Makefile
libglusterfs/src/Makefile
+ libglusterd/Makefile
+ libglusterd/src/Makefile
geo-replication/src/peer_gsec_create
geo-replication/src/peer_mountbroker
geo-replication/src/peer_mountbroker.py
@@ -46,11 +44,8 @@ AC_CONFIG_FILES([Makefile
rpc/rpc-transport/Makefile
rpc/rpc-transport/socket/Makefile
rpc/rpc-transport/socket/src/Makefile
- rpc/rpc-transport/rdma/Makefile
- rpc/rpc-transport/rdma/src/Makefile
rpc/xdr/Makefile
rpc/xdr/src/Makefile
- rpc/xdr/gen/Makefile
xlators/Makefile
xlators/meta/Makefile
xlators/meta/src/Makefile
@@ -81,16 +76,12 @@ AC_CONFIG_FILES([Makefile
xlators/performance/io-threads/src/Makefile
xlators/performance/io-cache/Makefile
xlators/performance/io-cache/src/Makefile
- xlators/performance/symlink-cache/Makefile
- xlators/performance/symlink-cache/src/Makefile
xlators/performance/quick-read/Makefile
xlators/performance/quick-read/src/Makefile
xlators/performance/open-behind/Makefile
xlators/performance/open-behind/src/Makefile
xlators/performance/md-cache/Makefile
xlators/performance/md-cache/src/Makefile
- xlators/performance/decompounder/Makefile
- xlators/performance/decompounder/src/Makefile
xlators/performance/nl-cache/Makefile
xlators/performance/nl-cache/src/Makefile
xlators/debug/Makefile
@@ -171,14 +162,13 @@ AC_CONFIG_FILES([Makefile
xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile
xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/Makefile
xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/Makefile
+ xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile
+ xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile
+ xlators/features/metadisp/Makefile
+ xlators/features/metadisp/src/Makefile
xlators/playground/Makefile
xlators/playground/template/Makefile
xlators/playground/template/src/Makefile
- xlators/encryption/Makefile
- xlators/encryption/rot-13/Makefile
- xlators/encryption/rot-13/src/Makefile
- xlators/encryption/crypt/Makefile
- xlators/encryption/crypt/src/Makefile
xlators/system/Makefile
xlators/system/posix-acl/Makefile
xlators/system/posix-acl/src/Makefile
@@ -203,10 +193,15 @@ AC_CONFIG_FILES([Makefile
extras/init.d/glustereventsd-Debian
extras/init.d/glustereventsd-Redhat
extras/init.d/glustereventsd-FreeBSD
+ extras/ganesha/Makefile
+ extras/ganesha/config/Makefile
+ extras/ganesha/scripts/Makefile
+ extras/ganesha/ocf/Makefile
extras/systemd/Makefile
extras/systemd/glusterd.service
extras/systemd/glustereventsd.service
extras/systemd/glusterfssharedstorage.service
+ extras/systemd/gluster-ta-volume.service
extras/run-gluster.tmpfiles
extras/benchmarking/Makefile
extras/hook-scripts/Makefile
@@ -233,6 +228,7 @@ AC_CONFIG_FILES([Makefile
extras/hook-scripts/reset/Makefile
extras/hook-scripts/reset/post/Makefile
extras/hook-scripts/reset/pre/Makefile
+ extras/python/Makefile
extras/snap_scheduler/Makefile
events/Makefile
events/src/Makefile
@@ -279,37 +275,78 @@ AC_ARG_ENABLE([debug],
[Enable debug build options.]))
if test "x$enable_debug" = "xyes"; then
BUILD_DEBUG=yes
- CFLAGS="${CFLAGS} -g -rdynamic -O0 -DDEBUG"
+ GF_CFLAGS="${GF_CFLAGS} -g -O0 -DDEBUG"
else
BUILD_DEBUG=no
- CFLAGS="${CFLAGS} -g -rdynamic"
fi
+SANITIZER=none
+
AC_ARG_ENABLE([asan],
AC_HELP_STRING([--enable-asan],
[Enable Address Sanitizer support]))
if test "x$enable_asan" = "xyes"; then
- BUILD_ASAN=yes
- AC_CHECK_LIB([asan], [__asan_report_error], ,
- [AC_MSG_ERROR([libasan.so not found, this is required for --enable-asan])])
- GF_CFLAGS="${GF_CFLAGS} -O1 -g -fsanitize=address -fno-omit-frame-pointer"
- dnl -lasan always need to be the first library, otherwise libxml complains
- GF_LDFLAGS="-lasan ${GF_LDFLAGS}"
-else
- BUILD_ASAN=no
+ SANITIZER=asan
+ AC_CHECK_LIB([asan], [__asan_init], ,
+ [AC_MSG_ERROR([--enable-asan requires libasan.so, exiting])])
+ GF_CFLAGS="${GF_CFLAGS} -O2 -g -fsanitize=address -fno-omit-frame-pointer"
+ GF_LDFLAGS="${GF_LDFLAGS} -lasan"
fi
-AC_ARG_ENABLE([atan],
+AC_ARG_ENABLE([tsan],
AC_HELP_STRING([--enable-tsan],
- [Enable ThreadSanitizer support]))
+ [Enable Thread Sanitizer support]))
if test "x$enable_tsan" = "xyes"; then
- BUILD_TSAN=yes
+ if test "x$SANITIZER" != "xnone"; then
+ AC_MSG_ERROR([only one sanitizer can be enabled at once])
+ fi
+ SANITIZER=tsan
AC_CHECK_LIB([tsan], [__tsan_init], ,
- [AC_MSG_ERROR([libtsan.so not found, this is required for --enable-tsan])])
- GF_CFLAGS="${GF_CFLAGS} -O2 -g -fsanitize=thread"
+ [AC_MSG_ERROR([--enable-tsan requires libtsan.so, exiting])])
+ if test "x$ac_cv_lib_tsan___tsan_init" = xyes; then
+ AC_MSG_CHECKING([whether tsan API can be used])
+ saved_CFLAGS=${CFLAGS}
+ CFLAGS="${CFLAGS} -fsanitize=thread"
+ AC_COMPILE_IFELSE(
+ [AC_LANG_PROGRAM([
+ [#include <sanitizer/tsan_interface.h>]],
+ [[__tsan_create_fiber(0)]])],
+ [TSAN_API=yes], [TSAN_API=no])
+ AC_MSG_RESULT([$TSAN_API])
+ if test x$TSAN_API = "xyes"; then
+ AC_DEFINE(HAVE_TSAN_API, 1, [Define if tsan API can be used.])
+ fi
+ CFLAGS=${saved_CFLAGS}
+ fi
+ GF_CFLAGS="${GF_CFLAGS} -O2 -g -fsanitize=thread -fno-omit-frame-pointer"
GF_LDFLAGS="${GF_LDFLAGS} -ltsan"
-else
- BUILD_TSAN=no
+fi
+
+AC_ARG_ENABLE([ubsan],
+ AC_HELP_STRING([--enable-ubsan],
+ [Enable Undefined Behavior Sanitizer support]))
+if test "x$enable_ubsan" = "xyes"; then
+ if test "x$SANITIZER" != "xnone"; then
+ AC_MSG_ERROR([only one sanitizer can be enabled at once])
+ fi
+ SANITIZER=ubsan
+ AC_CHECK_LIB([ubsan], [__ubsan_default_options], ,
+ [AC_MSG_ERROR([--enable-ubsan requires libubsan.so, exiting])])
+ GF_CFLAGS="${GF_CFLAGS} -O2 -g -fsanitize=undefined -fno-omit-frame-pointer"
+ GF_LDFLAGS="${GF_LDFLAGS} -lubsan"
+fi
+
+# Initialize CFLAGS before usage
+BUILD_TCMALLOC=no
+AC_ARG_ENABLE([tcmalloc],
+ AC_HELP_STRING([--enable-tcmalloc],
+ [Enable linking with tcmalloc library.]))
+if test "x$enable_tcmalloc" = "xyes"; then
+ BUILD_TCMALLOC=yes
+ GF_CFLAGS="${GF_CFLAGS} -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free"
+ AC_CHECK_LIB([tcmalloc], [malloc], [],
+ [AC_MSG_ERROR([when --enable-tcmalloc is used, tcmalloc library needs to be present])])
+ GF_LDFLAGS="-ltcmalloc ${GF_LDFLAGS}"
fi
@@ -365,19 +402,34 @@ case $host_os in
fi
# OSX version lesser than 9 has llvm/clang optimization issues which leads to various segfaults
if test "`/usr/bin/sw_vers | grep ProductVersion: | cut -f 2 | cut -d. -f2`" -lt 9; then
- CFLAGS="${CFLAGS} -g -O0 -DDEBUG"
+ GF_CFLAGS="${GF_CFLAGS} -g -O0 -DDEBUG"
fi
;;
esac
# --enable-valgrind prevents calling dlclose(), this leaks memory
AC_ARG_ENABLE([valgrind],
- AC_HELP_STRING([--enable-valgrind],
- [Enable valgrind for resource leak debugging.]))
-if test "x$enable_valgrind" = "xyes"; then
- AC_DEFINE(RUN_WITH_VALGRIND, 1, [define if all processes should run under valgrind])
-fi
-
+ AC_HELP_STRING([--enable-valgrind@<:@=memcheck,drd@:>@],
+ [Enable valgrind for resource leak (memcheck, which is
+ the default) or thread synchronization (drd) debugging.]))
+case x$enable_valgrind in
+ xmemcheck|xyes)
+ AC_DEFINE(RUN_WITH_MEMCHECK, 1,
+ [Define if all processes should run under 'valgrind --tool=memcheck'.])
+ VALGRIND_TOOL=memcheck
+ ;;
+ xdrd)
+ AC_DEFINE(RUN_WITH_DRD, 1,
+ [Define if all processes should run under 'valgrind --tool=drd'.])
+ VALGRIND_TOOL=drd
+ ;;
+ x|xno)
+ VALGRIND_TOOL=no
+ ;;
+ *)
+ AC_MSG_ERROR([Please specify --enable-valgrind@<:@=memcheck,drd@:>@])
+ ;;
+esac
AC_ARG_WITH([previous-options],
[AS_HELP_STRING([--with-previous-options],
@@ -495,6 +547,8 @@ AC_CHECK_HEADERS([openssl/dh.h])
AC_CHECK_HEADERS([openssl/ecdh.h])
+AC_CHECK_LIB([ssl], [SSL_CTX_get0_param], [AC_DEFINE([HAVE_SSL_CTX_GET0_PARAM], [1], [define if found OpenSSL SSL_CTX_get0_param])])
+
dnl Math library
AC_CHECK_LIB([m], [pow], [MATH_LIB='-lm'], [MATH_LIB=''])
AC_SUBST(MATH_LIB)
@@ -536,7 +590,7 @@ if test "x$have_uuid" != "xyes"; then
fi
dnl libglusterfs needs uuid.h, practically everything depends on it
-GF_CPPFLAGS="${GF_CPPFLAGS} ${UUID_CFLAGS}"
+GF_CFLAGS="${GF_CFLAGS} ${UUID_CFLAGS}"
dnl PKGCONFIG_UUID is used for the dependency in *.pc.in files
AC_SUBST(PKGCONFIG_UUID)
@@ -638,6 +692,7 @@ AM_PATH_PYTHON([2.6],,[:])
if test -n "${PYTHON}"; then
have_python=yes
fi
+AM_CONDITIONAL(HAVE_PYTHON, test "x$have_python" = "xyes")
dnl Use pkg-config to get runtime search path missing from ${PYTHON}-config
dnl Just do "true" on failure so that configure does not bail out
@@ -674,35 +729,25 @@ if test "x$enable_fuse_client" != "xno"; then
BUILD_FUSE_CLIENT="yes"
fi
-AC_CHECK_LIB([ssl], TLS_method, [HAVE_OPENSSL_1_1="yes"], [HAVE_OPENSSL_1_1="no"])
-if test "x$HAVE_OPENSSL_1_1" = "xyes"; then
- AC_DEFINE([HAVE_TLS_METHOD], [1], [Using OpenSSL-1.1 TLS_method])
-else
- AC_CHECK_LIB([ssl], TLSv1_2_method, [AC_DEFINE([HAVE_TLSV1_2_METHOD], [1], [Using OpenSSL-1.0 TLSv1_2_method])])
-fi
+AC_SUBST(FUSE_CLIENT_SUBDIR)
-# start encryption/crypt section
+AC_ARG_ENABLE([fuse-notifications],
+ AS_HELP_STRING([--disable-fuse-notifications], [Disable FUSE notifications]))
-AC_CHECK_HEADERS([openssl/cmac.h], [have_cmac_h=yes], [have_cmac_h=no])
+AS_IF([test "x$enable_fuse_notifications" != "xno"], [
+ AC_DEFINE([HAVE_FUSE_NOTIFICATIONS], [1], [Use FUSE notifications])
+])
-AC_ARG_ENABLE([crypt-xlator],
- AC_HELP_STRING([--enable-crypt-xlator], [Build crypt encryption xlator]))
+# end FUSE section
-if test "x$enable_crypt_xlator" = "xyes" -a "x$have_cmac_h" = "xno"; then
- AC_MSG_ERROR([Encryption xlator requires OpenSSL with cmac.h])
-fi
-BUILD_CRYPT_XLATOR=no
-if test "x$enable_crypt_xlator" != "xno" -a "x$have_cmac_h" = "xyes"; then
- BUILD_CRYPT_XLATOR=yes
- AC_DEFINE(HAVE_CRYPT_XLATOR, 1, [enable building crypt encryption xlator])
+AC_CHECK_LIB([ssl], TLS_method, [HAVE_OPENSSL_1_1="yes"], [HAVE_OPENSSL_1_1="no"])
+if test "x$HAVE_OPENSSL_1_1" = "xyes"; then
+ AC_DEFINE([HAVE_TLS_METHOD], [1], [Using OpenSSL-1.1 TLS_method])
+else
+ AC_CHECK_LIB([ssl], TLSv1_2_method, [AC_DEFINE([HAVE_TLSV1_2_METHOD], [1], [Using OpenSSL-1.0 TLSv1_2_method])])
fi
-AM_CONDITIONAL([ENABLE_CRYPT_XLATOR], [test x$BUILD_CRYPT_XLATOR = xyes])
-
-AC_SUBST(FUSE_CLIENT_SUBDIR)
-# end FUSE section
-
# FUSERMOUNT section
AC_ARG_ENABLE([fusermount],
@@ -733,53 +778,6 @@ if test "x$enable_epoll" != "xno"; then
fi
# end EPOLL section
-
-# IBVERBS section
-AC_ARG_ENABLE([ibverbs],
- AC_HELP_STRING([--disable-ibverbs],
- [Do not build the ibverbs transport]))
-
-if test "x$enable_ibverbs" != "xno"; then
- AC_CHECK_LIB([ibverbs],
- [ibv_get_device_list],
- [HAVE_LIBIBVERBS="yes"],
- [HAVE_LIBIBVERBS="no"])
- AC_CHECK_LIB([rdmacm], [rdma_create_id], [HAVE_RDMACM="yes"], [HAVE_RDMACM="no"])
- if test "x$HAVE_RDMACM" = "xyes" ; then
- AC_CHECK_DECLS(
- [RDMA_OPTION_ID_REUSEADDR],
- [],
- [AC_ERROR([Need at least version 1.0.15 of librdmacm])],
- [[#include <rdma/rdma_cma.h>]])
- fi
-fi
-
-if test "x$enable_ibverbs" = "xyes"; then
- if test "x$HAVE_LIBIBVERBS" = "xno"; then
- echo "ibverbs-transport requested, but libibverbs is not present."
- exit 1
- fi
-
- if test "x$HAVE_RDMACM" = "xno"; then
- echo "ibverbs-transport requested, but librdmacm is not present."
- exit 1
- fi
-fi
-
-BUILD_RDMA=no
-BUILD_IBVERBS=no
-if test "x$enable_ibverbs" != "xno" -a "x$HAVE_LIBIBVERBS" = "xyes" -a "x$HAVE_RDMACM" = "xyes"; then
- IBVERBS_SUBDIR=ib-verbs
- BUILD_IBVERBS=yes
- RDMA_SUBDIR=rdma
- BUILD_RDMA=yes
-fi
-
-AC_SUBST(IBVERBS_SUBDIR)
-AC_SUBST(RDMA_SUBDIR)
-# end IBVERBS section
-
-
# SYNCDAEMON section
AC_ARG_ENABLE([georeplication],
AC_HELP_STRING([--disable-georeplication],
@@ -787,6 +785,9 @@ AC_ARG_ENABLE([georeplication],
BUILD_SYNCDAEMON=no
case $host_os in
+ freebsd*)
+#do nothing
+ ;;
linux*)
#do nothing
;;
@@ -826,6 +827,17 @@ fi
AC_SUBST(GEOREP_EXTRAS_SUBDIR)
AM_CONDITIONAL(USE_GEOREP, test "x$enable_georeplication" != "xno")
+# METADISP section
+AC_ARG_ENABLE([metadisp],
+ AC_HELP_STRING([--enable-metadisp],
+ [Enable the metadata dispersal xlator]))
+BUILD_METADISP=no
+if test "x${enable_metadisp}" = "xyes"; then
+ BUILD_METADISP=yes
+fi
+AM_CONDITIONAL([BUILD_METADISP], [test "x$BUILD_METADISP" = "xyes"])
+# end METADISP section
+
# Events section
AC_ARG_ENABLE([events],
AC_HELP_STRING([--disable-events],
@@ -929,6 +941,17 @@ AM_CONDITIONAL([BUILD_AMAZONS3_PLUGIN], [test "x$HAVE_AMAZONS3" = "xyes"])
if test "x$HAVE_AMAZONS3" = "xyes";then
BUILD_CLOUDSYNC="yes"
fi
+BUILD_CVLT_PLUGIN="no"
+case $host_os in
+#enable cvlt plugin only for linux platforms
+ linux*)
+ BUILD_CVLT_PLUGIN="yes"
+ BUILD_CLOUDSYNC="yes"
+ ;;
+ *)
+ ;;
+esac
+AM_CONDITIONAL([BUILD_CVLT_PLUGIN], [test "x$BUILD_CVLT_PLUGIN" = "xyes"])
AM_CONDITIONAL([BUILD_CLOUDSYNC], [test "x$BUILD_CLOUDSYNC" = "xyes"])
dnl end cloudsync section
@@ -953,6 +976,25 @@ if test "x${have_backtrace}" = "xyes"; then
fi
AC_SUBST(HAVE_BACKTRACE)
+dnl Old (before C11) compiler can compile (but not link) this:
+dnl
+dnl int main () {
+dnl _Static_assert(1, "True");
+dnl return 0;
+dnl }
+dnl
+dnl assuming that _Static_assert is an implicitly declared function. So
+dnl we're trying to link just to make sure that this is not the case.
+
+AC_MSG_CHECKING([whether $CC supports C11 _Static_assert])
+AC_TRY_LINK([], [_Static_assert(1, "True");],
+ [STATIC_ASSERT=yes], [STATIC_ASSERT=no])
+
+AC_MSG_RESULT([$STATIC_ASSERT])
+if test x$STATIC_ASSERT = "xyes"; then
+ AC_DEFINE(HAVE_STATIC_ASSERT, 1, [Define if C11 _Static_assert is supported.])
+fi
+
if test "x${have_backtrace}" != "xyes"; then
AC_TRY_COMPILE([#include <math.h>], [double x=0.0; x=ceil(0.0);],
[],
@@ -960,11 +1002,11 @@ AC_TRY_COMPILE([#include <math.h>], [double x=0.0; x=ceil(0.0);],
fi
dnl glusterfs prints memory usage to stderr by sending it SIGUSR1
-AC_CHECK_FUNC([malloc_stats], [have_malloc_stats=yes])
-if test "x${have_malloc_stats}" = "xyes"; then
- AC_DEFINE(HAVE_MALLOC_STATS, 1, [define if found malloc_stats])
+AC_CHECK_FUNC([mallinfo], [have_mallinfo=yes])
+if test "x${have_mallinfo}" = "xyes"; then
+ AC_DEFINE(HAVE_MALLINFO, 1, [define if found mallinfo])
fi
-AC_SUBST(HAVE_MALLOC_STATS)
+AC_SUBST(HAVE_MALLINFO)
dnl Linux, Solaris, Cygwin
AC_CHECK_MEMBERS([struct stat.st_atim.tv_nsec])
@@ -972,7 +1014,7 @@ dnl FreeBSD, NetBSD
AC_CHECK_MEMBERS([struct stat.st_atimespec.tv_nsec])
case $host_os in
*netbsd*)
- CFLAGS="${CFLAGS} -D_INCOMPLETE_XOPEN_C063 -DCONFIG_MACHINE_BSWAP_H"
+ GF_CFLAGS="${GF_CFLAGS} -D_INCOMPLETE_XOPEN_C063 -DCONFIG_MACHINE_BSWAP_H"
;;
esac
AC_CHECK_FUNC([linkat], [have_linkat=yes])
@@ -991,6 +1033,9 @@ case $host_os in
CFLAGS="${CFLAGS} -isystem /usr/local/include"
ARGP_LDADD=-largp
;;
+ *netbsd*)
+ ARGP_LDADD=-largp
+ ;;
esac
dnl argp-standalone does not provide a pkg-config file
AC_CHECK_HEADER([argp.h], AC_DEFINE(HAVE_ARGP, 1, [have argp]))
@@ -1041,6 +1086,38 @@ if test "x${have_posix_fallocate}" = "xyes"; then
AC_DEFINE(HAVE_POSIX_FALLOCATE, 1, [define if posix_fallocate exists])
fi
+# On fedora-29, copy_file_range syscall and the libc API both are present.
+# Whereas, on some machines such as centos-7, RHEL-7, the API is not there.
+# Only the system call is present. So, this change is to determine whether
+# the API is present or not. If not, then check whether the system call is
+# present or not. Accordingly sys_copy_file_range function will first call
+# the API if it is there. Otherwise it will call syscall(SYS_copy_file_range).
+AC_CHECK_FUNC([copy_file_range], [have_copy_file_range=yes])
+if test "x${have_copy_file_range}" = "xyes"; then
+ AC_DEFINE(HAVE_COPY_FILE_RANGE, 1, [define if copy_file_range exists])
+else
+ OLD_CFLAGS=${CFLAGS}
+ CFLAGS="-D_GNU_SOURCE"
+ AC_CHECK_DECL([SYS_copy_file_range], , , [#include <sys/syscall.h>])
+ if test "x${ac_cv_have_decl_SYS_copy_file_range}" = "xyes"; then
+ AC_DEFINE(HAVE_COPY_FILE_RANGE_SYS, 1, [define if SYS_copy_file_range is available])
+ fi
+ CFLAGS=${OLD_CFLAGS}
+fi
+
+AC_CHECK_FUNC([syncfs], [have_syncfs=yes])
+if test "x${have_syncfs}" = "xyes"; then
+ AC_DEFINE(HAVE_SYNCFS, 1, [define if syncfs exists])
+else
+ OLD_CFLAGS=${CFLAGS}
+ CFLAGS="-D_GNU_SOURCE"
+ AC_CHECK_DECL([SYS_syncfs], , , [#include <sys/syscall.h>])
+ if test "x${ac_cv_have_decl_SYS_syncfs}" = "xyes"; then
+ AC_DEFINE(HAVE_SYNCFS_SYS, 1, [define if SYS_syncfs is available])
+ fi
+ CFLAGS=${OLD_CFLAGS}
+fi
+
BUILD_NANOSECOND_TIMESTAMPS=no
AC_CHECK_FUNC([utimensat], [have_utimensat=yes])
if test "x${have_utimensat}" = "xyes"; then
@@ -1121,28 +1198,21 @@ CFLAGS="-Wformat -Werror=format-security"
AC_MSG_CHECKING([whether $CC accepts -Werror=format-security])
AC_COMPILE_IFELSE([AC_LANG_PROGRAM()], [cc_werror_format_security=yes], [cc_werror_format_security=no])
echo $cc_werror_format_security
-if test "x$cc_werror_format_security" = "xno"; then
- CFLAGS="$saved_CFLAGS"
-else
- CFLAGS="$saved_CFLAGS $CFLAGS"
- GF_CFLAGS="$GF_CFLAGS $CFLAGS"
+if test "x$cc_werror_format_security" = "xyes"; then
+ GF_CFLAGS="$GF_CFLAGS ${CFLAGS}"
fi
+CFLAGS="$saved_CFLAGS"
dnl check for gcc -Werror=implicit-function-declaration
saved_CFLAGS=$CFLAGS
-saved_GF_CFLAGS=$GF_CFLAGS
CFLAGS="-Werror=implicit-function-declaration"
-GF_CFLAGS="-Werror=implicit-function-declaration"
AC_MSG_CHECKING([whether $CC accepts -Werror=implicit-function-declaration])
AC_COMPILE_IFELSE([AC_LANG_PROGRAM()], [cc_werror_implicit=yes], [cc_werror_implicit=no])
echo $cc_werror_implicit
-if test "x$cc_werror_implicit" = "xno"; then
- CFLAGS="$saved_CFLAGS"
- GF_CFLAGS="$saved_GF_CFLAGS"
-else
- CFLAGS="$saved_CFLAGS $CFLAGS"
- GF_CFLAGS="$saved_GF_CFLAGS $GF_CFLAGS"
+if test "x$cc_werror_implicit" = "xyes"; then
+ GF_CFLAGS="${GF_CFLAGS} ${CFLAGS}"
fi
+CFLAGS="$saved_CFLAGS"
dnl clang is mostly GCC-compatible, but its version is much lower,
dnl so we have to check for it.
@@ -1186,7 +1256,6 @@ if test "x$exec_prefix" = xNONE; then
exec_prefix="$(eval echo $prefix)"
fi
GLUSTERFS_LIBEXECDIR="$(eval echo $libexecdir)/glusterfs"
-GLUSTERFSD_MISCDIR="$(eval echo $prefix)/var/lib/misc/glusterfsd"
prefix=$old_prefix
exec_prefix=$old_exec_prefix
@@ -1198,6 +1267,8 @@ fi
localstatedir="$(eval echo ${localstatedir})"
LOCALSTATEDIR=$localstatedir
+GLUSTERFSD_MISCDIR="$(eval echo ${localstatedir})/lib/misc/glusterfsd"
+
old_prefix=$prefix
if test "x$prefix" = xNONE; then
prefix=$ac_default_prefix
@@ -1206,6 +1277,7 @@ GLUSTERD_VOLFILE="$(eval echo ${sysconfdir})/glusterfs/glusterd.vol"
prefix=$old_prefix
+GFAPI_EXTRA_LDFLAGS='-Wl,--version-script=$(top_srcdir)/api/src/gfapi.map'
case $host_os in
linux*)
GF_HOST_OS="GF_LINUX_HOST_OS"
@@ -1238,10 +1310,6 @@ case $host_os in
;;
*freebsd*)
GF_HOST_OS="GF_BSD_HOST_OS"
- GF_CFLAGS="${GF_CFLAGS} -O0"
- GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_BASENAME"
- GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_DIRNAME"
- GF_CFLAGS="${GF_CFLAGS} -D_LIBGEN_H_"
GF_CFLAGS="${GF_CFLAGS} -DO_DSYNC=0"
GF_CFLAGS="${GF_CFLAGS} -Dxdr_quad_t=xdr_longlong_t"
GF_CFLAGS="${GF_CFLAGS} -Dxdr_u_quad_t=xdr_u_longlong_t"
@@ -1268,16 +1336,8 @@ case $host_os in
BUILD_FUSERMOUNT="no"
FUSERMOUNT_SUBDIR=""
GLUSTERD_WORKDIR="${LOCALSTATEDIR}/db/glusterd"
- ;;
-esac
-
-case $host_os in
- darwin*)
GFAPI_EXTRA_LDFLAGS='-Wl,-alias_list,$(top_srcdir)/api/src/gfapi.aliases'
;;
- *)
- GFAPI_EXTRA_LDFLAGS='-Wl,--version-script=$(top_srcdir)/api/src/gfapi.map'
- ;;
esac
# Default value for sbindir
@@ -1363,13 +1423,18 @@ fi
dnl gnfs section
BUILD_GNFS="no"
+RPCBIND_SERVICE=""
AC_ARG_ENABLE([gnfs],
AC_HELP_STRING([--enable-gnfs],
[Enable legacy gnfs server xlator.]))
if test "x${with_server}" = "xyes" -a "x$enable_gnfs" = "xyes"; then
BUILD_GNFS="yes"
+ GF_CFLAGS="$GF_CFLAGS -DBUILD_GNFS"
+ RPCBIND_SERVICE="rpcbind.service"
fi
AM_CONDITIONAL([BUILD_GNFS], [test x$BUILD_GNFS = xyes])
+AC_SUBST(BUILD_GNFS)
+AC_SUBST(RPCBIND_SERVICE)
dnl end gnfs section
dnl Check for userspace-rcu
@@ -1379,10 +1444,12 @@ PKG_CHECK_MODULES([URCU], [liburcu-bp], [],
AC_MSG_ERROR([liburcu-bp not found]))])
PKG_CHECK_MODULES([URCU_CDS], [liburcu-cds >= 0.8], [],
[PKG_CHECK_MODULES([URCU_CDS], [liburcu-cds >= 0.7],
- [AC_DEFINE(URCU_OLD, 1, [Define if liburcu 0.6 or 0.7 is found])],
+ [AC_DEFINE(URCU_OLD, 1, [Define if liburcu 0.6 or 0.7 is found])
+ USE_CONTRIB_URCU='yes'],
[AC_CHECK_HEADERS([urcu/cds.h],
[AC_DEFINE(URCU_OLD, 1, [Define if liburcu 0.6 or 0.7 is found])
- URCU_CDS_LIBS='-lurcu-cds'],
+ URCU_CDS_LIBS='-lurcu-cds'
+ USE_CONTRIB_URCU='yes'],
[AC_MSG_ERROR([liburcu-cds not found])])])])
BUILD_UNITTEST="no"
@@ -1523,9 +1590,9 @@ case $host_os in
;;
esac
dnl GF_XLATOR_DEFAULT_LDFLAGS is for most xlators that expose a common set of symbols
-GF_XLATOR_DEFAULT_LDFLAGS='-avoid-version -export-symbols $(top_srcdir)/xlators/xlator.sym $(UUID_LIBS) $(GF_NO_UNDEFINED)'
+GF_XLATOR_DEFAULT_LDFLAGS='-avoid-version -export-symbols $(top_srcdir)/xlators/xlator.sym $(UUID_LIBS) $(GF_NO_UNDEFINED) $(TIRPC_LIBS)'
dnl GF_XLATOR_LDFLAGS is for xlators that expose extra symbols, e.g. dht
-GF_XLATOR_LDFLAGS='-avoid-version $(UUID_LIBS) $(GF_NO_UNDEFINED)'
+GF_XLATOR_LDFLAGS='-avoid-version $(UUID_LIBS) $(GF_NO_UNDEFINED) $(TIRPC_LIBS)'
AC_SUBST(GF_HOST_OS)
AC_SUBST(GF_CFLAGS)
@@ -1540,12 +1607,22 @@ AC_SUBST(AM_LIBTOOLFLAGS)
AC_SUBST(GF_NO_UNDEFINED)
AC_SUBST(GF_XLATOR_DEFAULT_LDFLAGS)
AC_SUBST(GF_XLATOR_LDFLAGS)
+AC_SUBST(GF_XLATOR_MGNT_LIBADD)
+
+case $host_os in
+ *freebsd*)
+ GF_XLATOR_MGNT_LIBADD="-lutil -lprocstat"
+ ;;
+esac
CONTRIBDIR='$(top_srcdir)/contrib'
AC_SUBST(CONTRIBDIR)
GF_CPPDEFINES='-D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS)'
GF_CPPINCLUDES='-include $(top_builddir)/config.h -include $(top_builddir)/site.h -I$(top_srcdir)/libglusterfs/src -I$(top_builddir)/libglusterfs/src'
+if test "x${USE_CONTRIB_URCU}" = "xyes"; then
+ GF_CPPINCLUDES="${GF_CPPINCLUDES} -I\$(CONTRIBDIR)/userspace-rcu"
+fi
GF_CPPFLAGS="$GF_CPPFLAGS $GF_CPPDEFINES $GF_CPPINCLUDES"
AC_SUBST([GF_CPPFLAGS])
@@ -1602,18 +1679,16 @@ echo
echo "GlusterFS configure summary"
echo "==========================="
echo "FUSE client : $BUILD_FUSE_CLIENT"
-echo "Infiniband verbs : $BUILD_IBVERBS"
echo "epoll IO multiplex : $BUILD_EPOLL"
echo "fusermount : $BUILD_FUSERMOUNT"
echo "readline : $BUILD_READLINE"
echo "georeplication : $BUILD_SYNCDAEMON"
echo "Linux-AIO : $BUILD_LIBAIO"
echo "Enable Debug : $BUILD_DEBUG"
-echo "Enable ASAN : $BUILD_ASAN"
-echo "Enable TSAN : $BUILD_TSAN"
+echo "Run with Valgrind : $VALGRIND_TOOL"
+echo "Sanitizer enabled : $SANITIZER"
echo "Use syslog : $USE_SYSLOG"
echo "XML output : $BUILD_XML_OUTPUT"
-echo "Encryption xlator : $BUILD_CRYPT_XLATOR"
echo "Unit Tests : $BUILD_UNITTEST"
echo "Track priv ports : $TRACK_PRIVPORTS"
echo "POSIX ACLs : $BUILD_POSIX_ACLS"
@@ -1629,4 +1704,14 @@ echo "IPV6 default : $with_ipv6_default"
echo "Use TIRPC : $with_libtirpc"
echo "With Python : ${PYTHON_VERSION}"
echo "Cloudsync : $BUILD_CLOUDSYNC"
+echo "Metadata dispersal : $BUILD_METADISP"
+echo "Link with TCMALLOC : $BUILD_TCMALLOC"
echo
+
+# dnl Note: ${X^^} capitalization assumes bash >= 4.x
+if test "x$SANITIZER" != "xnone"; then
+ echo "Note: since glusterfs processes are daemon processes, use"
+ echo "'export ${SANITIZER^^}_OPTIONS=log_path=/path/to/xxx.log' to collect"
+ echo "sanitizer output. Further details and more options can be"
+ echo "found at https://github.com/google/sanitizers."
+fi
diff --git a/contrib/fuse-lib/misc.c b/contrib/fuse-lib/misc.c
index 266bcc75125..1a9b418e511 100644
--- a/contrib/fuse-lib/misc.c
+++ b/contrib/fuse-lib/misc.c
@@ -10,7 +10,7 @@
#include <string.h>
#include <limits.h>
#include <fcntl.h>
-#include "glusterfs.h"
+#include "glusterfs/glusterfs.h"
#include "fuse_kernel.h"
#include "fuse-misc.h"
diff --git a/contrib/fuse-lib/mount-gluster-compat.h b/contrib/fuse-lib/mount-gluster-compat.h
index a16103e0fbb..d3646d08d8e 100644
--- a/contrib/fuse-lib/mount-gluster-compat.h
+++ b/contrib/fuse-lib/mount-gluster-compat.h
@@ -96,9 +96,9 @@ typedef long long mount_flag_t;
#define FREE(ptr) free (ptr)
#define GFFUSE_LOGERR(...) fprintf (stderr, ## __VA_ARGS__)
#else /* FUSE_UTIL */
-#include "glusterfs.h"
-#include "logging.h"
-#include "common-utils.h"
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/common-utils.h"
#define GFFUSE_LOGERR(...) \
gf_log ("glusterfs-fuse", GF_LOG_ERROR, ## __VA_ARGS__)
diff --git a/contrib/fuse-lib/mount.c b/contrib/fuse-lib/mount.c
index ffa0a4b6316..06ff191f542 100644
--- a/contrib/fuse-lib/mount.c
+++ b/contrib/fuse-lib/mount.c
@@ -52,12 +52,16 @@ gf_fuse_unmount (const char *mountpoint, int fd)
if (geteuid () == 0) {
fuse_mnt_umount ("fuse", mountpoint, mountpoint, 1);
return;
+ } else {
+ GFFUSE_LOGERR ("fuse: Effective-uid: %d", geteuid());
}
res = umount2 (mountpoint, 2);
if (res == 0)
return;
+ GFFUSE_LOGERR ("fuse: failed to unmount %s: %s",
+ mountpoint, strerror (errno));
pid = fork ();
if (pid == -1)
return;
@@ -67,6 +71,8 @@ gf_fuse_unmount (const char *mountpoint, int fd)
"--", mountpoint, NULL };
execvp (FUSERMOUNT_PROG, (char **)argv);
+ GFFUSE_LOGERR ("fuse: failed to execute fuserumount: %s",
+ strerror (errno));
_exit (1);
}
waitpid (pid, NULL, 0);
@@ -384,6 +390,7 @@ fuse_mount_sys (const char *mountpoint, char *fsname,
build_iovec (&iov, &iovlen, "from", "/dev/fuse", -1);
build_iovec (&iov, &iovlen, "volname", source, -1);
build_iovec (&iov, &iovlen, "fd", fdstr, -1);
+ build_iovec (&iov, &iovlen, "allow_other", NULL, -1);
ret = nmount (iov, iovlen, mountflags);
#else
ret = mount (source, mountpoint, fstype, mountflags,
diff --git a/contrib/macfuse/mount_darwin.c b/contrib/macfuse/mount_darwin.c
index 21ed7e503a6..d1d1c34e761 100644
--- a/contrib/macfuse/mount_darwin.c
+++ b/contrib/macfuse/mount_darwin.c
@@ -34,9 +34,9 @@
#include "fuse_param.h"
#include "fuse_ioctl.h"
-#include "glusterfs.h"
-#include "logging.h"
-#include "common-utils.h"
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/common-utils.h"
#define GFFUSE_LOGERR(...) \
gf_log ("glusterfs-fuse", GF_LOG_ERROR, ## __VA_ARGS__)
diff --git a/contrib/sunrpc/xdr_sizeof.c b/contrib/sunrpc/xdr_sizeof.c
deleted file mode 100644
index ca1f7bf0a5e..00000000000
--- a/contrib/sunrpc/xdr_sizeof.c
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
- * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_LICENSE_HEADER_START@
- *
- * Portions Copyright (c) 1999 Apple Computer, Inc. All Rights
- * Reserved. This file contains Original Code and/or Modifications of
- * Original Code as defined in and that are subject to the Apple Public
- * Source License Version 1.1 (the "License"). You may not use this file
- * except in compliance with the License. Please obtain a copy of the
- * License at http://www.apple.com/publicsource and read it before using
- * this file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON- INFRINGEMENT. Please see the
- * License for the specific language governing rights and limitations
- * under the License.
- *
- * @APPLE_LICENSE_HEADER_END@
- */
-
-/*
- * Sun RPC is a product of Sun Microsystems, Inc. and is provided for
- * unrestricted use provided that this legend is included on all tape
- * media and as a part of the software program in whole or part. Users
- * may copy or modify Sun RPC without charge, but are not authorized
- * to license or distribute it to anyone else except as part of a product or
- * program developed by the user.
- *
- * SUN RPC IS PROVIDED AS IS WITH NO WARRANTIES OF ANY KIND INCLUDING THE
- * WARRANTIES OF DESIGN, MERCHANTIBILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE, OR ARISING FROM A COURSE OF DEALING, USAGE OR TRADE PRACTICE.
- *
- * Sun RPC is provided with no support and without any obligation on the
- * part of Sun Microsystems, Inc. to assist in its use, correction,
- * modification or enhancement.
- *
- * SUN MICROSYSTEMS, INC. SHALL HAVE NO LIABILITY WITH RESPECT TO THE
- * INFRINGEMENT OF COPYRIGHTS, TRADE SECRETS OR ANY PATENTS BY SUN RPC
- * OR ANY PART THEREOF.
- *
- * In no event will Sun Microsystems, Inc. be liable for any lost revenue
- * or profits or other special, indirect and consequential damages, even if
- * Sun has been advised of the possibility of such damages.
- *
- * Sun Microsystems, Inc.
- * 2550 Garcia Avenue
- * Mountain View, California 94043
- */
-
-/*
- * xdr_sizeof.c
- *
- * Copyright 1990 Sun Microsystems, Inc.
- *
- * General purpose routine to see how much space something will use
- * when serialized using XDR.
- */
-
-#ifdef GF_DARWIN_HOST_OS
-
-#include <rpc/types.h>
-#include <rpc/xdr.h>
-#include <sys/types.h>
-#include <sys/cdefs.h>
-
-#include <stdlib.h>
-
-/* ARGSUSED */
-#ifdef GF_DARWIN_HOST_OS
-static bool_t
-x_putlong (XDR *xdrs, const int *longp)
-{
- xdrs->x_handy += BYTES_PER_XDR_UNIT;
- return TRUE;
-}
-
-#else
-static bool_t
-x_putlong (XDR *xdrs, const long *longp)
-{
- xdrs->x_handy += BYTES_PER_XDR_UNIT;
- return TRUE;
-}
-#endif
-
-/* ARGSUSED */
-static bool_t
-x_putbytes (XDR *xdrs, const char *bp, u_int len)
-{
- xdrs->x_handy += len;
- return TRUE;
-}
-
-#ifdef GF_DARWIN_HOST_OS
-static u_int
-x_getpostn (XDR *xdrs)
-{
- return xdrs->x_handy;
-}
-#else
-static u_int
-x_getpostn (const XDR *xdrs)
-{
- return xdrs->x_handy;
-}
-#endif
-
-/* ARGSUSED */
-static bool_t
-x_setpostn (XDR *xdrs, u_int len)
-{
- /* This is not allowed */
- return FALSE;
-}
-
-static int32_t *
-x_inline (XDR *xdrs, u_int len)
-{
- if (len == 0)
- return NULL;
- if (xdrs->x_op != XDR_ENCODE)
- return NULL;
- if (len < (u_int) (long int) xdrs->x_base)
- {
- /* x_private was already allocated */
- xdrs->x_handy += len;
- return (int32_t *) xdrs->x_private;
- }
- else
- {
- /* Free the earlier space and allocate new area */
- free (xdrs->x_private);
- if ((xdrs->x_private = (caddr_t) malloc (len)) == NULL)
- {
- xdrs->x_base = 0;
- return NULL;
- }
- xdrs->x_base = (void *) (long) len;
- xdrs->x_handy += len;
- return (int32_t *) xdrs->x_private;
- }
-}
-
-static int
-harmless (void)
-{
- /* Always return FALSE/NULL, as the case may be */
- return 0;
-}
-
-static void
-x_destroy (XDR *xdrs)
-{
- xdrs->x_handy = 0;
- xdrs->x_base = 0;
- if (xdrs->x_private)
- {
- free (xdrs->x_private);
- xdrs->x_private = NULL;
- }
- return;
-}
-
-unsigned long
-xdr_sizeof (xdrproc_t func, void *data)
-{
- XDR x;
- struct xdr_ops ops;
- bool_t stat;
-
-#ifdef GF_DARWIN_HOST_OS
- typedef bool_t (*dummyfunc1) (XDR *, int *);
-#else
- typedef bool_t (*dummyfunc1) (XDR *, long *);
-#endif
- typedef bool_t (*dummyfunc2) (XDR *, caddr_t, u_int);
-
- ops.x_putlong = x_putlong;
- ops.x_putbytes = x_putbytes;
- ops.x_inline = x_inline;
- ops.x_getpostn = x_getpostn;
- ops.x_setpostn = x_setpostn;
- ops.x_destroy = x_destroy;
-
- /* the other harmless ones */
- ops.x_getlong = (dummyfunc1) harmless;
- ops.x_getbytes = (dummyfunc2) harmless;
-
- x.x_op = XDR_ENCODE;
- x.x_ops = &ops;
- x.x_handy = 0;
- x.x_private = (caddr_t) NULL;
- x.x_base = (caddr_t) 0;
-
- stat = func (&x, data, 0);
- if (x.x_private)
- free (x.x_private);
- return (stat == TRUE ? (unsigned) x.x_handy : 0);
-}
-#endif /* GF_DARWIN_HOST_OS */
diff --git a/contrib/timer-wheel/timer-wheel.c b/contrib/timer-wheel/timer-wheel.c
index cfbd74e166f..58e0607bf0c 100644
--- a/contrib/timer-wheel/timer-wheel.c
+++ b/contrib/timer-wheel/timer-wheel.c
@@ -159,7 +159,14 @@ run_timers (struct tvec_base *base)
data = timer->data;
__gf_tw_detach_timer (timer);
- fn (timer, data, call_time);
+ pthread_spin_unlock(&base->lock);
+ {
+ /* It is required to run the actual function outside
+ of the locked zone, so we don't bother about
+ locked operations inside that function */
+ fn(timer, data, call_time);
+ }
+ pthread_spin_lock(&base->lock);
}
}
pthread_spin_unlock (&base->lock);
diff --git a/contrib/timer-wheel/timer-wheel.h b/contrib/timer-wheel/timer-wheel.h
index baa029ebb30..5637735ec22 100644
--- a/contrib/timer-wheel/timer-wheel.h
+++ b/contrib/timer-wheel/timer-wheel.h
@@ -17,9 +17,9 @@
#ifndef __TIMER_WHEEL_H
#define __TIMER_WHEEL_H
-#include "locking.h"
+#include "glusterfs/locking.h"
-#include "list.h"
+#include "glusterfs/list.h"
#define TVR_BITS 8
#define TVN_BITS 6
diff --git a/contrib/umountd/umountd.c b/contrib/umountd/umountd.c
index 6b9e3c43031..3f933ecb554 100644
--- a/contrib/umountd/umountd.c
+++ b/contrib/umountd/umountd.c
@@ -23,11 +23,11 @@
#include <sys/stat.h>
#include <sys/mount.h>
-#include "glusterfs.h"
-#include "globals.h"
-#include "logging.h"
-#include "syscall.h"
-#include "mem-types.h"
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/globals.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/syscall.h"
+#include "glusterfs/mem-types.h"
static void
usage (void)
diff --git a/contrib/userspace-rcu/static-wfcqueue.h b/contrib/userspace-rcu/static-wfcqueue.h
new file mode 100644
index 00000000000..37d14ad674b
--- /dev/null
+++ b/contrib/userspace-rcu/static-wfcqueue.h
@@ -0,0 +1,685 @@
+#ifndef _URCU_WFCQUEUE_STATIC_H
+#define _URCU_WFCQUEUE_STATIC_H
+
+/*
+ * urcu/static/wfcqueue.h
+ *
+ * Userspace RCU library - Concurrent Queue with Wait-Free Enqueue/Blocking Dequeue
+ *
+ * TO BE INCLUDED ONLY IN LGPL-COMPATIBLE CODE. See urcu/wfcqueue.h for
+ * linking dynamically with the userspace rcu library.
+ *
+ * Copyright 2010-2012 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ * Copyright 2011-2012 - Lai Jiangshan <laijs@cn.fujitsu.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Copied from userspace-rcu 0.10 because version 0.7 doesn't contain it. */
+
+#include <pthread.h>
+#include <assert.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <urcu/compiler.h>
+#include <urcu/uatomic.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Concurrent queue with wait-free enqueue/blocking dequeue.
+ *
+ * This queue has been designed and implemented collaboratively by
+ * Mathieu Desnoyers and Lai Jiangshan. Inspired from
+ * half-wait-free/half-blocking queue implementation done by Paul E.
+ * McKenney.
+ *
+ * Mutual exclusion of cds_wfcq_* / __cds_wfcq_* API
+ *
+ * Synchronization table:
+ *
+ * External synchronization techniques described in the API below is
+ * required between pairs marked with "X". No external synchronization
+ * required between pairs marked with "-".
+ *
+ * Legend:
+ * [1] cds_wfcq_enqueue
+ * [2] __cds_wfcq_splice (destination queue)
+ * [3] __cds_wfcq_dequeue
+ * [4] __cds_wfcq_splice (source queue)
+ * [5] __cds_wfcq_first
+ * [6] __cds_wfcq_next
+ *
+ * [1] [2] [3] [4] [5] [6]
+ * [1] - - - - - -
+ * [2] - - - - - -
+ * [3] - - X X X X
+ * [4] - - X - X X
+ * [5] - - X X - -
+ * [6] - - X X - -
+ *
+ * Mutual exclusion can be ensured by holding cds_wfcq_dequeue_lock().
+ *
+ * For convenience, cds_wfcq_dequeue_blocking() and
+ * cds_wfcq_splice_blocking() hold the dequeue lock.
+ *
+ * Besides locking, mutual exclusion of dequeue, splice and iteration
+ * can be ensured by performing all of those operations from a single
+ * thread, without requiring any lock.
+ */
+
+#define WFCQ_ADAPT_ATTEMPTS 10 /* Retry if being set */
+#define WFCQ_WAIT 10 /* Wait 10 ms if being set */
+
+/*
+ * cds_wfcq_node_init: initialize wait-free queue node.
+ */
+static inline void _cds_wfcq_node_init(struct cds_wfcq_node *node)
+{
+ node->next = NULL;
+}
+
+/*
+ * cds_wfcq_init: initialize wait-free queue (with lock). Pair with
+ * cds_wfcq_destroy().
+ */
+static inline void _cds_wfcq_init(struct cds_wfcq_head *head,
+ struct cds_wfcq_tail *tail)
+{
+ int ret;
+
+ /* Set queue head and tail */
+ _cds_wfcq_node_init(&head->node);
+ tail->p = &head->node;
+ ret = pthread_mutex_init(&head->lock, NULL);
+ assert(!ret);
+}
+
+/*
+ * cds_wfcq_destroy: destroy wait-free queue (with lock). Pair with
+ * cds_wfcq_init().
+ */
+static inline void _cds_wfcq_destroy(struct cds_wfcq_head *head,
+ struct cds_wfcq_tail *tail)
+{
+ int ret = pthread_mutex_destroy(&head->lock);
+ assert(!ret);
+}
+
+/*
+ * __cds_wfcq_init: initialize wait-free queue (without lock). Don't
+ * pair with any destroy function.
+ */
+static inline void ___cds_wfcq_init(struct __cds_wfcq_head *head,
+ struct cds_wfcq_tail *tail)
+{
+ /* Set queue head and tail */
+ _cds_wfcq_node_init(&head->node);
+ tail->p = &head->node;
+}
+
+/*
+ * cds_wfcq_empty: return whether wait-free queue is empty.
+ *
+ * No memory barrier is issued. No mutual exclusion is required.
+ *
+ * We perform the test on head->node.next to check if the queue is
+ * possibly empty, but we confirm this by checking if the tail pointer
+ * points to the head node because the tail pointer is the linearisation
+ * point of the enqueuers. Just checking the head next pointer could
+ * make a queue appear empty if an enqueuer is preempted for a long time
+ * between xchg() and setting the previous node's next pointer.
+ */
+static inline bool _cds_wfcq_empty(cds_wfcq_head_ptr_t u_head,
+ struct cds_wfcq_tail *tail)
+{
+ struct __cds_wfcq_head *head = u_head._h;
+ /*
+ * Queue is empty if no node is pointed by head->node.next nor
+ * tail->p. Even though the tail->p check is sufficient to find
+ * out of the queue is empty, we first check head->node.next as a
+ * common case to ensure that dequeuers do not frequently access
+ * enqueuer's tail->p cache line.
+ */
+ return CMM_LOAD_SHARED(head->node.next) == NULL
+ && CMM_LOAD_SHARED(tail->p) == &head->node;
+}
+
+static inline void _cds_wfcq_dequeue_lock(struct cds_wfcq_head *head,
+ struct cds_wfcq_tail *tail)
+{
+ int ret;
+
+ ret = pthread_mutex_lock(&head->lock);
+ assert(!ret);
+}
+
+static inline void _cds_wfcq_dequeue_unlock(struct cds_wfcq_head *head,
+ struct cds_wfcq_tail *tail)
+{
+ int ret;
+
+ ret = pthread_mutex_unlock(&head->lock);
+ assert(!ret);
+}
+
+static inline bool ___cds_wfcq_append(cds_wfcq_head_ptr_t u_head,
+ struct cds_wfcq_tail *tail,
+ struct cds_wfcq_node *new_head,
+ struct cds_wfcq_node *new_tail)
+{
+ struct __cds_wfcq_head *head = u_head._h;
+ struct cds_wfcq_node *old_tail;
+
+ /*
+ * Implicit memory barrier before uatomic_xchg() orders earlier
+ * stores to data structure containing node and setting
+ * node->next to NULL before publication.
+ */
+ old_tail = uatomic_xchg(&tail->p, new_tail);
+
+ /*
+ * Implicit memory barrier after uatomic_xchg() orders store to
+ * q->tail before store to old_tail->next.
+ *
+ * At this point, dequeuers see a NULL tail->p->next, which
+ * indicates that the queue is being appended to. The following
+ * store will append "node" to the queue from a dequeuer
+ * perspective.
+ */
+ CMM_STORE_SHARED(old_tail->next, new_head);
+ /*
+ * Return false if queue was empty prior to adding the node,
+ * else return true.
+ */
+ return old_tail != &head->node;
+}
+
+/*
+ * cds_wfcq_enqueue: enqueue a node into a wait-free queue.
+ *
+ * Issues a full memory barrier before enqueue. No mutual exclusion is
+ * required.
+ *
+ * Returns false if the queue was empty prior to adding the node.
+ * Returns true otherwise.
+ */
+static inline bool _cds_wfcq_enqueue(cds_wfcq_head_ptr_t head,
+ struct cds_wfcq_tail *tail,
+ struct cds_wfcq_node *new_tail)
+{
+ return ___cds_wfcq_append(head, tail, new_tail, new_tail);
+}
+
+/*
+ * CDS_WFCQ_WAIT_SLEEP:
+ *
+ * By default, this sleeps for the given @msec milliseconds.
+ * This is a macro which LGPL users may #define themselves before
+ * including wfcqueue.h to override the default behavior (e.g.
+ * to log a warning or perform other background work).
+ */
+#ifndef CDS_WFCQ_WAIT_SLEEP
+#define CDS_WFCQ_WAIT_SLEEP(msec) ___cds_wfcq_wait_sleep(msec)
+#endif
+
+static inline void ___cds_wfcq_wait_sleep(int msec)
+{
+ (void) poll(NULL, 0, msec);
+}
+
+/*
+ * ___cds_wfcq_busy_wait: adaptative busy-wait.
+ *
+ * Returns 1 if nonblocking and needs to block, 0 otherwise.
+ */
+static inline bool
+___cds_wfcq_busy_wait(int *attempt, int blocking)
+{
+ if (!blocking)
+ return 1;
+ if (++(*attempt) >= WFCQ_ADAPT_ATTEMPTS) {
+ CDS_WFCQ_WAIT_SLEEP(WFCQ_WAIT); /* Wait for 10ms */
+ *attempt = 0;
+ } else {
+ caa_cpu_relax();
+ }
+ return 0;
+}
+
+/*
+ * Waiting for enqueuer to complete enqueue and return the next node.
+ */
+static inline struct cds_wfcq_node *
+___cds_wfcq_node_sync_next(struct cds_wfcq_node *node, int blocking)
+{
+ struct cds_wfcq_node *next;
+ int attempt = 0;
+
+ /*
+ * Adaptative busy-looping waiting for enqueuer to complete enqueue.
+ */
+ while ((next = CMM_LOAD_SHARED(node->next)) == NULL) {
+ if (___cds_wfcq_busy_wait(&attempt, blocking))
+ return CDS_WFCQ_WOULDBLOCK;
+ }
+
+ return next;
+}
+
+static inline struct cds_wfcq_node *
+___cds_wfcq_first(cds_wfcq_head_ptr_t u_head,
+ struct cds_wfcq_tail *tail,
+ int blocking)
+{
+ struct __cds_wfcq_head *head = u_head._h;
+ struct cds_wfcq_node *node;
+
+ if (_cds_wfcq_empty(__cds_wfcq_head_cast(head), tail))
+ return NULL;
+ node = ___cds_wfcq_node_sync_next(&head->node, blocking);
+ /* Load head->node.next before loading node's content */
+ cmm_smp_read_barrier_depends();
+ return node;
+}
+
+/*
+ * __cds_wfcq_first_blocking: get first node of a queue, without dequeuing.
+ *
+ * Content written into the node before enqueue is guaranteed to be
+ * consistent, but no other memory ordering is ensured.
+ * Dequeue/splice/iteration mutual exclusion should be ensured by the
+ * caller.
+ *
+ * Used by for-like iteration macros in urcu/wfqueue.h:
+ * __cds_wfcq_for_each_blocking()
+ * __cds_wfcq_for_each_blocking_safe()
+ *
+ * Returns NULL if queue is empty, first node otherwise.
+ */
+static inline struct cds_wfcq_node *
+___cds_wfcq_first_blocking(cds_wfcq_head_ptr_t head,
+ struct cds_wfcq_tail *tail)
+{
+ return ___cds_wfcq_first(head, tail, 1);
+}
+
+
+/*
+ * __cds_wfcq_first_nonblocking: get first node of a queue, without dequeuing.
+ *
+ * Same as __cds_wfcq_first_blocking, but returns CDS_WFCQ_WOULDBLOCK if
+ * it needs to block.
+ */
+static inline struct cds_wfcq_node *
+___cds_wfcq_first_nonblocking(cds_wfcq_head_ptr_t head,
+ struct cds_wfcq_tail *tail)
+{
+ return ___cds_wfcq_first(head, tail, 0);
+}
+
+static inline struct cds_wfcq_node *
+___cds_wfcq_next(cds_wfcq_head_ptr_t head,
+ struct cds_wfcq_tail *tail,
+ struct cds_wfcq_node *node,
+ int blocking)
+{
+ struct cds_wfcq_node *next;
+
+ /*
+ * Even though the following tail->p check is sufficient to find
+ * out if we reached the end of the queue, we first check
+ * node->next as a common case to ensure that iteration on nodes
+ * do not frequently access enqueuer's tail->p cache line.
+ */
+ if ((next = CMM_LOAD_SHARED(node->next)) == NULL) {
+ /* Load node->next before tail->p */
+ cmm_smp_rmb();
+ if (CMM_LOAD_SHARED(tail->p) == node)
+ return NULL;
+ next = ___cds_wfcq_node_sync_next(node, blocking);
+ }
+ /* Load node->next before loading next's content */
+ cmm_smp_read_barrier_depends();
+ return next;
+}
+
+/*
+ * __cds_wfcq_next_blocking: get next node of a queue, without dequeuing.
+ *
+ * Content written into the node before enqueue is guaranteed to be
+ * consistent, but no other memory ordering is ensured.
+ * Dequeue/splice/iteration mutual exclusion should be ensured by the
+ * caller.
+ *
+ * Used by for-like iteration macros in urcu/wfqueue.h:
+ * __cds_wfcq_for_each_blocking()
+ * __cds_wfcq_for_each_blocking_safe()
+ *
+ * Returns NULL if reached end of queue, non-NULL next queue node
+ * otherwise.
+ */
+static inline struct cds_wfcq_node *
+___cds_wfcq_next_blocking(cds_wfcq_head_ptr_t head,
+ struct cds_wfcq_tail *tail,
+ struct cds_wfcq_node *node)
+{
+ return ___cds_wfcq_next(head, tail, node, 1);
+}
+
+/*
+ * __cds_wfcq_next_blocking: get next node of a queue, without dequeuing.
+ *
+ * Same as __cds_wfcq_next_blocking, but returns CDS_WFCQ_WOULDBLOCK if
+ * it needs to block.
+ */
+static inline struct cds_wfcq_node *
+___cds_wfcq_next_nonblocking(cds_wfcq_head_ptr_t head,
+ struct cds_wfcq_tail *tail,
+ struct cds_wfcq_node *node)
+{
+ return ___cds_wfcq_next(head, tail, node, 0);
+}
+
+static inline struct cds_wfcq_node *
+___cds_wfcq_dequeue_with_state(cds_wfcq_head_ptr_t u_head,
+ struct cds_wfcq_tail *tail,
+ int *state,
+ int blocking)
+{
+ struct __cds_wfcq_head *head = u_head._h;
+ struct cds_wfcq_node *node, *next;
+
+ if (state)
+ *state = 0;
+
+ if (_cds_wfcq_empty(__cds_wfcq_head_cast(head), tail)) {
+ return NULL;
+ }
+
+ node = ___cds_wfcq_node_sync_next(&head->node, blocking);
+ if (!blocking && node == CDS_WFCQ_WOULDBLOCK) {
+ return CDS_WFCQ_WOULDBLOCK;
+ }
+
+ if ((next = CMM_LOAD_SHARED(node->next)) == NULL) {
+ /*
+ * @node is probably the only node in the queue.
+ * Try to move the tail to &q->head.
+ * q->head.next is set to NULL here, and stays
+ * NULL if the cmpxchg succeeds. Should the
+ * cmpxchg fail due to a concurrent enqueue, the
+ * q->head.next will be set to the next node.
+ * The implicit memory barrier before
+ * uatomic_cmpxchg() orders load node->next
+ * before loading q->tail.
+ * The implicit memory barrier before uatomic_cmpxchg
+ * orders load q->head.next before loading node's
+ * content.
+ */
+ _cds_wfcq_node_init(&head->node);
+ if (uatomic_cmpxchg(&tail->p, node, &head->node) == node) {
+ if (state)
+ *state |= CDS_WFCQ_STATE_LAST;
+ return node;
+ }
+ next = ___cds_wfcq_node_sync_next(node, blocking);
+ /*
+ * In nonblocking mode, if we would need to block to
+ * get node's next, set the head next node pointer
+ * (currently NULL) back to its original value.
+ */
+ if (!blocking && next == CDS_WFCQ_WOULDBLOCK) {
+ head->node.next = node;
+ return CDS_WFCQ_WOULDBLOCK;
+ }
+ }
+
+ /*
+ * Move queue head forward.
+ */
+ head->node.next = next;
+
+ /* Load q->head.next before loading node's content */
+ cmm_smp_read_barrier_depends();
+ return node;
+}
+
+/*
+ * __cds_wfcq_dequeue_with_state_blocking: dequeue node from queue, with state.
+ *
+ * Content written into the node before enqueue is guaranteed to be
+ * consistent, but no other memory ordering is ensured.
+ * It is valid to reuse and free a dequeued node immediately.
+ * Dequeue/splice/iteration mutual exclusion should be ensured by the
+ * caller.
+ */
+static inline struct cds_wfcq_node *
+___cds_wfcq_dequeue_with_state_blocking(cds_wfcq_head_ptr_t head,
+ struct cds_wfcq_tail *tail, int *state)
+{
+ return ___cds_wfcq_dequeue_with_state(head, tail, state, 1);
+}
+
+/*
+ * ___cds_wfcq_dequeue_blocking: dequeue node from queue.
+ *
+ * Same as __cds_wfcq_dequeue_with_state_blocking, but without saving
+ * state.
+ */
+static inline struct cds_wfcq_node *
+___cds_wfcq_dequeue_blocking(cds_wfcq_head_ptr_t head,
+ struct cds_wfcq_tail *tail)
+{
+ return ___cds_wfcq_dequeue_with_state_blocking(head, tail, NULL);
+}
+
+/*
+ * __cds_wfcq_dequeue_with_state_nonblocking: dequeue node, with state.
+ *
+ * Same as __cds_wfcq_dequeue_blocking, but returns CDS_WFCQ_WOULDBLOCK
+ * if it needs to block.
+ */
+static inline struct cds_wfcq_node *
+___cds_wfcq_dequeue_with_state_nonblocking(cds_wfcq_head_ptr_t head,
+ struct cds_wfcq_tail *tail, int *state)
+{
+ return ___cds_wfcq_dequeue_with_state(head, tail, state, 0);
+}
+
+/*
+ * ___cds_wfcq_dequeue_nonblocking: dequeue node from queue.
+ *
+ * Same as __cds_wfcq_dequeue_with_state_nonblocking, but without saving
+ * state.
+ */
+static inline struct cds_wfcq_node *
+___cds_wfcq_dequeue_nonblocking(cds_wfcq_head_ptr_t head,
+ struct cds_wfcq_tail *tail)
+{
+ return ___cds_wfcq_dequeue_with_state_nonblocking(head, tail, NULL);
+}
+
+/*
+ * __cds_wfcq_splice: enqueue all src_q nodes at the end of dest_q.
+ *
+ * Dequeue all nodes from src_q.
+ * dest_q must be already initialized.
+ * Mutual exclusion for src_q should be ensured by the caller as
+ * specified in the "Synchronisation table".
+ * Returns enum cds_wfcq_ret which indicates the state of the src or
+ * dest queue.
+ */
+static inline enum cds_wfcq_ret
+___cds_wfcq_splice(
+ cds_wfcq_head_ptr_t u_dest_q_head,
+ struct cds_wfcq_tail *dest_q_tail,
+ cds_wfcq_head_ptr_t u_src_q_head,
+ struct cds_wfcq_tail *src_q_tail,
+ int blocking)
+{
+ struct __cds_wfcq_head *dest_q_head = u_dest_q_head._h;
+ struct __cds_wfcq_head *src_q_head = u_src_q_head._h;
+ struct cds_wfcq_node *head, *tail;
+ int attempt = 0;
+
+ /*
+ * Initial emptiness check to speed up cases where queue is
+ * empty: only require loads to check if queue is empty.
+ */
+ if (_cds_wfcq_empty(__cds_wfcq_head_cast(src_q_head), src_q_tail))
+ return CDS_WFCQ_RET_SRC_EMPTY;
+
+ for (;;) {
+ /*
+ * Open-coded _cds_wfcq_empty() by testing result of
+ * uatomic_xchg, as well as tail pointer vs head node
+ * address.
+ */
+ head = uatomic_xchg(&src_q_head->node.next, NULL);
+ if (head)
+ break; /* non-empty */
+ if (CMM_LOAD_SHARED(src_q_tail->p) == &src_q_head->node)
+ return CDS_WFCQ_RET_SRC_EMPTY;
+ if (___cds_wfcq_busy_wait(&attempt, blocking))
+ return CDS_WFCQ_RET_WOULDBLOCK;
+ }
+
+ /*
+ * Memory barrier implied before uatomic_xchg() orders store to
+ * src_q->head before store to src_q->tail. This is required by
+ * concurrent enqueue on src_q, which exchanges the tail before
+ * updating the previous tail's next pointer.
+ */
+ tail = uatomic_xchg(&src_q_tail->p, &src_q_head->node);
+
+ /*
+ * Append the spliced content of src_q into dest_q. Does not
+ * require mutual exclusion on dest_q (wait-free).
+ */
+ if (___cds_wfcq_append(__cds_wfcq_head_cast(dest_q_head), dest_q_tail,
+ head, tail))
+ return CDS_WFCQ_RET_DEST_NON_EMPTY;
+ else
+ return CDS_WFCQ_RET_DEST_EMPTY;
+}
+
+/*
+ * __cds_wfcq_splice_blocking: enqueue all src_q nodes at the end of dest_q.
+ *
+ * Dequeue all nodes from src_q.
+ * dest_q must be already initialized.
+ * Mutual exclusion for src_q should be ensured by the caller as
+ * specified in the "Synchronisation table".
+ * Returns enum cds_wfcq_ret which indicates the state of the src or
+ * dest queue. Never returns CDS_WFCQ_RET_WOULDBLOCK.
+ */
+static inline enum cds_wfcq_ret
+___cds_wfcq_splice_blocking(
+ cds_wfcq_head_ptr_t dest_q_head,
+ struct cds_wfcq_tail *dest_q_tail,
+ cds_wfcq_head_ptr_t src_q_head,
+ struct cds_wfcq_tail *src_q_tail)
+{
+ return ___cds_wfcq_splice(dest_q_head, dest_q_tail,
+ src_q_head, src_q_tail, 1);
+}
+
+/*
+ * __cds_wfcq_splice_nonblocking: enqueue all src_q nodes at the end of dest_q.
+ *
+ * Same as __cds_wfcq_splice_blocking, but returns
+ * CDS_WFCQ_RET_WOULDBLOCK if it needs to block.
+ */
+static inline enum cds_wfcq_ret
+___cds_wfcq_splice_nonblocking(
+ cds_wfcq_head_ptr_t dest_q_head,
+ struct cds_wfcq_tail *dest_q_tail,
+ cds_wfcq_head_ptr_t src_q_head,
+ struct cds_wfcq_tail *src_q_tail)
+{
+ return ___cds_wfcq_splice(dest_q_head, dest_q_tail,
+ src_q_head, src_q_tail, 0);
+}
+
+/*
+ * cds_wfcq_dequeue_with_state_blocking: dequeue a node from a wait-free queue.
+ *
+ * Content written into the node before enqueue is guaranteed to be
+ * consistent, but no other memory ordering is ensured.
+ * Mutual exclusion with cds_wfcq_splice_blocking and dequeue lock is
+ * ensured.
+ * It is valid to reuse and free a dequeued node immediately.
+ */
+static inline struct cds_wfcq_node *
+_cds_wfcq_dequeue_with_state_blocking(struct cds_wfcq_head *head,
+ struct cds_wfcq_tail *tail, int *state)
+{
+ struct cds_wfcq_node *retval;
+
+ _cds_wfcq_dequeue_lock(head, tail);
+ retval = ___cds_wfcq_dequeue_with_state_blocking(cds_wfcq_head_cast(head),
+ tail, state);
+ _cds_wfcq_dequeue_unlock(head, tail);
+ return retval;
+}
+
+/*
+ * cds_wfcq_dequeue_blocking: dequeue node from queue.
+ *
+ * Same as cds_wfcq_dequeue_blocking, but without saving state.
+ */
+static inline struct cds_wfcq_node *
+_cds_wfcq_dequeue_blocking(struct cds_wfcq_head *head,
+ struct cds_wfcq_tail *tail)
+{
+ return _cds_wfcq_dequeue_with_state_blocking(head, tail, NULL);
+}
+
+/*
+ * cds_wfcq_splice_blocking: enqueue all src_q nodes at the end of dest_q.
+ *
+ * Dequeue all nodes from src_q.
+ * dest_q must be already initialized.
+ * Content written into the node before enqueue is guaranteed to be
+ * consistent, but no other memory ordering is ensured.
+ * Mutual exclusion with cds_wfcq_dequeue_blocking and dequeue lock is
+ * ensured.
+ * Returns enum cds_wfcq_ret which indicates the state of the src or
+ * dest queue. Never returns CDS_WFCQ_RET_WOULDBLOCK.
+ */
+static inline enum cds_wfcq_ret
+_cds_wfcq_splice_blocking(
+ struct cds_wfcq_head *dest_q_head,
+ struct cds_wfcq_tail *dest_q_tail,
+ struct cds_wfcq_head *src_q_head,
+ struct cds_wfcq_tail *src_q_tail)
+{
+ enum cds_wfcq_ret ret;
+
+ _cds_wfcq_dequeue_lock(src_q_head, src_q_tail);
+ ret = ___cds_wfcq_splice_blocking(cds_wfcq_head_cast(dest_q_head), dest_q_tail,
+ cds_wfcq_head_cast(src_q_head), src_q_tail);
+ _cds_wfcq_dequeue_unlock(src_q_head, src_q_tail);
+ return ret;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _URCU_WFCQUEUE_STATIC_H */
diff --git a/contrib/userspace-rcu/static-wfstack.h b/contrib/userspace-rcu/static-wfstack.h
new file mode 100644
index 00000000000..29b81c3aac3
--- /dev/null
+++ b/contrib/userspace-rcu/static-wfstack.h
@@ -0,0 +1,455 @@
+#ifndef _URCU_STATIC_WFSTACK_H
+#define _URCU_STATIC_WFSTACK_H
+
+/*
+ * urcu/static/wfstack.h
+ *
+ * Userspace RCU library - Stack with with wait-free push, blocking traversal.
+ *
+ * TO BE INCLUDED ONLY IN LGPL-COMPATIBLE CODE. See urcu/wfstack.h for
+ * linking dynamically with the userspace rcu library.
+ *
+ * Copyright 2010-2012 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Adapted from userspace-rcu 0.10 because version 0.7 doesn't support a stack
+ * without mutex. */
+
+#include <pthread.h>
+#include <assert.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <urcu/compiler.h>
+#include <urcu/uatomic.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define CDS_WFS_END ((void *) 0x1UL)
+#define CDS_WFS_ADAPT_ATTEMPTS 10 /* Retry if being set */
+#define CDS_WFS_WAIT 10 /* Wait 10 ms if being set */
+
+/*
+ * Stack with wait-free push, blocking traversal.
+ *
+ * Stack implementing push, pop, pop_all operations, as well as iterator
+ * on the stack head returned by pop_all.
+ *
+ * Wait-free operations: cds_wfs_push, __cds_wfs_pop_all, cds_wfs_empty,
+ * cds_wfs_first.
+ * Blocking operations: cds_wfs_pop, cds_wfs_pop_all, cds_wfs_next,
+ * iteration on stack head returned by pop_all.
+ *
+ * Synchronization table:
+ *
+ * External synchronization techniques described in the API below is
+ * required between pairs marked with "X". No external synchronization
+ * required between pairs marked with "-".
+ *
+ * cds_wfs_push __cds_wfs_pop __cds_wfs_pop_all
+ * cds_wfs_push - - -
+ * __cds_wfs_pop - X X
+ * __cds_wfs_pop_all - X -
+ *
+ * cds_wfs_pop and cds_wfs_pop_all use an internal mutex to provide
+ * synchronization.
+ */
+
+/*
+ * cds_wfs_node_init: initialize wait-free stack node.
+ */
+static inline
+void _cds_wfs_node_init(struct cds_wfs_node *node)
+{
+ node->next = NULL;
+}
+
+/*
+ * __cds_wfs_init: initialize wait-free stack. Don't pair with
+ * any destroy function.
+ */
+static inline void ___cds_wfs_init(struct __cds_wfs_stack *s)
+{
+ s->head = CDS_WFS_END;
+}
+
+/*
+ * cds_wfs_init: initialize wait-free stack. Pair with
+ * cds_wfs_destroy().
+ */
+static inline
+void _cds_wfs_init(struct cds_wfs_stack *s)
+{
+ int ret;
+
+ s->head = CDS_WFS_END;
+ ret = pthread_mutex_init(&s->lock, NULL);
+ assert(!ret);
+}
+
+/*
+ * cds_wfs_destroy: destroy wait-free stack. Pair with
+ * cds_wfs_init().
+ */
+static inline
+void _cds_wfs_destroy(struct cds_wfs_stack *s)
+{
+ int ret = pthread_mutex_destroy(&s->lock);
+ assert(!ret);
+}
+
+static inline bool ___cds_wfs_end(void *node)
+{
+ return node == CDS_WFS_END;
+}
+
+/*
+ * cds_wfs_empty: return whether wait-free stack is empty.
+ *
+ * No memory barrier is issued. No mutual exclusion is required.
+ */
+static inline bool _cds_wfs_empty(cds_wfs_stack_ptr_t u_stack)
+{
+ struct __cds_wfs_stack *s = u_stack._s;
+
+ return ___cds_wfs_end(CMM_LOAD_SHARED(s->head));
+}
+
+/*
+ * cds_wfs_push: push a node into the stack.
+ *
+ * Issues a full memory barrier before push. No mutual exclusion is
+ * required.
+ *
+ * Returns 0 if the stack was empty prior to adding the node.
+ * Returns non-zero otherwise.
+ */
+static inline
+int _cds_wfs_push(cds_wfs_stack_ptr_t u_stack, struct cds_wfs_node *node)
+{
+ struct __cds_wfs_stack *s = u_stack._s;
+ struct cds_wfs_head *old_head, *new_head;
+
+ assert(node->next == NULL);
+ new_head = caa_container_of(node, struct cds_wfs_head, node);
+ /*
+ * uatomic_xchg() implicit memory barrier orders earlier stores
+ * to node (setting it to NULL) before publication.
+ */
+ old_head = uatomic_xchg(&s->head, new_head);
+ /*
+ * At this point, dequeuers see a NULL node->next, they should
+ * busy-wait until node->next is set to old_head.
+ */
+ CMM_STORE_SHARED(node->next, &old_head->node);
+ return !___cds_wfs_end(old_head);
+}
+
+/*
+ * Waiting for push to complete enqueue and return the next node.
+ */
+static inline struct cds_wfs_node *
+___cds_wfs_node_sync_next(struct cds_wfs_node *node, int blocking)
+{
+ struct cds_wfs_node *next;
+ int attempt = 0;
+
+ /*
+ * Adaptative busy-looping waiting for push to complete.
+ */
+ while ((next = CMM_LOAD_SHARED(node->next)) == NULL) {
+ if (!blocking)
+ return CDS_WFS_WOULDBLOCK;
+ if (++attempt >= CDS_WFS_ADAPT_ATTEMPTS) {
+ (void) poll(NULL, 0, CDS_WFS_WAIT); /* Wait for 10ms */
+ attempt = 0;
+ } else {
+ caa_cpu_relax();
+ }
+ }
+
+ return next;
+}
+
+static inline
+struct cds_wfs_node *
+___cds_wfs_pop(cds_wfs_stack_ptr_t u_stack, int *state, int blocking)
+{
+ struct cds_wfs_head *head, *new_head;
+ struct cds_wfs_node *next;
+ struct __cds_wfs_stack *s = u_stack._s;
+
+ if (state)
+ *state = 0;
+ for (;;) {
+ head = CMM_LOAD_SHARED(s->head);
+ if (___cds_wfs_end(head)) {
+ return NULL;
+ }
+ next = ___cds_wfs_node_sync_next(&head->node, blocking);
+ if (!blocking && next == CDS_WFS_WOULDBLOCK) {
+ return CDS_WFS_WOULDBLOCK;
+ }
+ new_head = caa_container_of(next, struct cds_wfs_head, node);
+ if (uatomic_cmpxchg(&s->head, head, new_head) == head) {
+ if (state && ___cds_wfs_end(new_head))
+ *state |= CDS_WFS_STATE_LAST;
+ return &head->node;
+ }
+ if (!blocking) {
+ return CDS_WFS_WOULDBLOCK;
+ }
+ /* busy-loop if head changed under us */
+ }
+}
+
+/*
+ * __cds_wfs_pop_with_state_blocking: pop a node from the stack, with state.
+ *
+ * Returns NULL if stack is empty.
+ *
+ * __cds_wfs_pop_blocking needs to be synchronized using one of the
+ * following techniques:
+ *
+ * 1) Calling __cds_wfs_pop_blocking under rcu read lock critical
+ * section. The caller must wait for a grace period to pass before
+ * freeing the returned node or modifying the cds_wfs_node structure.
+ * 2) Using mutual exclusion (e.g. mutexes) to protect
+ * __cds_wfs_pop_blocking and __cds_wfs_pop_all callers.
+ * 3) Ensuring that only ONE thread can call __cds_wfs_pop_blocking()
+ * and __cds_wfs_pop_all(). (multi-provider/single-consumer scheme).
+ *
+ * "state" saves state flags atomically sampled with pop operation.
+ */
+static inline
+struct cds_wfs_node *
+___cds_wfs_pop_with_state_blocking(cds_wfs_stack_ptr_t u_stack, int *state)
+{
+ return ___cds_wfs_pop(u_stack, state, 1);
+}
+
+static inline
+struct cds_wfs_node *
+___cds_wfs_pop_blocking(cds_wfs_stack_ptr_t u_stack)
+{
+ return ___cds_wfs_pop_with_state_blocking(u_stack, NULL);
+}
+
+/*
+ * __cds_wfs_pop_with_state_nonblocking: pop a node from the stack.
+ *
+ * Same as __cds_wfs_pop_with_state_blocking, but returns
+ * CDS_WFS_WOULDBLOCK if it needs to block.
+ *
+ * "state" saves state flags atomically sampled with pop operation.
+ */
+static inline
+struct cds_wfs_node *
+___cds_wfs_pop_with_state_nonblocking(cds_wfs_stack_ptr_t u_stack, int *state)
+{
+ return ___cds_wfs_pop(u_stack, state, 0);
+}
+
+/*
+ * __cds_wfs_pop_nonblocking: pop a node from the stack.
+ *
+ * Same as __cds_wfs_pop_blocking, but returns CDS_WFS_WOULDBLOCK if
+ * it needs to block.
+ */
+static inline
+struct cds_wfs_node *
+___cds_wfs_pop_nonblocking(cds_wfs_stack_ptr_t u_stack)
+{
+ return ___cds_wfs_pop_with_state_nonblocking(u_stack, NULL);
+}
+
+/*
+ * __cds_wfs_pop_all: pop all nodes from a stack.
+ *
+ * __cds_wfs_pop_all does not require any synchronization with other
+ * push, nor with other __cds_wfs_pop_all, but requires synchronization
+ * matching the technique used to synchronize __cds_wfs_pop_blocking:
+ *
+ * 1) If __cds_wfs_pop_blocking is called under rcu read lock critical
+ * section, both __cds_wfs_pop_blocking and cds_wfs_pop_all callers
+ * must wait for a grace period to pass before freeing the returned
+ * node or modifying the cds_wfs_node structure. However, no RCU
+ * read-side critical section is needed around __cds_wfs_pop_all.
+ * 2) Using mutual exclusion (e.g. mutexes) to protect
+ * __cds_wfs_pop_blocking and __cds_wfs_pop_all callers.
+ * 3) Ensuring that only ONE thread can call __cds_wfs_pop_blocking()
+ * and __cds_wfs_pop_all(). (multi-provider/single-consumer scheme).
+ */
+static inline
+struct cds_wfs_head *
+___cds_wfs_pop_all(cds_wfs_stack_ptr_t u_stack)
+{
+ struct __cds_wfs_stack *s = u_stack._s;
+ struct cds_wfs_head *head;
+
+ /*
+ * Implicit memory barrier after uatomic_xchg() matches implicit
+ * memory barrier before uatomic_xchg() in cds_wfs_push. It
+ * ensures that all nodes of the returned list are consistent.
+ * There is no need to issue memory barriers when iterating on
+ * the returned list, because the full memory barrier issued
+ * prior to each uatomic_cmpxchg, which each write to head, are
+ * taking care to order writes to each node prior to the full
+ * memory barrier after this uatomic_xchg().
+ */
+ head = uatomic_xchg(&s->head, CDS_WFS_END);
+ if (___cds_wfs_end(head))
+ return NULL;
+ return head;
+}
+
+/*
+ * cds_wfs_pop_lock: lock stack pop-protection mutex.
+ */
+static inline void _cds_wfs_pop_lock(struct cds_wfs_stack *s)
+{
+ int ret;
+
+ ret = pthread_mutex_lock(&s->lock);
+ assert(!ret);
+}
+
+/*
+ * cds_wfs_pop_unlock: unlock stack pop-protection mutex.
+ */
+static inline void _cds_wfs_pop_unlock(struct cds_wfs_stack *s)
+{
+ int ret;
+
+ ret = pthread_mutex_unlock(&s->lock);
+ assert(!ret);
+}
+
+/*
+ * Call __cds_wfs_pop_with_state_blocking with an internal pop mutex held.
+ */
+static inline
+struct cds_wfs_node *
+_cds_wfs_pop_with_state_blocking(struct cds_wfs_stack *s, int *state)
+{
+ struct cds_wfs_node *retnode;
+
+ _cds_wfs_pop_lock(s);
+ retnode = ___cds_wfs_pop_with_state_blocking(s, state);
+ _cds_wfs_pop_unlock(s);
+ return retnode;
+}
+
+/*
+ * Call _cds_wfs_pop_with_state_blocking without saving any state.
+ */
+static inline
+struct cds_wfs_node *
+_cds_wfs_pop_blocking(struct cds_wfs_stack *s)
+{
+ return _cds_wfs_pop_with_state_blocking(s, NULL);
+}
+
+/*
+ * Call __cds_wfs_pop_all with an internal pop mutex held.
+ */
+static inline
+struct cds_wfs_head *
+_cds_wfs_pop_all_blocking(struct cds_wfs_stack *s)
+{
+ struct cds_wfs_head *rethead;
+
+ _cds_wfs_pop_lock(s);
+ rethead = ___cds_wfs_pop_all(s);
+ _cds_wfs_pop_unlock(s);
+ return rethead;
+}
+
+/*
+ * cds_wfs_first: get first node of a popped stack.
+ *
+ * Content written into the node before enqueue is guaranteed to be
+ * consistent, but no other memory ordering is ensured.
+ *
+ * Used by for-like iteration macros in urcu/wfstack.h:
+ * cds_wfs_for_each_blocking()
+ * cds_wfs_for_each_blocking_safe()
+ *
+ * Returns NULL if popped stack is empty, top stack node otherwise.
+ */
+static inline struct cds_wfs_node *
+_cds_wfs_first(struct cds_wfs_head *head)
+{
+ if (___cds_wfs_end(head))
+ return NULL;
+ return &head->node;
+}
+
+static inline struct cds_wfs_node *
+___cds_wfs_next(struct cds_wfs_node *node, int blocking)
+{
+ struct cds_wfs_node *next;
+
+ next = ___cds_wfs_node_sync_next(node, blocking);
+ /*
+ * CDS_WFS_WOULDBLOCK != CSD_WFS_END, so we can check for end
+ * even if ___cds_wfs_node_sync_next returns CDS_WFS_WOULDBLOCK,
+ * and still return CDS_WFS_WOULDBLOCK.
+ */
+ if (___cds_wfs_end(next))
+ return NULL;
+ return next;
+}
+
+/*
+ * cds_wfs_next_blocking: get next node of a popped stack.
+ *
+ * Content written into the node before enqueue is guaranteed to be
+ * consistent, but no other memory ordering is ensured.
+ *
+ * Used by for-like iteration macros in urcu/wfstack.h:
+ * cds_wfs_for_each_blocking()
+ * cds_wfs_for_each_blocking_safe()
+ *
+ * Returns NULL if reached end of popped stack, non-NULL next stack
+ * node otherwise.
+ */
+static inline struct cds_wfs_node *
+_cds_wfs_next_blocking(struct cds_wfs_node *node)
+{
+ return ___cds_wfs_next(node, 1);
+}
+
+
+/*
+ * cds_wfs_next_nonblocking: get next node of a popped stack.
+ *
+ * Same as cds_wfs_next_blocking, but returns CDS_WFS_WOULDBLOCK if it
+ * needs to block.
+ */
+static inline struct cds_wfs_node *
+_cds_wfs_next_nonblocking(struct cds_wfs_node *node)
+{
+ return ___cds_wfs_next(node, 0);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _URCU_STATIC_WFSTACK_H */
diff --git a/contrib/userspace-rcu/wfcqueue.h b/contrib/userspace-rcu/wfcqueue.h
new file mode 100644
index 00000000000..0292585ac79
--- /dev/null
+++ b/contrib/userspace-rcu/wfcqueue.h
@@ -0,0 +1,216 @@
+#ifndef _URCU_WFCQUEUE_H
+#define _URCU_WFCQUEUE_H
+
+/*
+ * urcu/wfcqueue.h
+ *
+ * Userspace RCU library - Concurrent Queue with Wait-Free Enqueue/Blocking Dequeue
+ *
+ * Copyright 2010-2012 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ * Copyright 2011-2012 - Lai Jiangshan <laijs@cn.fujitsu.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Adapted from userspace-rcu 0.10 because version 0.7 doesn't contain it.
+ * The non-LGPL section has been removed. */
+
+#include <pthread.h>
+#include <assert.h>
+#include <stdbool.h>
+#include <urcu/compiler.h>
+#include <urcu/arch.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Concurrent queue with wait-free enqueue/blocking dequeue.
+ *
+ * This queue has been designed and implemented collaboratively by
+ * Mathieu Desnoyers and Lai Jiangshan. Inspired from
+ * half-wait-free/half-blocking queue implementation done by Paul E.
+ * McKenney.
+ */
+
+#define CDS_WFCQ_WOULDBLOCK ((struct cds_wfcq_node *) -1UL)
+
+enum cds_wfcq_ret {
+ CDS_WFCQ_RET_WOULDBLOCK = -1,
+ CDS_WFCQ_RET_DEST_EMPTY = 0,
+ CDS_WFCQ_RET_DEST_NON_EMPTY = 1,
+ CDS_WFCQ_RET_SRC_EMPTY = 2,
+};
+
+enum cds_wfcq_state {
+ CDS_WFCQ_STATE_LAST = (1U << 0),
+};
+
+struct cds_wfcq_node {
+ struct cds_wfcq_node *next;
+};
+
+/*
+ * Do not put head and tail on the same cache-line if concurrent
+ * enqueue/dequeue are expected from many CPUs. This eliminates
+ * false-sharing between enqueue and dequeue.
+ */
+struct __cds_wfcq_head {
+ struct cds_wfcq_node node;
+};
+
+struct cds_wfcq_head {
+ struct cds_wfcq_node node;
+ pthread_mutex_t lock;
+};
+
+#ifndef __cplusplus
+/*
+ * The transparent union allows calling functions that work on both
+ * struct cds_wfcq_head and struct __cds_wfcq_head on any of those two
+ * types.
+ */
+typedef union {
+ struct __cds_wfcq_head *_h;
+ struct cds_wfcq_head *h;
+} __attribute__((__transparent_union__)) cds_wfcq_head_ptr_t;
+
+/*
+ * This static inline is only present for compatibility with C++. It is
+ * effect-less in C.
+ */
+static inline struct __cds_wfcq_head *__cds_wfcq_head_cast(struct __cds_wfcq_head *head)
+{
+ return head;
+}
+
+/*
+ * This static inline is only present for compatibility with C++. It is
+ * effect-less in C.
+ */
+static inline struct cds_wfcq_head *cds_wfcq_head_cast(struct cds_wfcq_head *head)
+{
+ return head;
+}
+#else /* #ifndef __cplusplus */
+
+/* C++ ignores transparent union. */
+typedef union {
+ struct __cds_wfcq_head *_h;
+ struct cds_wfcq_head *h;
+} cds_wfcq_head_ptr_t;
+
+/* C++ ignores transparent union. Requires an explicit conversion. */
+static inline cds_wfcq_head_ptr_t __cds_wfcq_head_cast(struct __cds_wfcq_head *head)
+{
+ cds_wfcq_head_ptr_t ret = { ._h = head };
+ return ret;
+}
+/* C++ ignores transparent union. Requires an explicit conversion. */
+static inline cds_wfcq_head_ptr_t cds_wfcq_head_cast(struct cds_wfcq_head *head)
+{
+ cds_wfcq_head_ptr_t ret = { .h = head };
+ return ret;
+}
+#endif /* #else #ifndef __cplusplus */
+
+struct cds_wfcq_tail {
+ struct cds_wfcq_node *p;
+};
+
+#include "static-wfcqueue.h"
+
+#define cds_wfcq_node_init _cds_wfcq_node_init
+#define cds_wfcq_init _cds_wfcq_init
+#define __cds_wfcq_init ___cds_wfcq_init
+#define cds_wfcq_destroy _cds_wfcq_destroy
+#define cds_wfcq_empty _cds_wfcq_empty
+#define cds_wfcq_enqueue _cds_wfcq_enqueue
+
+/* Dequeue locking */
+#define cds_wfcq_dequeue_lock _cds_wfcq_dequeue_lock
+#define cds_wfcq_dequeue_unlock _cds_wfcq_dequeue_unlock
+
+/* Locking performed within cds_wfcq calls. */
+#define cds_wfcq_dequeue_blocking _cds_wfcq_dequeue_blocking
+#define cds_wfcq_dequeue_with_state_blocking \
+ _cds_wfcq_dequeue_with_state_blocking
+#define cds_wfcq_splice_blocking _cds_wfcq_splice_blocking
+#define cds_wfcq_first_blocking _cds_wfcq_first_blocking
+#define cds_wfcq_next_blocking _cds_wfcq_next_blocking
+
+/* Locking ensured by caller by holding cds_wfcq_dequeue_lock() */
+#define __cds_wfcq_dequeue_blocking ___cds_wfcq_dequeue_blocking
+#define __cds_wfcq_dequeue_with_state_blocking \
+ ___cds_wfcq_dequeue_with_state_blocking
+#define __cds_wfcq_splice_blocking ___cds_wfcq_splice_blocking
+#define __cds_wfcq_first_blocking ___cds_wfcq_first_blocking
+#define __cds_wfcq_next_blocking ___cds_wfcq_next_blocking
+
+/*
+ * Locking ensured by caller by holding cds_wfcq_dequeue_lock().
+ * Non-blocking: deque, first, next return CDS_WFCQ_WOULDBLOCK if they
+ * need to block. splice returns nonzero if it needs to block.
+ */
+#define __cds_wfcq_dequeue_nonblocking ___cds_wfcq_dequeue_nonblocking
+#define __cds_wfcq_dequeue_with_state_nonblocking \
+ ___cds_wfcq_dequeue_with_state_nonblocking
+#define __cds_wfcq_splice_nonblocking ___cds_wfcq_splice_nonblocking
+#define __cds_wfcq_first_nonblocking ___cds_wfcq_first_nonblocking
+#define __cds_wfcq_next_nonblocking ___cds_wfcq_next_nonblocking
+
+/*
+ * __cds_wfcq_for_each_blocking: Iterate over all nodes in a queue,
+ * without dequeuing them.
+ * @head: head of the queue (struct cds_wfcq_head or __cds_wfcq_head pointer).
+ * @tail: tail of the queue (struct cds_wfcq_tail pointer).
+ * @node: iterator on the queue (struct cds_wfcq_node pointer).
+ *
+ * Content written into each node before enqueue is guaranteed to be
+ * consistent, but no other memory ordering is ensured.
+ * Dequeue/splice/iteration mutual exclusion should be ensured by the
+ * caller.
+ */
+#define __cds_wfcq_for_each_blocking(head, tail, node) \
+ for (node = __cds_wfcq_first_blocking(head, tail); \
+ node != NULL; \
+ node = __cds_wfcq_next_blocking(head, tail, node))
+
+/*
+ * __cds_wfcq_for_each_blocking_safe: Iterate over all nodes in a queue,
+ * without dequeuing them. Safe against deletion.
+ * @head: head of the queue (struct cds_wfcq_head or __cds_wfcq_head pointer).
+ * @tail: tail of the queue (struct cds_wfcq_tail pointer).
+ * @node: iterator on the queue (struct cds_wfcq_node pointer).
+ * @n: struct cds_wfcq_node pointer holding the next pointer (used
+ * internally).
+ *
+ * Content written into each node before enqueue is guaranteed to be
+ * consistent, but no other memory ordering is ensured.
+ * Dequeue/splice/iteration mutual exclusion should be ensured by the
+ * caller.
+ */
+#define __cds_wfcq_for_each_blocking_safe(head, tail, node, n) \
+ for (node = __cds_wfcq_first_blocking(head, tail), \
+ n = (node ? __cds_wfcq_next_blocking(head, tail, node) : NULL); \
+ node != NULL; \
+ node = n, n = (node ? __cds_wfcq_next_blocking(head, tail, node) : NULL))
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _URCU_WFCQUEUE_H */
diff --git a/contrib/userspace-rcu/wfstack.h b/contrib/userspace-rcu/wfstack.h
new file mode 100644
index 00000000000..738fd1cfd33
--- /dev/null
+++ b/contrib/userspace-rcu/wfstack.h
@@ -0,0 +1,178 @@
+#ifndef _URCU_WFSTACK_H
+#define _URCU_WFSTACK_H
+
+/*
+ * urcu/wfstack.h
+ *
+ * Userspace RCU library - Stack with wait-free push, blocking traversal.
+ *
+ * Copyright 2010-2012 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Adapted from userspace-rcu 0.10 because version 0.7 doesn't support a stack
+ * without mutex. The non-LGPL section has been removed. */
+
+#include <pthread.h>
+#include <assert.h>
+#include <stdbool.h>
+#include <urcu/compiler.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Stack with wait-free push, blocking traversal.
+ *
+ * Stack implementing push, pop, pop_all operations, as well as iterator
+ * on the stack head returned by pop_all.
+ *
+ * Wait-free operations: cds_wfs_push, __cds_wfs_pop_all, cds_wfs_empty,
+ * cds_wfs_first.
+ * Blocking operations: cds_wfs_pop, cds_wfs_pop_all, cds_wfs_next,
+ * iteration on stack head returned by pop_all.
+ *
+ * Synchronization table:
+ *
+ * External synchronization techniques described in the API below is
+ * required between pairs marked with "X". No external synchronization
+ * required between pairs marked with "-".
+ *
+ * cds_wfs_push __cds_wfs_pop __cds_wfs_pop_all
+ * cds_wfs_push - - -
+ * __cds_wfs_pop - X X
+ * __cds_wfs_pop_all - X -
+ *
+ * cds_wfs_pop and cds_wfs_pop_all use an internal mutex to provide
+ * synchronization.
+ */
+
+#define CDS_WFS_WOULDBLOCK ((void *) -1UL)
+
+enum cds_wfs_state {
+ CDS_WFS_STATE_LAST = (1U << 0),
+};
+
+/*
+ * struct cds_wfs_node is returned by __cds_wfs_pop, and also used as
+ * iterator on stack. It is not safe to dereference the node next
+ * pointer when returned by __cds_wfs_pop_blocking.
+ */
+struct cds_wfs_node {
+ struct cds_wfs_node *next;
+};
+
+/*
+ * struct cds_wfs_head is returned by __cds_wfs_pop_all, and can be used
+ * to begin iteration on the stack. "node" needs to be the first field of
+ * cds_wfs_head, so the end-of-stack pointer value can be used for both
+ * types.
+ */
+struct cds_wfs_head {
+ struct cds_wfs_node node;
+};
+
+struct __cds_wfs_stack {
+ struct cds_wfs_head *head;
+};
+
+struct cds_wfs_stack {
+ struct cds_wfs_head *head;
+ pthread_mutex_t lock;
+};
+
+/*
+ * The transparent union allows calling functions that work on both
+ * struct cds_wfs_stack and struct __cds_wfs_stack on any of those two
+ * types.
+ */
+typedef union {
+ struct __cds_wfs_stack *_s;
+ struct cds_wfs_stack *s;
+} __attribute__((__transparent_union__)) cds_wfs_stack_ptr_t;
+
+#include "static-wfstack.h"
+
+#define cds_wfs_node_init _cds_wfs_node_init
+#define cds_wfs_init _cds_wfs_init
+#define cds_wfs_destroy _cds_wfs_destroy
+#define __cds_wfs_init ___cds_wfs_init
+#define cds_wfs_empty _cds_wfs_empty
+#define cds_wfs_push _cds_wfs_push
+
+/* Locking performed internally */
+#define cds_wfs_pop_blocking _cds_wfs_pop_blocking
+#define cds_wfs_pop_with_state_blocking _cds_wfs_pop_with_state_blocking
+#define cds_wfs_pop_all_blocking _cds_wfs_pop_all_blocking
+
+/*
+ * For iteration on cds_wfs_head returned by __cds_wfs_pop_all or
+ * cds_wfs_pop_all_blocking.
+ */
+#define cds_wfs_first _cds_wfs_first
+#define cds_wfs_next_blocking _cds_wfs_next_blocking
+#define cds_wfs_next_nonblocking _cds_wfs_next_nonblocking
+
+/* Pop locking with internal mutex */
+#define cds_wfs_pop_lock _cds_wfs_pop_lock
+#define cds_wfs_pop_unlock _cds_wfs_pop_unlock
+
+/* Synchronization ensured by the caller. See synchronization table. */
+#define __cds_wfs_pop_blocking ___cds_wfs_pop_blocking
+#define __cds_wfs_pop_with_state_blocking \
+ ___cds_wfs_pop_with_state_blocking
+#define __cds_wfs_pop_nonblocking ___cds_wfs_pop_nonblocking
+#define __cds_wfs_pop_with_state_nonblocking \
+ ___cds_wfs_pop_with_state_nonblocking
+#define __cds_wfs_pop_all ___cds_wfs_pop_all
+
+#ifdef __cplusplus
+}
+#endif
+
+/*
+ * cds_wfs_for_each_blocking: Iterate over all nodes returned by
+ * __cds_wfs_pop_all().
+ * @head: head of the queue (struct cds_wfs_head pointer).
+ * @node: iterator (struct cds_wfs_node pointer).
+ *
+ * Content written into each node before enqueue is guaranteed to be
+ * consistent, but no other memory ordering is ensured.
+ */
+#define cds_wfs_for_each_blocking(head, node) \
+ for (node = cds_wfs_first(head); \
+ node != NULL; \
+ node = cds_wfs_next_blocking(node))
+
+/*
+ * cds_wfs_for_each_blocking_safe: Iterate over all nodes returned by
+ * __cds_wfs_pop_all(). Safe against deletion.
+ * @head: head of the queue (struct cds_wfs_head pointer).
+ * @node: iterator (struct cds_wfs_node pointer).
+ * @n: struct cds_wfs_node pointer holding the next pointer (used
+ * internally).
+ *
+ * Content written into each node before enqueue is guaranteed to be
+ * consistent, but no other memory ordering is ensured.
+ */
+#define cds_wfs_for_each_blocking_safe(head, node, n) \
+ for (node = cds_wfs_first(head), \
+ n = (node ? cds_wfs_next_blocking(node) : NULL); \
+ node != NULL; \
+ node = n, n = (node ? cds_wfs_next_blocking(node) : NULL))
+
+#endif /* _URCU_WFSTACK_H */
diff --git a/doc/Makefile.am b/doc/Makefile.am
index 7c04d74019a..de68c20b4d7 100644
--- a/doc/Makefile.am
+++ b/doc/Makefile.am
@@ -1,9 +1,9 @@
EXTRA_DIST = glusterfs.8 mount.glusterfs.8 gluster.8 \
glusterd.8 glusterfsd.8
-man8_MANS = glusterfs.8 mount.glusterfs.8
+man8_MANS = glusterfs.8 mount.glusterfs.8 gluster.8
if WITH_SERVER
-man8_MANS += gluster.8 glusterd.8 glusterfsd.8
+man8_MANS += glusterd.8 glusterfsd.8
endif
CLEANFILES =
diff --git a/doc/README.md b/doc/README.md
index 8a92bd990d0..6aa28642ef4 100644
--- a/doc/README.md
+++ b/doc/README.md
@@ -18,7 +18,7 @@ The Gluster features which are 'in progress' or implemented can be found at [git
## Upgrade Guide ##
-The gluster upgrade guide is maintained at [github](https://github.com/gluster/glusterdocs). The browsable upgrade guide can be found [here](http://docs.gluster.org/en/latest/Upgrade-Guide/README/)
+The gluster upgrade guide is maintained at [github](https://github.com/gluster/glusterdocs). The browsable upgrade guide can be found [here](http://docs.gluster.org/en/latest/Upgrade-Guide)
The doc patch has to be sent against the above mentioned repository.
diff --git a/doc/debugging/analyzing-regression-cores.md b/doc/debugging/analyzing-regression-cores.md
index cbbb387794d..5e10f41c6eb 100644
--- a/doc/debugging/analyzing-regression-cores.md
+++ b/doc/debugging/analyzing-regression-cores.md
@@ -1,36 +1,35 @@
-This document explains how to analyze core-dumps obtained from regression
-machines, with examples.
-1) Download the core-tarball and extract it.
-2) 'cd' into directory where the tarball is extracted.
-~~~
+# Analyzing Regression Cores
+This document explains how to analyze core-dumps obtained from regression machines, with examples.
+1. Download the core-tarball and extract it.
+2. `cd` into directory where the tarball is extracted.
+```
[sh]# pwd
/home/user/Downloads
[sh]# ls
build build-install-20150625_05_42_39.tar.bz2 lib64 usr
-~~~
-3) Determine the core file you need to examine. There can be more than one core file.
-You can list them from './build/install/cores' directory.
-~~~
+```
+3. Determine the core file you need to examine. There can be more than one core file. You can list them from './build/install/cores' directory.
+```
[sh]# ls build/install/cores/
core.9341 liblist.txt liblist.txt.tmp
-~~~
+```
In case you are unsure which binary generated the core-file, executing 'file' command on it will help.
-~~~
+```
[sh]# file ./build/install/cores/core.9341
./build/install/cores/core.9341: ELF 64-bit LSB core file x86-64, version 1 (SYSV), SVR4-style, from '/build/install/sbin/glusterfsd -s slave26.cloud.gluster.org --volfile-id patchy'
-~~~
-As seen, the core file was generated by glusterfsd binary, and path to it is provided (/build/install/sbin/glusterfsd).
-4) Now, run the following command on the core:
-~~~
+```
+As seen, the core file was generated by glusterfsd binary, and path to it is provided (/build/install/sbin/glusterfsd).
+
+4. Now, run the following command on the core:
+```
gdb -ex 'set sysroot ./' -ex 'core-file ./build/install/cores/core.xxx' <target, say ./build/install/sbin/glusterd>
In this case,
gdb -ex 'set sysroot ./' -ex 'core-file ./build/install/cores/core.9341' ./build/install/sbin/glusterfsd
-~~~
-5) You can cross check if all shared libraries are available and loaded by using 'info sharedlibrary' command from
-inside gdb.
-6) Once verified, usual gdb commands based on requirement can be used to debug the core.
-'bt' or 'backtrace' from gdb of core used in examples:
-~~~
+```
+5. You can cross check if all shared libraries are available and loaded by using 'info sharedlibrary' command from inside gdb.
+6. Once verified, usual gdb commands based on requirement can be used to debug the core.
+ `bt` or `backtrace` from gdb of core used in examples:
+```
Core was generated by `/build/install/sbin/glusterfsd -s slave26.cloud.gluster.org --volfile-id patchy'.
Program terminated with signal SIGABRT, Aborted.
#0 0x00007f512a54e625 in raise () from ./lib64/libc.so.6
@@ -52,4 +51,4 @@ Program terminated with signal SIGABRT, Aborted.
#12 0x00007f512a55f8f0 in ?? () from ./lib64/libc.so.6
#13 0x0000000000000000 in ?? ()
(gdb)
-~~~
+```
diff --git a/doc/debugging/gfid-to-path.md b/doc/debugging/gfid-to-path.md
index 09c459e52c8..1917bf2cca1 100644
--- a/doc/debugging/gfid-to-path.md
+++ b/doc/debugging/gfid-to-path.md
@@ -1,37 +1,37 @@
-#Convert GFID to Path
+# Convert GFID to Path
GlusterFS internal file identifier (GFID) is a uuid that is unique to each
file across the entire cluster. This is analogous to inode number in a
normal filesystem. The GFID of a file is stored in its xattr named
`trusted.gfid`.
-####Special mount using [gfid-access translator][1]:
-~~~
+#### Special mount using [gfid-access translator][1]:
+```
mount -t glusterfs -o aux-gfid-mount vm1:test /mnt/testvol
-~~~
+```
Assuming, you have `GFID` of a file from changelog (or somewhere else).
For trying this out, you can get `GFID` of a file from mountpoint:
-~~~
+```
getfattr -n glusterfs.gfid.string /mnt/testvol/dir/file
-~~~
+```
---
-###Get file path from GFID (Method 1):
+### Get file path from GFID (Method 1):
**(Lists hardlinks delimited by `:`, returns path as seen from mountpoint)**
-####Turn on build-pgfid option
-~~~
+#### Turn on build-pgfid option
+```
gluster volume set test build-pgfid on
-~~~
+```
Read virtual xattr `glusterfs.ancestry.path` which contains the file path
-~~~
+```
getfattr -n glusterfs.ancestry.path -e text /mnt/testvol/.gfid/<GFID>
-~~~
+```
**Example:**
-~~~
+```
[root@vm1 glusterfs]# ls -il /mnt/testvol/dir/
total 1
10610563327990022372 -rw-r--r--. 2 root root 3 Jul 17 18:05 file
@@ -46,28 +46,23 @@ glusterfs.gfid.string="11118443-1894-4273-9340-4b212fa1c0e4"
getfattr: Removing leading '/' from absolute path names
# file: mnt/testvol/.gfid/11118443-1894-4273-9340-4b212fa1c0e4
glusterfs.ancestry.path="/dir/file:/dir/file3"
-~~~
+```
---
-###Get file path from GFID (Method 2):
+### Get file path from GFID (Method 2):
**(Does not list all hardlinks, returns backend brick path)**
-~~~
+```
getfattr -n trusted.glusterfs.pathinfo -e text /mnt/testvol/.gfid/<GFID>
-~~~
+```
**Example:**
-~~~
+```
[root@vm1 glusterfs]# getfattr -n trusted.glusterfs.pathinfo -e text /mnt/testvol/.gfid/11118443-1894-4273-9340-4b212fa1c0e4
getfattr: Removing leading '/' from absolute path names
# file: mnt/testvol/.gfid/11118443-1894-4273-9340-4b212fa1c0e4
trusted.glusterfs.pathinfo="(<DISTRIBUTE:test-dht> <POSIX(/mnt/brick-test/b):vm1:/mnt/brick-test/b/dir//file3>)"
-~~~
+```
---
-###Get file path from GFID (Method 3):
-https://gist.github.com/semiosis/4392640
-
----
-####References and links:
+#### References and links:
[posix: placeholders for GFID to path conversion](http://review.gluster.org/5951)
-[1]: https://github.com/gluster/glusterfs/blob/master/doc/features/gfid-access.md
diff --git a/doc/debugging/split-brain.md b/doc/debugging/split-brain.md
index b0d938e26bc..6b122c40551 100644
--- a/doc/debugging/split-brain.md
+++ b/doc/debugging/split-brain.md
@@ -1,33 +1,36 @@
-Steps to recover from File split-brain.
-======================================
-
-Quick Start:
-============
-1. Get the path of the file that is in split-brain:
-> It can be obtained either by
-> a) The command `gluster volume heal info split-brain`.
-> b) Identify the files for which file operations performed
- from the client keep failing with Input/Output error.
-
-2. Close the applications that opened this file from the mount point.
+# Steps to recover from File split-brain
+This document contains steps to recover from a file split-brain.
+## Quick Start:
+### Step 1. Get the path of the file that is in split-brain:
+It can be obtained either by
+1. The command `gluster volume heal info split-brain`.
+2. Identify the files for which file operations performed from the client keep failing with Input/Output error.
+
+### Step 2. Close the applications that opened this file from the mount point.
In case of VMs, they need to be powered-off.
-3. Decide on the correct copy:
-> This is done by observing the afr changelog extended attributes of the file on
+### Step 3. Decide on the correct copy:
+This is done by observing the afr changelog extended attributes of the file on
the bricks using the getfattr command; then identifying the type of split-brain
(data split-brain, metadata split-brain, entry split-brain or split-brain due to
gfid-mismatch); and finally determining which of the bricks contains the 'good copy'
of the file.
-> `getfattr -d -m . -e hex <file-path-on-brick>`.
+```
+getfattr -d -m . -e hex <file-path-on-brick>
+```
+
It is also possible that one brick might contain the correct data while the
other might contain the correct metadata.
-4. Reset the relevant extended attribute on the brick(s) that contains the
-'bad copy' of the file data/metadata using the setfattr command.
-> `setfattr -n <attribute-name> -v <attribute-value> <file-path-on-brick>`
+### Step 4. Reset the relevant extended attribute on the brick(s) that contains the 'bad copy' of the file data/metadata using the setfattr command.
+```
+setfattr -n <attribute-name> -v <attribute-value> <file-path-on-brick>
+```
-5. Trigger self-heal on the file by performing lookup from the client:
-> `ls -l <file-path-on-gluster-mount>`
+### Step 5. Trigger self-heal on the file by performing lookup from the client:
+```
+ls -l <file-path-on-gluster-mount>
+```
Detailed Instructions for steps 3 through 5:
===========================================
@@ -36,13 +39,15 @@ afr changelog extended attributes.
Execute `getfattr -d -m . -e hex <file-path-on-brick>`
-* Example:
+Example:
+```
[root@store3 ~]# getfattr -d -e hex -m. brick-a/file.txt
\#file: brick-a/file.txt
security.selinux=0x726f6f743a6f626a6563745f723a66696c655f743a733000
trusted.afr.vol-client-2=0x000000000000000000000000
trusted.afr.vol-client-3=0x000000000200000000000000
trusted.gfid=0x307a5c9efddd4e7c96e94fd4bcdcbd1b
+```
The extended attributes with `trusted.afr.<volname>-client-<subvolume-index>`
are used by afr to maintain changelog of the file.The values of the
@@ -51,10 +56,11 @@ client (fuse or nfs-server) processes. When the glusterfs client modifies a file
or directory, the client contacts each brick and updates the changelog extended
attribute according to the response of the brick.
-'subvolume-index' is nothing but (brick number - 1) in
+`subvolume-index` is nothing but (brick number - 1) in
`gluster volume info <volname>` output.
-* Example:
+Example:
+```
[root@pranithk-laptop ~]# gluster volume info vol
Volume Name: vol
Type: Distributed-Replicate
@@ -71,6 +77,7 @@ attribute according to the response of the brick.
brick-f: pranithk-laptop:/gfs/brick-f
brick-g: pranithk-laptop:/gfs/brick-g
brick-h: pranithk-laptop:/gfs/brick-h
+```
In the example above:
```
@@ -91,12 +98,15 @@ present in all the other bricks in it's replica set as seen by that brick.
In the example volume given above, all files in brick-a will have 2 entries,
one for itself and the other for the file present in it's replica pair, i.e.brick-b:
+```
trusted.afr.vol-client-0=0x000000000000000000000000 -->changelog for itself (brick-a)
trusted.afr.vol-client-1=0x000000000000000000000000 -->changelog for brick-b as seen by brick-a
-
+```
Likewise, all files in brick-b will have:
+```
trusted.afr.vol-client-0=0x000000000000000000000000 -->changelog for brick-a as seen by brick-b
trusted.afr.vol-client-1=0x000000000000000000000000 -->changelog for itself (brick-b)
+```
The same can be extended for other replica pairs.
@@ -122,7 +132,8 @@ When a file split-brain happens it could be either data split-brain or
meta-data split-brain or both. When a split-brain happens the changelog of the
file would be something like this:
-* Example:(Lets consider both data, metadata split-brain on same file).
+Example:(Lets consider both data, metadata split-brain on same file).
+```
[root@pranithk-laptop vol]# getfattr -d -m . -e hex /gfs/brick-?/a
getfattr: Removing leading '/' from absolute path names
\#file: gfs/brick-a/a
@@ -133,10 +144,11 @@ trusted.gfid=0x80acdbd886524f6fbefa21fc356fed57
trusted.afr.vol-client-0=0x000003b00000000100000000
trusted.afr.vol-client-1=0x000000000000000000000000
trusted.gfid=0x80acdbd886524f6fbefa21fc356fed57
+```
-###Observations:
+### Observations:
-####According to changelog extended attributes on file /gfs/brick-a/a:
+#### According to changelog extended attributes on file /gfs/brick-a/a:
The first 8 digits of trusted.afr.vol-client-0 are all
zeros (0x00000000................), and the first 8 digits of
trusted.afr.vol-client-1 are not all zeros (0x000003d7................).
@@ -149,7 +161,7 @@ trusted.afr.vol-client-1 are not all zeros (0x........00000001........).
So the changelog on /gfs/brick-a/a implies that some metadata operations succeeded
on itself but failed on /gfs/brick-b/a.
-####According to Changelog extended attributes on file /gfs/brick-b/a:
+#### According to Changelog extended attributes on file /gfs/brick-b/a:
The first 8 digits of trusted.afr.vol-client-0 are not all
zeros (0x000003b0................), and the first 8 digits of
trusted.afr.vol-client-1 are all zeros (0x00000000................).
@@ -205,6 +217,7 @@ Hence execute
`setfattr -n trusted.afr.vol-client-1 -v 0x000003d70000000000000000 /gfs/brick-a/a`
Thus after the above operations are done, the changelogs look like this:
+```
[root@pranithk-laptop vol]# getfattr -d -m . -e hex /gfs/brick-?/a
getfattr: Removing leading '/' from absolute path names
\#file: gfs/brick-a/a
@@ -216,7 +229,7 @@ trusted.gfid=0x80acdbd886524f6fbefa21fc356fed57
trusted.afr.vol-client-0=0x000000000000000100000000
trusted.afr.vol-client-1=0x000000000000000000000000
trusted.gfid=0x80acdbd886524f6fbefa21fc356fed57
-
+```
Triggering Self-heal:
---------------------
@@ -243,9 +256,9 @@ needs to be removed.The gfid-link files are present in the .glusterfs folder
in the top-level directory of the brick. If the gfid of the file is
0x307a5c9efddd4e7c96e94fd4bcdcbd1b (the trusted.gfid extended attribute got
from the getfattr command earlier),the gfid-link file can be found at
-> /gfs/brick-a/.glusterfs/30/7a/307a5c9efddd4e7c96e94fd4bcdcbd1b
+`/gfs/brick-a/.glusterfs/30/7a/307a5c9efddd4e7c96e94fd4bcdcbd1b`
-####Word of caution:
+#### Word of caution:
Before deleting the gfid-link, we have to ensure that there are no hard links
to the file present on that brick. If hard-links exist,they must be deleted as
well.
diff --git a/doc/debugging/statedump.md b/doc/debugging/statedump.md
index 9939576e270..9dfdce15fad 100644
--- a/doc/debugging/statedump.md
+++ b/doc/debugging/statedump.md
@@ -1,21 +1,30 @@
-#Statedump
+# Statedump
Statedump is a file generated by glusterfs process with different data structure state which may contain the active inodes, fds, mempools, iobufs, memory allocation stats of different types of datastructures per xlator etc.
-##How to generate statedump
-We can find the directory where statedump files are created using 'gluster --print-statedumpdir' command.
+## How to generate statedump
+We can find the directory where statedump files are created using `gluster --print-statedumpdir` command.
Create that directory if not already present based on the type of installation.
Lets call this directory `statedump-directory`.
-We can generate statedump using 'kill -USR1 <pid-of-gluster-process>'.
+We can generate statedump using `kill -USR1 <pid-of-gluster-process>`.
gluster-process is nothing but glusterd/glusterfs/glusterfsd process.
There are also commands to generate statedumps for brick processes/nfs server/quotad
-For bricks: `gluster volume statedump <volname>`
+For bricks:
+```
+gluster volume statedump <volname>
+```
-For nfs server: `gluster volume statedump <volname> nfs`
+For nfs server:
+```
+gluster volume statedump <volname> nfs
+```
-For quotad: `gluster volume statedump <volname> quotad`
+For quotad:
+```
+gluster volume statedump <volname> quotad
+```
For brick-processes files will be created in `statedump-directory` with name of the file as `hyphenated-brick-path.<pid>.dump.timestamp`. For all other processes it will be `glusterdump.<pid>.dump.timestamp`.
@@ -24,21 +33,21 @@ processes could have used the `SIGUSR1` signal already for other purposes.
To generate statedump for the processes, using libgfapi, below command can be
executed from one of the nodes in the gluster cluster to which the libgfapi
application is connected to.
-
- gluster volume statedump <volname> client <hostname>:<process id>
-
+```
+gluster volume statedump <volname> client <hostname>:<process id>
+```
The statedumps can be found in the `statedump-directory`, the name of the
statedumps being `glusterdump.<pid>.dump.timestamp`. For a process there can be
multiple such files created depending on the number of times the volume is
accessed by the process (related to the number of `glfs_init()` calls).
-##How to read statedump
+## How to read statedump
We shall see snippets of each type of statedump.
First and last lines of the file have starting and ending time of writing the statedump file. Times will be in UTC timezone.
mallinfo return status is printed in the following format. Please read man mallinfo for more information about what each field means.
-###Mallinfo
+### Mallinfo
```
[mallinfo]
mallinfo_arena=100020224 /* Non-mmapped space allocated (bytes) */
@@ -53,7 +62,7 @@ mallinfo_fordblks=3310112 /* Total free space (bytes) */
mallinfo_keepcost=133712 /* Top-most, releasable space (bytes) */
```
-###Data structure allocation stats
+### Data structure allocation stats
For every xlator data structure memory per translator loaded in the call-graph is displayed in the following format:
For xlator with name: glusterfs
@@ -74,7 +83,7 @@ max_num_allocs=3 #Maximum number of active allocations at any point in the life
total_allocs=7 #Number of times this data is allocated in the life of the process.
```
-###Mempools
+### Mempools
Mempools are optimization to reduce the number of allocations of a data type. If we create a mem-pool of lets say 1024 elements for a data-type, new elements will be allocated from heap using syscalls like calloc, only if all the 1024 elements in the pool are in active use.
@@ -94,7 +103,7 @@ cur-stdalloc=0 #Denotes the number of allocations made from heap once cold-count
max-stdalloc=0 #Maximum number of allocations from heap that are in active use at any point in the life of the process.
```
-###Iobufs
+### Iobufs
```
[iobuf.global]
iobuf_pool=0x1f0d970 #The memory pool for iobufs
@@ -105,7 +114,7 @@ iobuf_pool.arena_cnt=8 #Total number of arenas in the pool
iobuf_pool.request_misses=0 #The number of iobufs that were stdalloc'd (as they exceeded the default max page size provided by iobuf_pool).
```
-There are 3 lists of arenas
+There are 3 lists of arenas:
1. Arena list: arenas allocated during iobuf pool creation and the arenas that are in use(active_cnt != 0) will be part of this list.
2. Purge list: arenas that can be purged(no active iobufs, active_cnt == 0).
@@ -142,7 +151,7 @@ arena.6.active_iobuf.2.ptr=0x7fdb92189000
At any given point in time if there are lots of filled arenas then that could be a sign of iobuf leaks.
-###Call stack
+### Call stack
All the fops received by gluster are handled using call-stacks. Call stack contains the information about uid/gid/pid etc of the process that is executing the fop. Each call-stack contains different call-frames per xlator which handles that fop.
```
@@ -157,7 +166,7 @@ op=LOOKUP #Fop
type=1 #Type of the op i.e. FOP/MGMT-OP
cnt=9 #Number of frames in this stack.
```
-###Call-frame
+### Call-frame
Each frame will have information about which xlator the frame belongs to, what is the function it wound to/from and will be unwind to. It also mentions if the unwind happened or not. If we observe hangs in the system and want to find out which xlator is causing it. Take a statedump and see what is the final xlator which is yet to be unwound.
```
@@ -172,7 +181,7 @@ wind_to=priv->children[i]->fops->lookup
unwind_to=afr_lookup_cbk #Parent xlator function to which unwind happened
```
-###History of operations in Fuse
+### History of operations in Fuse
Fuse maintains history of operations that happened in fuse.
@@ -188,7 +197,7 @@ TIME=2014-07-09 16:44:57.523394
message=[0] fuse_getattr_resume: 4591, STAT, path: (/iozone.tmp), gfid: (3afb4968-5100-478d-91e9-76264e634c9f)
```
-###Xlator configuration
+### Xlator configuration
```
[cluster/replicate.r2-replicate-0] #Xlator type, name information
child_count=2 #Number of children to the xlator
@@ -208,7 +217,7 @@ favorite_child=-1
wait_count=1
```
-###Graph/inode table
+### Graph/inode table
```
[active graph - 1]
@@ -220,7 +229,7 @@ conn.1.bound_xl./data/brick01a/homegfs.lru_size=183 #Number of inodes present in
conn.1.bound_xl./data/brick01a/homegfs.purge_size=0 #Number of inodes present in purge list
```
-###Inode
+### Inode
```
[conn.1.bound_xl./data/brick01a/homegfs.active.324] #324th inode in active inode list
gfid=e6d337cf-97eb-44b3-9492-379ba3f6ad42 #Gfid of the inode
@@ -239,7 +248,7 @@ ia_type=2
Ref by xl:.fuse=1
Ref by xl:.patchy-client-0=-1
```
-###Inode context
+### Inode context
For each inode per xlator some context could be stored. This context can also be printed in the statedump. Here is the inode ctx of locks xlator
```
[xlator.features.locks.homegfs-locks.inode]
@@ -256,12 +265,12 @@ lock-dump.domain.domain=homegfs-replicate-0 #Domain name where entry/data operat
inodelk.inodelk[0](ACTIVE)=type=WRITE, whence=0, start=11141120, len=131072, pid = 18446744073709551615, owner=080b1ada117f0000, client=0xb7fc30, connection-id=compute-30-029.com-3505-2014/06/29-14:46:12:477358-homegfs-client-0-0-1, granted at Sun Jun 29 11:10:36 2014 #Active lock information
```
-##FAQ
-###How to debug Memory leaks using statedump?
+## FAQ
+### How to debug Memory leaks using statedump?
-####Using memory accounting feature:
+#### Using memory accounting feature:
-`https://bugzilla.redhat.com/show_bug.cgi?id=1120151` is one of the bugs which was debugged using statedump to see which data-structure is leaking. Here is the process used to find what the leak is using statedump. According to the bug the observation is that the process memory usage is increasing whenever one of the bricks is wiped in a replicate volume and a `full` self-heal is invoked to heal the contents. Statedump of the process is taken using kill -USR1 `<pid-of-gluster-self-heal-daemon>`.
+[Bug 1120151](https://bugzilla.redhat.com/show_bug.cgi?id=1120151) is one of the bugs which was debugged using statedump to see which data-structure is leaking. Here is the process used to find what the leak is using statedump. According to the bug the observation is that the process memory usage is increasing whenever one of the bricks is wiped in a replicate volume and a `full` self-heal is invoked to heal the contents. Statedump of the process is taken using `kill -USR1 <pid-of-gluster-self-heal-daemon>`.
```
grep -w num_allocs glusterdump.5225.dump.1405493251
num_allocs=77078
@@ -284,10 +293,10 @@ grep of the statedump revealed too many allocations for the following data-types
3. gf_common_mt_mem_pool.
After checking afr-code for allocations with tag `gf_common_mt_char` found `data-self-heal` code path does not free one such allocated memory. `gf_common_mt_mem_pool` suggests that there is a leak in pool memory. `replicate-0:dict_t`, `glusterfs:data_t` and `glusterfs:data_pair_t` pools are using lot of memory, i.e. cold_count is `0` and too many allocations. Checking source code of dict.c revealed that `key` in `dict` is allocated with `gf_common_mt_char` i.e. `2.` tag and value is created using gf_asprintf which in-turn uses `gf_common_mt_asprintf` i.e. `1.`. Browsing the code for leak in self-heal code paths lead to a line which over-writes a variable with new dictionary even when it was already holding a reference to another dictionary. After fixing these leaks, ran the same test to verify that none of the `num_allocs` are increasing even after healing 10,000 files directory hierarchy in statedump of self-heal daemon.
-Please check http://review.gluster.org/8316 for more info about patch/code.
+Please check this [patch](http://review.gluster.org/8316) for more info about the fix.
-####Debugging leaks in memory pools:
-Statedump output of memory pools was used to test and verify the fixes to https://bugzilla.redhat.com/show_bug.cgi?id=1134221. On code analysis, dict_t objects were found to be leaking (in terms of not being unref'd enough number of times, during name self-heal. The test involved creating 100 files on plain replicate volume, removing them from one of the bricks's backend, and then triggering lookup on them from the mount point. Statedump of the mount process was taken before executing the test case and after it, after compiling glusterfs with -DDEBUG flags (to have cold count set to 0 by default).
+#### Debugging leaks in memory pools:
+Statedump output of memory pools was used to test and verify the fixes to [Bug 1134221](https://bugzilla.redhat.com/show_bug.cgi?id=1134221). On code analysis, dict_t objects were found to be leaking (in terms of not being unref'd enough number of times, during name self-heal. The test involved creating 100 files on plain replicate volume, removing them from one of the brick's backend, and then triggering lookup on them from the mount point. Statedump of the mount process was taken before executing the test case and after it, after compiling glusterfs with -DDEBUG flags (to have cold count set to 0 by default).
Statedump output of the fuse mount process before the test case was executed:
@@ -319,7 +328,7 @@ cur-stdalloc=214
max-stdalloc=220
```
-Here, with cold count being 0 by default, cur-stdalloc indicated the number of dict_t objects that were allocated in heap using mem_get(), and yet to be freed using mem_put() (refer to https://github.com/gluster/glusterfs/blob/master/doc/data-structures/mem-pool.md for more details on how mempool works). After the test case (name selfheal of 100 files), there was a rise in the cur-stdalloc value (from 14 to 214) for dict_t.
+Here, with cold count being 0 by default, `cur-stdalloc` indicated the number of `dict_t` objects that were allocated in heap using `mem_get()`, and yet to be freed using `mem_put()` (refer to this [page](../developer-guide/datastructure-mem-pool.md) for more details on how mempool works). After the test case (name selfheal of 100 files), there was a rise in the cur-stdalloc value (from 14 to 214) for `dict_t`.
After these leaks were fixed, glusterfs was again compiled with -DDEBUG flags, and the same steps were performed again and statedump was taken before and after executing the test case, of the mount. This was done to ascertain the validity of the fix. And the following are the results:
@@ -353,8 +362,8 @@ max-stdalloc=119
```
The value of cur-stdalloc remained 14 before and after the test, indicating that the fix indeed does what it's supposed to do.
-###How to debug hangs because of frame-loss?
-`https://bugzilla.redhat.com/show_bug.cgi?id=994959` is one of the bugs where statedump was helpful in finding where the frame was lost. Here is the process used to find where the hang is using statedump.
+### How to debug hangs because of frame-loss?
+[Bug 994959](https://bugzilla.redhat.com/show_bug.cgi?id=994959) is one of the bugs where statedump was helpful in finding where the frame was lost. Here is the process used to find where the hang is using statedump.
When the hang was observed, statedumps are taken for all the processes. On mount's statedump the following stack is shown:
```
[global.callpool.stack.1.frame.1]
@@ -402,4 +411,4 @@ unwind_to=qr_readdirp_cbk
```
`unwind_to` shows that call was unwound to `afr_readdirp_cbk` from client xlator.
Inspecting that function revealed that afr is not unwinding the stack when fop failed.
-Check http://review.gluster.org/5531 for more info about patch/code changes.
+Check this [patch](http://review.gluster.org/5531) for more info about the fix.
diff --git a/doc/developer-guide/Language-Bindings.md b/doc/developer-guide/Language-Bindings.md
index 33f2e58504c..951f5fae2f6 100644
--- a/doc/developer-guide/Language-Bindings.md
+++ b/doc/developer-guide/Language-Bindings.md
@@ -1,3 +1,4 @@
+# Language Bindings
GlusterFS 3.4 introduced the libgfapi client API for C programs. This
page lists bindings to the libgfapi C library from other languages.
diff --git a/doc/developer-guide/README.md b/doc/developer-guide/README.md
index 4d15a7a2e4e..aaf9c7476b0 100644
--- a/doc/developer-guide/README.md
+++ b/doc/developer-guide/README.md
@@ -51,11 +51,15 @@ Daemon Management Framework
Translators
-----------
-- [Block Device Tanslator](./bd-xlator.md)
- [Performance/write-Behind Translator](./write-behind.md)
- [Translator Development](./translator-development.md)
- [Storage/posix Translator](./posix.md)
-- [Compression translator](./network_compression.md)
+
+
+Brick multiplex
+---------------
+
+- [Brick mux resource reduction](./brickmux-thread-reduction.md)
Fuse
----
@@ -67,9 +71,9 @@ Testing/Debugging
- [Unit Tests in GlusterFS](./unittest.md)
- [Using the Gluster Test
- Framework](./Using Gluster Test Framework.md) - Step by
+ Framework](./Using-Gluster-Test-Framework.md) - Step by
step instructions for running the Gluster Test Framework
-- [Coredump Analysis](./coredump-analysis.md) - Steps to analize coredumps generated by regression machines.
+- [Coredump Analysis](../debugging/analyzing-regression-cores.md) - Steps to analize coredumps generated by regression machines.
- [Identifying Resource Leaks](./identifying-resource-leaks.md)
Release Process
diff --git a/doc/developer-guide/Using-Gluster-Test-Framework.md b/doc/developer-guide/Using-Gluster-Test-Framework.md
index 96fa9247e84..d2bb1c391da 100644
--- a/doc/developer-guide/Using-Gluster-Test-Framework.md
+++ b/doc/developer-guide/Using-Gluster-Test-Framework.md
@@ -1,3 +1,4 @@
+# USing Gluster Test Framwork
Description
-----------
diff --git a/doc/developer-guide/afr-locks-evolution.md b/doc/developer-guide/afr-locks-evolution.md
index 7d2a136d871..2dabbcfeb13 100644
--- a/doc/developer-guide/afr-locks-evolution.md
+++ b/doc/developer-guide/afr-locks-evolution.md
@@ -32,10 +32,10 @@ AFR makes use of locks xlator extensively:
* For Entry self-heal, it is `entrylk(NULL name, parent inode)`. Specifying NULL for the name takes full lock on the directory referred to by the inode.
* For data self-heal, there is a bit of history as to how locks evolved:
-###Initial version (say version 1) :
+### Initial version (say version 1) :
There was no concept of selfheal daemon (shd). Only client lookups triggered heals. so AFR always took `inodelk(0,0,DATA_DOMAIN)` for healing. The issue with this approach was that when heal was in progress, I/O from clients was blocked .
-###version 2:
+### version 2:
shd was introduced. We needed to allow I/O to go through when heal was going,provided the ranges did not overlap. To that extent, the following approach was adopted:
+ 1.shd takes (full inodelk in DATA_DOMAIN). Thus client FOPS are blocked and cannot modify changelog-xattrs
@@ -79,7 +79,7 @@ It modifies data but the FOP succeeds only on brick 2. writev returns success, a
and thus goes ahead and copies stale 128Kb from brick 1 to brick2. Thus as far as application is concerned, `writev` returned success but bricks have stale data.
What needs to be done is `writev` must return success only if it succeeded on atleast one source brick (brick b1 in this case). Otherwise The heal still happens in reverse direction but as far as the application is concerned, it received an error.
-###Note on lock **domains**
+### Note on lock **domains**
We have used conceptual names in this document like DATA_DOMAIN/ METADATA_DOMAIN/ SELF_HEAL_DOMAIN. In the code, these are mapped to strings that are based on the AFR xlator name like so:
DATA_DOMAIN --->"vol_name-replicate-n"
diff --git a/doc/developer-guide/afr-self-heal-daemon.md b/doc/developer-guide/afr-self-heal-daemon.md
index b85ddd1c856..65940d420b7 100644
--- a/doc/developer-guide/afr-self-heal-daemon.md
+++ b/doc/developer-guide/afr-self-heal-daemon.md
@@ -39,7 +39,7 @@ When a client (mount) performs an operation on the file, the index xlator presen
and removes it in post-op phase if the operation is successful. Thus if an entry is present inside the .glusterfs/indices/xattrop/ directory when there is no I/O
happening on the file, it means the file needs healing (or atleast an examination if the brick crashed after the post-op completed but just before the removal of the hardlink).
-####Index heal steps:
+#### Index heal steps:
<pre><code>
In shd process of *each node* {
opendir +readdir (.glusterfs/indices/xattrop/)
diff --git a/doc/developer-guide/bd-xlator.md b/doc/developer-guide/bd-xlator.md
deleted file mode 100644
index 1771fb6e24b..00000000000
--- a/doc/developer-guide/bd-xlator.md
+++ /dev/null
@@ -1,469 +0,0 @@
-#Block device translator
-
-Block device translator (BD xlator) is a translator added to GlusterFS which provides block backend for GlusterFS. This replaces the existing bd_map translator in GlusterFS that provided similar but very limited functionality. GlusterFS expects the underlying brick to be formatted with a POSIX compatible file system. BD xlator changes that and allows for having bricks that are raw block devices like LVM which needn’t have any file systems on them. Hence with BD xlator, it becomes possible to build a GlusterFS volume comprising of bricks that are logical volumes (LV).
-
-##bd
-
-BD xlator maps underlying LVs to files and hence the LVs appear as files to GlusterFS clients. Though BD volume externally appears very similar to the usual Posix volume, not all operations are supported or possible for the files on a BD volume. Only those operations that make sense for a block device are supported and the exact semantics are described in subsequent sections.
-
-While Posix volume takes a file system directory as brick, BD volume needs a volume group (VG) as brick. In the usual use case of BD volume, a file created on BD volume will result in an LV being created in the brick VG. In addition to a VG, BD volume also needs a file system directory that should be specified at the volume creation time. This directory is necessary for supporting the notion of directories and directory hierarchy for the BD volume. Metadata about LVs (size, mapping info) is stored in this directory.
-
-BD xlator was mainly developed to use block devices directly as VM images when GlusterFS is used as storage for KVM virtualization. Some of the salient points of BD xlator are
-
-* Since BD supports file level snapshots and clones by leveraging the snapshot and clone capabilities of LVM, it can be used to fully off-load snapshot and cloning operations from QEMU to the storage (GlusterFS) itself.
-
-* BD understands dm-thin LVs and hence can support files that are backed by thinly provisioned LVs. This capability of BD xlator translates to having thinly provisioned raw VM images.
-
-* BD enables thin LVs from a thin pool to be used from multiple nodes that have visibility to GlusterFS BD volume. Thus thin pool can be used as a VM image repository allowing access/visibility to it from multiple nodes.
-
-* BD supports true zerofill by using BLKZEROOUT ioctl on underlying block devices. Thus BD allows SCSI WRITESAME to be used on underlying block device if the device supports it.
-
-Though BD xlator is primarily intended to be used with block devices, it does provide full Posix xlator compatibility for files that are created on BD volume but are not backed by or mapped to a block device. Such files which don’t have a block device mapping exist on the Posix directory that is specified during BD volume creation. BD xlator is available from GlusterFS-3.5 release.
-
-###Compiling BD translator
-
-BD xlator needs lvm2 development library. –enable-bd-xlator option can be used with `./configure` script to explicitly enable BD translator. The following snippet from the output of configure script shows that BD xlator is enabled for compilation.
-
-
-#####GlusterFS configure summary
-
- …
- Block Device xlator : yes
-
-
-###Creating a BD volume
-
-BD supports hosting of both linear LV and thin LV within the same volume. However seperate examples are provided below. As noted above, the prerequisite for a BD volume is VG which is created from a loop device here, but it can be any other device too.
-
-
-* Creating BD volume with linear LV backend
-
-* Create a loop device
-
-
- [root@node ~]# dd if=/dev/zero of=bd-loop count=1024 bs=1M
-
- [root@node ~]# losetup /dev/loop0 bd-loop
-
-
-* Prepare a brick by creating a VG
-
- [root@node ~]# pvcreate /dev/loop0
-
- [root@node ~]# vgcreate bd-vg /dev/loop0
-
-
-* Create the BD volume
-
-* Create a POSIX directory first
-
-
- [root@node ~]# mkdir /bd-meta
-
-It is recommended that this directory is created on an LV in the brick VG itself so that both data and metadata live together on the same device.
-
-
-* Create and mount the volume
-
- [root@node ~]# gluster volume create bd node:/bd-meta?bd-vg force
-
-
-The general syntax for specifying the brick is `host:/posix-dir?volume-group-name` where “?” is the separator.
-
-
-
- [root@node ~]# gluster volume start bd
- [root@node ~]# gluster volume info bd
- Volume Name: bd
- Type: Distribute
- Volume ID: cb042d2a-f435-4669-b886-55f5927a4d7f
- Status: Started
- Xlator 1: BD
- Capability 1: offload_copy
- Capability 2: offload_snapshot
- Number of Bricks: 1
- Transport-type: tcp
- Bricks:
- Brick1: node:/bd-meta
- Brick1 VG: bd-vg
-
-
-
- [root@node ~]# mount -t glusterfs node:/bd /mnt
-
-* Create a file that is backed by an LV
-
- [root@node ~]# ls /mnt
-
- [root@node ~]#
-
-Since the volume is empty now, so is the underlying VG.
-
- [root@node ~]# lvdisplay bd-vg
- [root@node ~]#
-
-Creating a file that is mapped to an LV is a 2 step operation. First the file should be created on the mount point and a specific extended attribute should be set to map the file to LV.
-
- [root@node ~]# touch /mnt/lv
- [root@node ~]# setfattr -n “user.glusterfs.bd” -v “lv” /mnt/lv
-
-Now an LV got created in the VG brick and the file /mnt/lv maps to this LV. Any read/write to this file ends up as read/write to the underlying LV.
-
- [root@node ~]# lvdisplay bd-vg
- — Logical volume —
- LV Path /dev/bd-vg/6ff0f25f-2776-4d19-adfb-df1a3cab8287
- LV Name 6ff0f25f-2776-4d19-adfb-df1a3cab8287
- VG Name bd-vg
- LV UUID PjMPcc-RkD5-RADz-6ixG-UYsk-oclz-vL0nv6
- LV Write Access read/write
- LV Creation host, time node, 2013-11-26 16:15:45 +0530
- LV Status available
- open 0
- LV Size 4.00 MiB
- Current LE 1
- Segments 1
- Allocation inherit
- Read ahead sectors 0
- Block device 253:6
-
-The file gets created with default LV size which is 1 LE which is 4MB in this case.
-
- [root@node ~]# ls -lh /mnt/lv
- -rw-r–r–. 1 root root 4.0M Nov 26 16:15 /mnt/lv
-
-truncate can be used to set the required file size.
-
- [root@node ~]# truncate /mnt/lv -s 256M
- [root@node ~]# lvdisplay bd-vg
- — Logical volume —
- LV Path /dev/bd-vg/6ff0f25f-2776-4d19-adfb-df1a3cab8287
- LV Name 6ff0f25f-2776-4d19-adfb-df1a3cab8287
- VG Name bd-vg
- LV UUID PjMPcc-RkD5-RADz-6ixG-UYsk-oclz-vL0nv6
- LV Write Access read/write
- LV Creation host, time node, 2013-11-26 16:15:45 +0530
- LV Status available
- # open 0
- LV Size 256.00 MiB
- Current LE 64
- Segments 1
- Allocation inherit
- Read ahead sectors 0
- Block device 253:6
-
-
- [root@node ~]# ls -lh /mnt/lv
- -rw-r–r–. 1 root root 256M Nov 26 16:15 /mnt/lv
-
- currently LV size has been set to 256
-
-The size of the file/LV can be specified during creation/mapping time itself like this:
-
- setfattr -n “user.glusterfs.bd” -v “lv:256MB” /mnt/lv
-
-2. Creating BD volume with thin LV backend
-
-* Create a loop device
-
-
- [root@node ~]# dd if=/dev/zero of=bd-loop-thin count=1024 bs=1M
-
- [root@node ~]# losetup /dev/loop0 bd-loop-thin
-
-
-* Prepare a brick by creating a VG and thin pool
-
-
- [root@node ~]# pvcreate /dev/loop0
-
- [root@node ~]# vgcreate bd-vg-thin /dev/loop0
-
-
-* Create a thin pool
-
-
- [root@node ~]# lvcreate –thin bd-vg-thin -L 1000M
-
- Rounding up size to full physical extent 4.00 MiB
- Logical volume “lvol0″ created
-
-lvdisplay shows the thin pool
-
- [root@node ~]# lvdisplay bd-vg-thin
- — Logical volume —
- LV Name lvol0
- VG Name bd-vg-thin
- LV UUID HVa3EM-IVMS-QG2g-oqU6-1UxC-RgqS-g8zhVn
- LV Write Access read/write
- LV Creation host, time node, 2013-11-26 16:39:06 +0530
- LV Pool transaction ID 0
- LV Pool metadata lvol0_tmeta
- LV Pool data lvol0_tdata
- LV Pool chunk size 64.00 KiB
- LV Zero new blocks yes
- LV Status available
- # open 0
- LV Size 1000.00 MiB
- Allocated pool data 0.00%
- Allocated metadata 0.88%
- Current LE 250
- Segments 1
- Allocation inherit
- Read ahead sectors auto
- Block device 253:9
-
-* Create the BD volume
-
-* Create a POSIX directory first
-
-
- [root@node ~]# mkdir /bd-meta-thin
-
-* Create and mount the volume
-
- [root@node ~]# gluster volume create bd-thin node:/bd-meta-thin?bd-vg-thin force
-
- [root@node ~]# gluster volume start bd-thin
-
-
- [root@node ~]# gluster volume info bd-thin
- Volume Name: bd-thin
- Type: Distribute
- Volume ID: 27aa7eb0-4ffa-497e-b639-7cbda0128793
- Status: Started
- Xlator 1: BD
- Capability 1: thin
- Capability 2: offload_copy
- Capability 3: offload_snapshot
- Number of Bricks: 1
- Transport-type: tcp
- Bricks:
- Brick1: node:/bd-meta-thin
- Brick1 VG: bd-vg-thin
-
-
- [root@node ~]# mount -t glusterfs node:/bd-thin /mnt
-
-* Create a file that is backed by a thin LV
-
-
- [root@node ~]# ls /mnt
-
- [root@node ~]#
-
-Creating a file that is mapped to a thin LV is a 2 step operation. First the file should be created on the mount point and a specific extended attribute should be set to map the file to a thin LV.
-
- [root@node ~]# touch /mnt/thin-lv
-
- [root@node ~]# setfattr -n “user.glusterfs.bd” -v “thin:256MB” /mnt/thin-lv
-
-Now /mnt/thin-lv is a thin provisioned file that is backed by a thin LV and size has been set to 256.
-
- [root@node ~]# lvdisplay bd-vg-thin
- — Logical volume —
- LV Name lvol0
- VG Name bd-vg-thin
- LV UUID HVa3EM-IVMS-QG2g-oqU6-1UxC-RgqS-g8zhVn
- LV Write Access read/write
- LV Creation host, time node, 2013-11-26 16:39:06 +0530
- LV Pool transaction ID 1
- LV Pool metadata lvol0_tmeta
- LV Pool data lvol0_tdata
- LV Pool chunk size 64.00 KiB
- LV Zero new blocks yes
- LV Status available
- # open 0
- LV Size 000.00 MiB
- Allocated pool data 0.00%
- Allocated metadata 0.98%
- Current LE 250
- Segments 1
- Allocation inherit
- Read ahead sectors auto
- Block device 253:9
-
-
-
-
- — Logical volume —
- LV Path dev/bd-vg-thin/081b01d1-1436-4306-9baf-41c7bf5a2c73
- LV Name 081b01d1-1436-4306-9baf-41c7bf5a2c73
- VG Name bd-vg-thin
- LV UUID coxpTY-2UZl-9293-8H2X-eAZn-wSp6-csZIeB
- LV Write Access read/write
- LV Creation host, time node, 2013-11-26 16:43:19 +0530
- LV Pool name lvol0
- LV Status available
- # open 0
- LV Size 256.00 MiB
- Mapped size 0.00%
- Current LE 64
- Segments 1
- Allocation inherit
- Read ahead sectors auto
- Block device 253:10
-
-
-
-
-
-As can be seen from above, creation of a file resulted in creation of a thin LV in the brick.
-
-
-###Improvisation on BD translator:
-
-First version of BD xlator ( block backend) had few limitations such as
-
-* Creation of directories not supported
-* Supports only single brick
-* Does not use extended attributes (and client gfid) like posix xlator
-* Creation of special files (symbolic links, device nodes etc) not
- supported
-
-Basic limitation of not allowing directory creation was blocking
-oVirt/VDSM to consume BD xlator as part of Gluster domain since VDSM
-creates multi-level directories when GlusterFS is used as storage
-backend for storing VM images.
-
-To overcome these limitations a new BD xlator with following
-improvements are implemented.
-
-* New hybrid BD xlator that handles both regular files and block device
- files
-* The volume will have both POSIX and BD bricks. Regular files are
- created on POSIX bricks, block devices are created on the BD brick (VG)
-* BD xlator leverages exiting POSIX xlator for most POSIX calls and
- hence sits above the POSIX xlator
-* Block device file is differentiated from regular file by an extended
- attribute
-* The xattr 'user.glusterfs.bd' (BD_XATTR) plays a role in mapping a
- posix file to Logical Volume (LV).
-* When a client sends a request to set BD_XATTR on a posix file, a new
- LV is created and mapped to posix file. So every block device will
- have a representative file in POSIX brick with 'user.glusterfs.bd'
- (BD_XATTR) set.
-* Here after all operations on this file results in LV related
- operations.
-
-For example, opening a file that has BD_XATTR set results in opening
-the LV block device, reading results in reading the corresponding LV
-block device.
-
-When BD xlator gets request to set BD_XATTR via setxattr call, it
-creates a LV and information about this LV is placed in the xattr of the
-posix file. xattr "user.glusterfs.bd" used to identify that posix file
-is mapped to BD.
-
-Usage:
-Server side:
-
- [root@host1 ~]# gluster volume create bdvol host1:/storage/vg1_info?vg1 host2:/storage/vg2_info?vg2
-
-It creates a distributed gluster volume 'bdvol' with Volume Group vg1
-using posix brick /storage/vg1_info in host1 and Volume Group vg2 using
-/storage/vg2_info in host2.
-
-
- [root@host1 ~]# gluster volume start bdvol
-
-Client side:
-
- [root@node ~]# mount -t glusterfs host1:/bdvol /media
- [root@node ~]# touch /media/posix
-
-It creates regular posix file 'posix' in either host1:/vg1 or host2:/vg2 brick
-
- [root@node ~]# mkdir /media/image
-
- [root@node ~]# touch /media/image/lv1
-
-
-It also creates regular posix file 'lv1' in either host1:/vg1 or
-host2:/vg2 brick
-
- [root@node ~]# setfattr -n "user.glusterfs.bd" -v "lv" /media/image/lv1
-
- [root@node ~]#
-
-
-Above setxattr results in creating a new LV in corresponding brick's VG
-and it sets 'user.glusterfs.bd' with value 'lv:<default-extent-size''
-
-
- [root@node ~]# truncate -s5G /media/image/lv1
-
-
-It results in resizig LV 'lv1'to 5G
-
-New BD xlator code is placed in `xlators/storage/bd` directory.
-
-Also add volume-uuid to the VG so that same VG cannot be used for other
-bricks/volumes. After deleting a gluster volume, one has to manually
-remove the associated tag using vgchange <vg-name> --deltag
-`<trusted.glusterfs.volume-id:<volume-id>>`
-
-
-#### Exposing volume capabilities
-
-With multiple storage translators (posix and bd) being supported in GlusterFS, it becomes
-necessary to know the volume type so that user can issue appropriate calls that are relevant
-only to the a given volume type. Hence there needs to be a way to expose the type of
-the storage translator of the volume to the user.
-
-BD xlator is capable of providing server offloaded file copy, server/storage offloaded
-zeroing of a file etc. This capabilities should be visible to the client/user, so that these
-features can be exploited.
-
-BD xlator exports capability information through gluster volume info (and --xml) output. For eg:
-
-`snip of gluster volume info output for a BD based volume`
-
- Xlator 1: BD
- Capability 1: thin
-
-`snip of gluster volume info --xml output for a BD based volume`
-
- <xlators>
- <xlator>
- <name>BD</name>
- <capabilities>
- <capability>thin</capability>
- </capabilities>
- </xlator>
- </xlators>
-
-But this capability information should also exposed through some other means so that a host
-which is not part of Gluster peer could also avail this capabilities.
-
-* Type
-
-BD translator supports both regular files and block device, i,e., one can create files on
-GlusterFS volume backed by BD translator and this file could end up as regular posix file or
-a logical volume (block device) based on the user''s choice. User can do a setxattr on the
-created file to convert it to a logical volume.
-
-Users of BD backed volume like QEMU would like to know that it is working with BD type of volume
-so that it can issue an additional setxattr call after creating a VM image on GlusterFS backend.
-This is necessary to ensure that the created VM image is backed by LV instead of file.
-
-There are different ways to expose this information (BD type of volume) to user.
-One way is to export it via a `getxattr` call. That said, When a client issues getxattr("volume_type")
-on a root gfid, bd xlator will return 1 implying its BD xlator. But posix xlator will return ENODATA
-and client code can interpret this as posix xlator. Also capability list can be returned via
-getxattr("caps") for root gfid.
-
-* Capabilities
-
-BD xlator supports new features such as server offloaded file copy, thin provisioned VM images etc.
-
-There is no standard way of exploiting these features from client side (such as syscall
-to exploit server offloaded copy). So these features need to be exported to the client so that
-they can be used. BD xlator latest version exports these capabilities information through
-gluster volume info (and --xml) output. But if a client is not part of GlusterFS peer
-it can''t run volume info command to get the list of capabilities of a given GlusterFS volume.
-For example, GlusterFS block driver in qemu need to get the capability list so that these features are used.
-
-
-
-Parts of this documentation were originally published here
-#http://raobharata.wordpress.com/2013/11/27/glusterfs-block-device-translator/
diff --git a/doc/developer-guide/brickmux-thread-reduction.md b/doc/developer-guide/brickmux-thread-reduction.md
new file mode 100644
index 00000000000..7d76e8ff579
--- /dev/null
+++ b/doc/developer-guide/brickmux-thread-reduction.md
@@ -0,0 +1,64 @@
+# Resource usage reduction in brick multiplexing
+
+Each brick is regresented with a graph of translators in a brick process.
+Each translator in the graph has its own set of threads and mem pools
+and other system resources allocations. Most of the times all these
+resources are not put to full use. Reducing the resource consumption
+of each brick is a problem in itself that needs to be addressed. The other
+aspect to it is, sharing of resources across brick graph, this becomes
+critical in brick multiplexing scenario. In this document we will be discussing
+only about the threads.
+
+If a brick mux process hosts 50 bricks there are atleast 600+ threads created
+in that process. Some of these are global threads that are shared by all the
+brick graphs, and others are per translator threads. The global threads like
+synctask threads, timer threads, sigwaiter, poller etc. are configurable and
+do not needs to be reduced. The per translator threads keeps growing as the
+number of bricks in the process increases. Each brick spawns atleast 10+
+threads:
+- io-threads
+- posix threads:
+ 1. Janitor
+ 2. Fsyncer
+ 3. Helper
+ 4. aio-thread
+- changelog and bitrot threads(even when the features are not enabled)
+
+## io-threads
+
+io-threads should be made global to the process, having 16+ threads for
+each brick does not make sense. But io-thread translator is loaded in
+the graph, and the position of io-thread translator decides from when
+the fops will be parallelised across threads. We cannot entirely move
+the io-threads to libglusterfs and say the multiplexing happens from
+the master translator or so. Hence, the io-thread orchestrator code
+is moved to libglusterfs, which ensures there is only one set of
+io-threads that is shared among the io-threads translator in each brick.
+This poses performance issues due to lock-contention in the io-threds
+layer. This also shall be addressed by having multiple locks instead of
+one global lock for io-threads.
+
+## Posix threads
+Most of the posix threads execute tasks in a timely manner, hence it can be
+replaced with a timer whose handler register a task to synctask framework, once
+the task is complete, the timer is registered again. With this we can eliminate
+the need of one thread for each task. The problem with using synctasks is
+the performance impact it will have due to make/swapcontext. For task that
+does not involve network wait, we need not do makecontext, instead the task
+function with arg can be stored and executed when a synctask thread is free.
+We need to implement an api in synctask to execute atomic tasks(no network wait)
+without the overhead of make/swapcontext. This will solve the performance
+impact associated with using synctask framework.
+
+And the other challenge, is to cancel all the tasks pending from a translator.
+This is important to cleanly detach brick. For this, we need to implement an
+api in synctask that can cancel all the tasks from a given translator.
+
+For future, this will be replced to use global thread-pool(once implemented).
+
+## Changelog and bitrot threads
+
+In the initial implementation, the threads are not created if the feature is
+not enabled. We need to share threads across changelog instances if we plan
+to enable these features in brick mux scenario.
+
diff --git a/doc/developer-guide/coding-standard.md b/doc/developer-guide/coding-standard.md
index a8aa49cc2da..031c6c0da99 100644
--- a/doc/developer-guide/coding-standard.md
+++ b/doc/developer-guide/coding-standard.md
@@ -6,23 +6,23 @@ Before you get started
Before starting with other part of coding standard, install `clang-format`
On Fedora:
-
+```
$ dnf install clang
-
+```
On debian/Ubuntu:
-
+```
$ apt-get install clang
-
+```
Once you are done with all the local changes, you need to run below set of commands,
before submitting the patch for review.
-
- $ git add $file # if any
- $ git commit -a -s -m "commit message"
- $ git show --pretty="format:" --name-only | grep -v "contrib/" | egrep "*\.[ch]$" | xargs clang-format -i
- $ git diff # see if there are any changes
- $ git commit -a --amend # get the format changes done
- $ ./submit-for-review.sh
-
+```
+$ git add $file # if any
+$ git commit -a -s -m "commit message"
+$ git show --pretty="format:" --name-only | grep -v "contrib/" | egrep "*\.[ch]$" | xargs clang-format -i
+$ git diff # see if there are any changes
+$ git commit -a --amend # get the format changes done
+$ ./submit-for-review.sh
+```
Structure definitions should have a comment per member
@@ -620,8 +620,8 @@ If a value isn't supposed/expected to change, there's no cost to adding a
### Avoid global variables (including 'static' auto variables)
Almost all state in Gluster is contextual and should be contained in the
-appropriate structure reflecting its scope (e.g. call\_frame\_t, call\_stack\_t,
-xlator\_t, glusterfs\_ctx\_t). With dynamic loading and graph switches in play,
+appropriate structure reflecting its scope (e.g. `call\_frame\_t`, `call\_stack\_t`,
+`xlator\_t`, `glusterfs\_ctx\_t`). With dynamic loading and graph switches in play,
each global requires careful consideration of when it should be initialized or
reinitialized, when it might _accidentally_ be reinitialized, when its value
might become stale, and so on. A few global variables are needed to serve as
diff --git a/doc/developer-guide/commit-guidelines.md b/doc/developer-guide/commit-guidelines.md
index 9b191dac178..38bbe525cbd 100644
--- a/doc/developer-guide/commit-guidelines.md
+++ b/doc/developer-guide/commit-guidelines.md
@@ -118,7 +118,7 @@ The 'bug' line can reference a bug in a few ways. Gerrit creates a link to the b
**Updates: bz#1193929** -- use 'Updates: bz#NNNN' if the commit is only a partial fix and more work is needed.
**Updates: #175** -- use 'Updates: #NNNN' if the commit is only a partial fix and more work is needed for the feature completion.
-We encourage the use of Co-Authored-By: name <name@example.com> in commit messages to indicate people who worked on a particular patch. It's a convention for recognizing multiple authors, and our projects would encourage the stats tools to observe it when collecting statistics.
+We encourage the use of `Co-Authored-By: name <name@example.com>` in commit messages to indicate people who worked on a particular patch. It's a convention for recognizing multiple authors, and our projects would encourage the stats tools to observe it when collecting statistics.
### Summary of Git commit message structure
diff --git a/doc/developer-guide/datastructure-inode.md b/doc/developer-guide/datastructure-inode.md
index a340ab9ca8e..45d7a941e5f 100644
--- a/doc/developer-guide/datastructure-inode.md
+++ b/doc/developer-guide/datastructure-inode.md
@@ -1,6 +1,6 @@
-#Inode and dentry management in GlusterFS:
+# Inode and dentry management in GlusterFS:
-##Background
+## Background
Filesystems internally refer to files and directories via inodes. Inodes
are unique identifiers of the entities stored in a filesystem. Whenever an
application has to operate on a file/directory (read/modify), the filesystem
@@ -41,11 +41,10 @@ struct _inode_table {
};
```
-#Life-cycle
+# Life-cycle
```
-
inode_table_new (size_t lru_limit, xlator_t *xl)
-
+```
This is a function which allocates a new inode table. Usually the top xlators in
the graph such as protocol/server (for bricks), fuse and nfs (for fuse and nfs
mounts) and libgfapi do inode managements. Hence they are the ones which will
@@ -59,11 +58,8 @@ new inode table.
Thus an allocated inode table is destroyed only when the filesystem daemon is
killed or unmounted.
-```
-
-#what it contains.
-```
+# what it contains.
Inode table in glusterfs mainly contains a hash table for maintaining inodes.
In general a file/directory is considered to be existing if there is a
corresponding inode present in the inode table. If a inode for a file/directory
@@ -76,21 +72,21 @@ size of the hash table (as of now it is hard coded to 14057. The hash value of
a inode is calculated using its gfid).
Apart from the hash table, inode table also maintains 3 important list of inodes
-1) Active list:
+1. Active list:
Active list contains all the active inodes (i.e inodes which are currently part
of some fop).
-2) Lru list:
+2. Lru list:
Least recently used inodes list. A limit can be set for the size of the lru
list. For bricks it is 16384 and for clients it is infinity.
-3) Purge list:
+3. Purge list:
List of all the inodes which have to be purged (i.e inodes which have to be
deleted from the inode table due to unlink/rmdir/forget).
And at last it also contains the mem-pool for allocating inodes, dentries so
that frequent malloc/calloc and free of the data structures can be avoided.
-```
-#Data structure (inode)
+
+# Data structure (inode)
```
struct _inode {
inode_table_t *table; /* the table this inode belongs to */
@@ -108,7 +104,7 @@ struct _inode {
struct _inode_ctx *_ctx; /* place holder for keeping the
information about the inode by different xlators */
};
-
+```
As said above, inodes are internal way of identifying the files/directories. A
inode uniquely represents a file/directory. A new inode is created whenever a
create/mkdir/symlink/mknod operations are performed. Apart from that a new inode
@@ -128,9 +124,9 @@ inodes are those inodes whose refcount is greater than zero. Whenever some
operation comes on a file/directory, and the resolver tries to find the inode
for it, it increments the refcount of the inode before returning the inode. The
refcount of an inode can be incremented by calling the below function
-
+```
inode_ref (inode_t *inode)
-
+```
Any xlator which wants to operate on a inode as part of some fop (or wants the
inode in the callback), should hold a ref on the inode.
Once the fop is completed before sending the reply of the fop to the above
@@ -139,18 +135,18 @@ zero, it is removed from the active inodes list and put into LRU list maintained
by the inode table. Thus in short if some fop is happening on a file/directory,
the corresponding inode will be in the active list or it will be in the LRU
list.
-```
-#Life Cycle
+
+# Life Cycle
A new inode is created whenever a new file/directory/symlink is created OR a
successful lookup of an existing entry is done. The xlators which does inode
management (as of now protocol/server, fuse, nfs, gfapi) will perform inode_link
operation upon successful lookup or successful creation of a new entry.
-
+```
inode_link (inode_t *inode, inode_t *parent, const char *name,
struct iatt *buf);
-
+```
inode_link actually adds the inode to the inode table (to be precise it adds
the inode to the hash table maintained by the inode table. The hash value is
calculated based on the gfid). Copies the gfid to the inode (the gfid is
@@ -160,7 +156,7 @@ A inode is removed from the inode table and eventually destroyed when unlink
or rmdir operation is performed on a file/directory, or the the lru limit of
the inode table has been exceeded.
-#Data structure (dentry)
+# Data structure (dentry)
```
struct _dentry {
@@ -170,22 +166,22 @@ struct _dentry {
char *name; /* name of the directory entry */
inode_t *parent; /* directory of the entry */
};
-
+```
A dentry is the presence of an entry for a file/directory within its parent
directory. A dentry usually points to the inode to which it belongs to. In
glusterfs a dentry contains the following fields.
-1) a hook using which it can add itself to the list of
+1. a hook using which it can add itself to the list of
the dentries maintained by the inode to which it points to.
-2) A hash table pointer.
-3) Pointer to the inode to which it belongs to.
-4) Name of the dentry
-5) Pointer to the inode of the parent directory in which the dentry is present
+2. A hash table pointer.
+3. Pointer to the inode to which it belongs to.
+4. Name of the dentry
+5. Pointer to the inode of the parent directory in which the dentry is present
A new dentry is created when a new file/directory/symlink is created or a hard
link to an existing file is created.
-
+```
__dentry_create (inode_t *inode, inode_t *parent, const char *name);
-
+```
A dentry holds a refcount on the parent
directory so that the parent inode is never removed from the active inode's list
and put to the lru list (If the lru limit of the lru list is exceeded, there is
@@ -212,15 +208,14 @@ deleted due to file removal or lru limit being exceeded the inode is retired
purge list maintained by the inode table), the nlookup count is set to 0 via
inode_forget api. The inode table, then prunes all the inodes from the purge
list by destroying the inode contexts maintained by each xlator.
-
+```
unlinking of the dentry is done via inode_unlink;
void
inode_unlink (inode_t *inode, inode_t *parent, const char *name);
-
+```
If the inode has multiple hard links, then the unlink operation performed by
the application results just in the removal of the dentry with the name provided
by the application. For the inode to be removed, all the dentries of the inode
should be unlinked.
-```
diff --git a/doc/developer-guide/datastructure-iobuf.md b/doc/developer-guide/datastructure-iobuf.md
index 5f521f1485f..03604e3672c 100644
--- a/doc/developer-guide/datastructure-iobuf.md
+++ b/doc/developer-guide/datastructure-iobuf.md
@@ -1,6 +1,6 @@
-#Iobuf-pool
-##Datastructures
-###iobuf
+# Iobuf-pool
+## Datastructures
+### iobuf
Short for IO Buffer. It is one allocatable unit for the consumers of the IOBUF
API, each unit hosts @page_size(defined in arena structure) bytes of memory. As
initial step of processing a fop, the IO buffer passed onto GlusterFS by the
@@ -28,7 +28,7 @@ struct iobuf {
};
```
-###iobref
+### iobref
There may be need of multiple iobufs for a single fop, like in vectored read/write.
Hence multiple iobufs(default 16) are encapsulated under one iobref.
```
@@ -40,7 +40,7 @@ struct iobref {
int used; /* number of iobufs added to this iobref */
};
```
-###iobuf_arenas
+### iobuf_arenas
One region of memory MMAPed from the operating system. Each region MMAPs
@arena_size bytes of memory, and hosts @arena_size / @page_size IOBUFs.
The same sized iobufs are grouped into one arena, for sanity of access.
@@ -77,7 +77,7 @@ struct iobuf_arena {
};
```
-###iobuf_pool
+### iobuf_pool
Pool of Iobufs. As there may be many Io buffers required by the filesystem,
a pool of iobufs are preallocated and kept, if these preallocated ones are
exhausted only then the standard malloc/free is called, thus improving the
@@ -139,8 +139,8 @@ arenas in the purge list are destroyed only if there is atleast one arena in
(e.g: If there is an arena (page_size=128KB, count=32) in purge list, this arena
is destroyed(munmap) only if there is an arena in 'arenas' list with page_size=128KB).
-##APIs
-###iobuf_get
+## APIs
+### iobuf_get
```
struct iobuf *iobuf_get (struct iobuf_pool *iobuf_pool);
@@ -149,7 +149,7 @@ Creates a new iobuf of the default page size(128KB hard coded as of yet).
Also takes a reference(increments ref count), hence no need of doing it
explicitly after getting iobuf.
-###iobuf_get2
+### iobuf_get2
```
struct iobuf * iobuf_get2 (struct iobuf_pool *iobuf_pool, size_t page_size);
@@ -179,7 +179,7 @@ if (requested iobuf size > Max iobuf size in the pool(1MB as of yet))
Also takes a reference(increments ref count), hence no need of doing it
explicitly after getting iobuf.
-###iobuf_ref
+### iobuf_ref
```
struct iobuf *iobuf_ref (struct iobuf *iobuf);
@@ -188,7 +188,7 @@ struct iobuf *iobuf_ref (struct iobuf *iobuf);
xlator/function/, its a good practice to take a reference so that iobuf is not
deleted by the allocator.
-###iobuf_unref
+### iobuf_unref
```
void iobuf_unref (struct iobuf *iobuf);
```
@@ -203,33 +203,33 @@ Unreference the iobuf, if the ref count is zero iobuf is considered free.
Every iobuf_ref should have a corresponding iobuf_unref, and also every
iobuf_get/2 should have a correspondning iobuf_unref.
-###iobref_new
+### iobref_new
```
struct iobref *iobref_new ();
```
Creates a new iobref structure and returns its pointer.
-###iobref_ref
+### iobref_ref
```
struct iobref *iobref_ref (struct iobref *iobref);
```
Take a reference on the iobref.
-###iobref_unref
+### iobref_unref
```
void iobref_unref (struct iobref *iobref);
```
Decrements the reference count of the iobref. If the ref count is 0, then unref
all the iobufs(iobuf_unref) in the iobref, and destroy the iobref.
-###iobref_add
+### iobref_add
```
int iobref_add (struct iobref *iobref, struct iobuf *iobuf);
```
Adds the given iobuf into the iobref, it takes a ref on the iobuf before adding
it, hence explicit iobuf_ref is not required if adding to the iobref.
-###iobref_merge
+### iobref_merge
```
int iobref_merge (struct iobref *to, struct iobref *from);
```
@@ -239,13 +239,13 @@ on all the iobufs added to the 'to' iobref. Hence iobref_unref should be
performed both on 'from' and 'to' iobrefs (performing iobref_unref only on 'to'
will not free the iobufs and may result in leak).
-###iobref_clear
+### iobref_clear
```
void iobref_clear (struct iobref *iobref);
```
Unreference all the iobufs in the iobref, and also unref the iobref.
-##Iobuf Leaks
+## Iobuf Leaks
If all iobuf_refs/iobuf_new do not have correspondning iobuf_unref, then the
iobufs are not freed and recurring execution of such code path may lead to huge
memory leaks. The easiest way to identify if a memory leak is caused by iobufs
diff --git a/doc/developer-guide/datastructure-mem-pool.md b/doc/developer-guide/datastructure-mem-pool.md
index c71aa2a8ddd..225567cbf9f 100644
--- a/doc/developer-guide/datastructure-mem-pool.md
+++ b/doc/developer-guide/datastructure-mem-pool.md
@@ -1,5 +1,5 @@
-#Mem-pool
-##Background
+# Mem-pool
+## Background
There was a time when every fop in glusterfs used to incur cost of allocations/de-allocations for every stack wind/unwind between xlators because stack/frame/*_localt_t in every wind/unwind was allocated and de-allocated. Because of all these system calls in the fop path there was lot of latency and the worst part is that most of the times the number of frames/stacks active at any time wouldn't cross a threshold. So it was decided that this threshold number of frames/stacks would be allocated in the beginning of the process only once. Get one of them from the pool of stacks/frames whenever `STACK_WIND` is performed and put it back into the pool in `STACK_UNWIND`/`STACK_DESTROY` without incurring any extra system calls. The data structures are allocated only when threshold number of such items are in active use i.e. pool is in complete use.% increase in the performance once this was added to all the common data structures (inode/fd/dict etc) in xlators throughout the stack was tremendous.
## Data structure
@@ -27,7 +27,7 @@ will be served from here until all the elements in the pool are in use i.e. cold
};
```
-##Life-cycle
+## Life-cycle
```
mem_pool_new (data_type, unsigned long count)
@@ -120,5 +120,5 @@ mem_pool_destroy (struct mem_pool *pool)
Deletes this pool from the `global_list` maintained by `glusterfs-ctx` and frees all the memory allocated in `mem_pool_new`.
-###How to pick pool-size
+### How to pick pool-size
This varies from work-load to work-load. Create the mem-pool with some random size and run the work-load. Take the statedump after the work-load is complete. In the statedump if `max_alloc` is always less than `cold_count` may be reduce the size of the pool closer to `max_alloc`. On the otherhand if there are lots of `pool-misses` then increase the `pool_size` by `max_stdalloc` to achieve better 'hit-rate' of the pool.
diff --git a/doc/developer-guide/dirops-transactions-in-dht.md b/doc/developer-guide/dirops-transactions-in-dht.md
index 83f63be3f45..909a97001aa 100644
--- a/doc/developer-guide/dirops-transactions-in-dht.md
+++ b/doc/developer-guide/dirops-transactions-in-dht.md
@@ -1,3 +1,4 @@
+# dirops transactions in dht
Need for transactions during operations on directories arise from two
basic design elements of DHT:
@@ -269,4 +270,4 @@ And the examples are:
* Both _dst_ and _dst/dst_ have same gfid - _gfid-src_. As observed earlier, symptom might be directory listing being incomplete
- mkdir (dst) vs renamedir ("src", "dst")
- rmdir (src) vs renamedir ("src", "dst")
- - rmdir (dst) vs renamedir ("src", "dst") \ No newline at end of file
+ - rmdir (dst) vs renamedir ("src", "dst")
diff --git a/doc/developer-guide/fuse-interrupt.md b/doc/developer-guide/fuse-interrupt.md
index f92b5532eaf..ec991b81ec5 100644
--- a/doc/developer-guide/fuse-interrupt.md
+++ b/doc/developer-guide/fuse-interrupt.md
@@ -23,9 +23,10 @@ not exported to a header file).
```
enum fuse_interrupt_state {
- INTERRUPT_NONE,
+ /* ... */
INTERRUPT_SQUELCHED,
INTERRUPT_HANDLED,
+ /* ... */
};
typedef enum fuse_interrupt_state fuse_interrupt_state_t;
struct fuse_interrupt_record;
@@ -62,8 +63,58 @@ dummy implementation only for demonstration purposes.) Flush is chosen
because a `FLUSH` interrupt is easy to trigger (see
*tests/features/interrupt.t*). Interrupt handling for flush is switched on
by `--fuse-flush-handle-interrupt` (a hidden glusterfs command line flag).
-The flush interrupt handling code is guarded by the
-`flush_handle_interrupt` Boolean member of `fuse_private_t`.
+The implementation of flush interrupt is contained in the
+`fuse_flush_interrupt_handler()` function and blocks guarded by the
+
+```
+if (priv->flush_handle_interrupt) { ...
+```
+
+conditional (where `priv` is a `*fuse_private_t`).
+
+### Overview
+
+"Regular" fuse fops and interrupt handlers interact via a list containing
+interrupt records.
+
+If a fop wishes to have its interrupts handled, it needs to set up an
+interrupt record and insert it into the list; also when it's to finish
+(ie. in its "cbk" stage) it needs to delete the record from the list.
+
+If no interrupt happens, basically that's all to it - a list insertion
+and deletion.
+
+However, if an interrupt comes for the fop, the interrupt FUSE request
+will carry the data identifying an ongoing fop (that is, its `unique`),
+and based on that, the interrupt record will be looked up in the list, and
+the specific interrupt handler (a member of the interrupt record) will be
+called.
+
+Usually the fop needs to share some data with the interrupt handler to
+enable it to perform its task (also shared via the interrupt record).
+The interrupt API offers two approaches to manage shared data:
+- _Async or reference-counting strategy_: from the point on when the interrupt
+ record is inserted to the list, it's owned jointly by the regular fop and
+ the prospective interrupt handler. Both of them need to check before they
+ return if the other is still holding a reference; if not, then they are
+ responsible for reclaiming the shared data.
+- _Sync or borrow strategy_: the interrupt handler is considered a borrower
+ of the shared data. The interrupt handler should not reclaim the shared
+ data. The fop will wait for the interrupt handler to finish (ie., the borrow
+ to be returned), then it has to reclaim the shared data.
+
+The user of the interrupt API need to call the following functions to
+instrument this control flow:
+- `fuse_interrupt_record_insert()` in the fop to insert the interrupt record to
+ the list;
+- `fuse_interrupt_finish_fop()`in the fop (cbk) and
+- `fuse_interrupt_finish_interrupt()`in the interrupt handler
+
+to perform needed synchronization at the end their tenure. The data management
+strategies are implemented by the `fuse_interrupt_finish_*()` functions (which
+have an argument to specify which strategy to use); these routines take care
+of freeing the interrupt record itself, while the reclamation of the shared data
+is left to the API user.
### Usage
@@ -75,12 +126,15 @@ steps:
call (directly or as async callback) `fuse_interrupt_finish_interrupt()`.
The `intstat` argument to `fuse_interrupt_finish_interrupt` should be
either `INTERRUPT_SQUELCHED` or `INTERRUPT_HANDLED`.
- - `INTERRUPT_SQUELCHED` means that we choose not to handle the interrupt
+ - `INTERRUPT_SQUELCHED` means that the interrupt could not be delivered
and the fop is going on uninterrupted.
- `INTERRUPT_HANDLED` means that the interrupt was actually handled. In
this case the fop will be answered from interrupt context with errno
`EINTR` (that is, the fop should not send a response to the kernel).
+ (the enum `fuse_interrupt_state` includes further members, which are reserved
+ for internal use).
+
We return to the `sync` and `datap` arguments later.
- In the `fuse_<FOP>` function create an interrupt record using
`fuse_interrupt_record_new()`, passing the incoming `fuse_in_header` and
@@ -92,10 +146,10 @@ steps:
`fuse_interrupt_record_insert()`.
- In `fuse_<FOP>_cbk` call `fuse_interrupt_finish_fop()`.
- `fuse_interrupt_finish_fop()` returns a Boolean according to whether the
- interrupt was handled. If it was, then the fuse request is already
+ interrupt was handled. If it was, then the FUSE request is already
answered and the stack gets destroyed in `fuse_interrupt_finish_fop` so
- `fuse_<FOP>_cbk` can just return (zero). Otherwise follow the standard
- cbk logic (answer the fuse request and destroy the stack -- these are
+ `fuse_<FOP>_cbk()` can just return (zero). Otherwise follow the standard
+ cbk logic (answer the FUSE request and destroy the stack -- these are
typically accomplished by `fuse_err_cbk()`).
- The last two argument of `fuse_interrupt_finish_fop()` and
`fuse_interrupt_finish_interrupt()` are `gf_boolean_t sync` and
@@ -124,7 +178,34 @@ steps:
then that pointer will be directed to the `data` member of the interrupt
record and it's up to the caller what it's doing with it.
- If `sync` is true, interrupt handler can use `datap = NULL`, and
- fop handler will have `datap` set.
+ fop handler will have `datap` point to a valid pointer.
- If `sync` is false, and handlers pass a pointer to a pointer for
`datap`, they should check if the pointed pointer is NULL before
attempting to deal with the data.
+
+### FUSE answer for the interrupted fop
+
+The kernel acknowledges a successful interruption for a given FUSE request
+if the filesystem daemon answers it with errno EINTR; upon that, the syscall
+which induced the request will be abruptly terminated with an interrupt, rather
+than returning a value.
+
+In glusterfs, this can be arranged in two ways.
+
+- If the interrupt handler wins the race for the interrupt record, ie.
+ `fuse_interrupt_finish_fop()` returns true to `fuse_<FOP>_cbk()`, then, as
+ said above, `fuse_<FOP>_cbk()` does not need to answer the FUSE request.
+ That's because then the interrupt handler will take care about answering
+ it (with errno EINTR).
+- If `fuse_interrupt_finish_fop()` returns false to `fuse_<FOP>_cbk()`, then
+ this return value does not inform the fop handler whether there was an interrupt
+ or not. This return value occurs both when fop handler won the race for the
+ interrupt record against the interrupt handler, and when there was no interrupt
+ at all.
+
+ However, the internal logic of the fop handler might detect from other
+ circumstances that an interrupt was delivered. For example, the fop handler
+ might be sleeping, waiting for some data to arrive, so that a premature
+ wakeup (with no data present) occurs if the interrupt handler intervenes. In
+ such cases it's the responsibility of the fop handler to reply the FUSE
+ request with errro EINTR.
diff --git a/doc/developer-guide/identifying-resource-leaks.md b/doc/developer-guide/identifying-resource-leaks.md
index 851fc4424bc..950cae79b0a 100644
--- a/doc/developer-guide/identifying-resource-leaks.md
+++ b/doc/developer-guide/identifying-resource-leaks.md
@@ -174,3 +174,27 @@ In this case, the resource leak can be addressed by adding a single line to the
Running the same Valgrind command and comparing the output will show that the
memory leak in `xlators/meta/src/meta.c:init` is not reported anymore.
+
+### Running DRD, the Valgrind thread error detector
+
+When configuring GlusterFS with:
+
+```shell
+./configure --enable-valgrind
+```
+
+the default Valgrind tool (Memcheck) is enabled. But it's also possble to select
+one of Memcheck or DRD by using:
+
+```shell
+./configure --enable-valgrind=memcheck
+```
+
+or:
+
+```shell
+./configure --enable-valgrind=drd
+```
+
+respectively. When using DRD, it's recommended to consult
+https://valgrind.org/docs/manual/drd-manual.html before running.
diff --git a/doc/developer-guide/logging-guidelines.md b/doc/developer-guide/logging-guidelines.md
index 58adf944b67..0e6b2588535 100644
--- a/doc/developer-guide/logging-guidelines.md
+++ b/doc/developer-guide/logging-guidelines.md
@@ -62,7 +62,7 @@ There are 2 interfaces provided to log messages,
headers (like the time stamp, dom, errnum etc.). The primary users of
the above interfaces are, when printing the final graph, or printing
the configuration when a process is about dump core or abort, or
- printing the backtrace when a process recieves a critical signal
+ printing the backtrace when a process receives a critical signal
- These interfaces should not be used outside the scope of the users
above, unless you know what you are doing
diff --git a/doc/developer-guide/network_compression.md b/doc/developer-guide/network_compression.md
index 7327591ef63..1222a765276 100644
--- a/doc/developer-guide/network_compression.md
+++ b/doc/developer-guide/network_compression.md
@@ -1,9 +1,9 @@
-#On-Wire Compression + Decompression
+# On-Wire Compression + Decompression
The 'compression translator' compresses and decompresses data in-flight
between client and bricks.
-###Working
+### Working
When a writev call occurs, the client compresses the data before sending it to
brick. On the brick, compressed data is decompressed. Similarly, when a readv
call occurs, the brick compresses the data before sending it to client. On the
@@ -19,7 +19,7 @@ During normal operation, this is the format of data sent over wire:
The trailer contains the CRC32 checksum and length of original uncompressed
data. This is used for validation.
-###Usage
+### Usage
Turning on compression xlator:
@@ -27,7 +27,7 @@ Turning on compression xlator:
gluster volume set <vol_name> network.compression on
~~~
-###Configurable parameters (optional)
+### Configurable parameters (optional)
**Compression level**
~~~
@@ -35,10 +35,10 @@ gluster volume set <vol_name> network.compression.compression-level 8
~~~
~~~
-0 : no compression
-1 : best speed
-9 : best compression
--1 : default compression
+ 0 : no compression
+ 1 : best speed
+ 9 : best compression
+-1 : default compression
~~~
**Minimum file size**
@@ -55,7 +55,7 @@ Other less frequently used parameters include `network.compression.mem-level`
and `network.compression.window-size`. More details can about these options
can be found by running `gluster volume set help` command.
-###Known Issues and Limitations
+### Known Issues and Limitations
* Compression translator cannot work with striped volumes.
* Mount point hangs when writing a file with write-behind xlator turned on. To
@@ -65,7 +65,7 @@ set`performance.strict-write-ordering` to on.
distribute volumes. This limitation is caused by AFR not being able to
propagate xdata. This issue has been fixed in glusterfs versions > 3.5
-###TODO
+### TODO
Although zlib offers high compression ratio, it is very slow. We can make the
translator pluggable to add support for other compression methods such as
[lz4 compression](https://code.google.com/p/lz4/)
diff --git a/doc/developer-guide/options-to-contribute.md b/doc/developer-guide/options-to-contribute.md
index 5dd1895cb1c..3f0d84e7645 100644
--- a/doc/developer-guide/options-to-contribute.md
+++ b/doc/developer-guide/options-to-contribute.md
@@ -60,7 +60,7 @@ Reference: https://review.gluster.org/20925/
5. Now, pick a `.h` file, and see if a structure is very large, and see
-if re-aligning them as per [coding-standard](./conding-standard.md) gives any size benefit,
+if re-aligning them as per [coding-standard](./coding-standard.md) gives any size benefit,
if yes, go ahead and change it. Make sure you check all the structures
in the file for similar pattern.
diff --git a/doc/developer-guide/syncop.md b/doc/developer-guide/syncop.md
index 4e30451f30e..bcc8bd08e01 100644
--- a/doc/developer-guide/syncop.md
+++ b/doc/developer-guide/syncop.md
@@ -1,4 +1,4 @@
-#syncop framework
+# syncop framework
A coroutines-based, cooperative multi-tasking framework.
## Topics
diff --git a/doc/developer-guide/thread-naming.md b/doc/developer-guide/thread-naming.md
index 204cd7681b4..513140d4437 100644
--- a/doc/developer-guide/thread-naming.md
+++ b/doc/developer-guide/thread-naming.md
@@ -29,10 +29,10 @@ gf_thread_create_detached (pthread_t *thread,
As max name length for a thread in POSIX is only 16 characters including the
'\0' character, you have to be a little creative with naming. Also, it is
important that all Gluster threads have common prefix. Considering these
-conditions, we have "gluster" as prefix for all the threads created by these
+conditions, we have "glfs_" as prefix for all the threads created by these
wrapper functions. It is responsibility of the owner of thread to provide the
suffix part of the name. It does not have to be a descriptive name, as it has
-only 8 letters to work with. However, it should be unique enough such that it
+only 10 letters to work with. However, it should be unique enough such that it
can be matched with a table which describes it.
If n number of threads are spwaned to perform same function, it is must that the
@@ -87,6 +87,7 @@ such that it can be matched with a table below without ambiguity.
- posixfsy - posix fsync
- posixhc - posix heal
- posixjan - posix janitor
+- posixrsv - posix reserve
- quiesce - quiesce dequeue
- rdmaAsyn - rdma async event handler
- rdmaehan - rdma completion handler
@@ -99,6 +100,5 @@ such that it can be matched with a table below without ambiguity.
- spoller - socket poller
- sprocN - syncop worker thread
- tbfclock - token bucket filter token generator thread
-- tierfixl - tier fix layout
- timer - timer thread
- upreaper - upcall reaper
diff --git a/doc/developer-guide/translator-development.md b/doc/developer-guide/translator-development.md
index 3bf7e153354..f75935519f6 100644
--- a/doc/developer-guide/translator-development.md
+++ b/doc/developer-guide/translator-development.md
@@ -472,7 +472,7 @@ hello
Now let's interrupt the process and see where we are.
```
-^C
+
Program received signal SIGINT, Interrupt.
0x0000003a0060b3dc in pthread_cond_wait@@GLIBC_2.3.2 ()
from /lib64/libpthread.so.0
@@ -680,4 +680,4 @@ Original author's site:
Gluster community site:
- * [Translators](http://www.gluster.org/community/documentation/index.php/Translators)
+ * [Translators](https://docs.gluster.org/en/latest/Quick-Start-Guide/Architecture/#translators)
diff --git a/doc/developer-guide/xlator-classification.md b/doc/developer-guide/xlator-classification.md
index 36d924d0934..6073df9375f 100644
--- a/doc/developer-guide/xlator-classification.md
+++ b/doc/developer-guide/xlator-classification.md
@@ -23,7 +23,7 @@ This document is intended for the following community participants,
- Existing xlator maintainers
- Packaging and gluster management stack maintainers
-For a more user facing understanding it is recommended to read section [tbd](TBD)
+For a more user facing understanding it is recommended to read section (TBD)
in the gluster documentation.
## Categories
diff --git a/doc/features/ctime.md b/doc/features/ctime.md
index 6c6cad90055..74a77abed4b 100644
--- a/doc/features/ctime.md
+++ b/doc/features/ctime.md
@@ -1,25 +1,25 @@
-#Consistent time attributes in gluster across replica/distribute
+# Consistent time attributes in gluster across replica/distribute
-####Problem:
+#### Problem:
Traditionally gluster has been using time attributes (ctime, atime, mtime) of files/dirs from bricks. The problem with this approach is that, it is not consisteant across replica and distribute bricks. And applications which depend on it breaks as replica might not always return time attributes from same brick.
Tar especially gives "file changed as we read it" whenever it detects ctime differences when stat is served from different bricks. The way we have been trying to solve it is to serve the stat structures from same brick in afr, max-time in dht. But it doesn't avoid the problem completely. Because there is no way to change ctime at the moment(lutimes() only allows mtime, atime), there is little we can do to make sure ctimes match after self-heals/xattr updates/rebalance.
-####Solution Proposed:
+#### Solution Proposed:
Store time attribues (ctime, mtime, atime) as an xattr of the file. The xattr is updated based
on the fop. If a filesystem fop changes only mtime and ctime, update only those in xattr for
that file.
-####Design Overview:
+#### Design Overview:
1) As part of each fop, top layer will generate a time stamp and pass it to the down along
with other information
-> - This will bring a dependency for NTP synced clients along with servers
-> - There can be a diff in time if the fop stuck in the xlator for various reason,
+ - This will bring a dependency for NTP synced clients along with servers
+ - There can be a diff in time if the fop stuck in the xlator for various reason,
for ex: because of locks.
2) On the server, posix layer stores the value in the memory (inode ctx) and will sync the data periodically to the disk as an extended attr
-> - Of course sync call also will force it. And fop comes for an inode which is not linked, we do the sync immediately.
+ - Of course sync call also will force it. And fop comes for an inode which is not linked, we do the sync immediately.
3) Each time when inodes are created or initialized it read the data from disk and store in inode ctx.
@@ -29,19 +29,19 @@ for ex: because of locks.
6) File ops that changes the parent directory attr time need to be consistent across all the distributed directories across the subvolumes. (for eg: a create call will change ctime and mtime of parent dir)
-> - This has to handle separately because we only send the fop to the hashed subvolume.
-> - We can asynchronously send the timeupdate setattr fop to the other subvoumes and change the values for parent directory if the file fops is successful on hashed subvolume.
-> - This will have a window where the times are inconsistent across dht subvolume (Please provide your suggestions)
+ - This has to handle separately because we only send the fop to the hashed subvolume.
+ - We can asynchronously send the timeupdate setattr fop to the other subvoumes and change the values for parent directory if the file fops is successful on hashed subvolume.
+ - This will have a window where the times are inconsistent across dht subvolume (Please provide your suggestions)
7) Currently we have couple of mount options for time attributes like noatime, relatime , nodiratime etc. But we are not explicitly handled those options even if it is given as mount option when gluster mount.
-####Implementation Overview:
+#### Implementation Overview:
This features involves changes in following xlators.
-> - utime xlator
-> - posix xlator
+ - utime xlator
+ - posix xlator
-#####utime xlator:
+##### utime xlator:
This is a new client side xlator which does following tasks.
1. It will generate a time stamp and passes it down in frame->root->ctime and over the network.
@@ -50,7 +50,7 @@ This is a new client side xlator which does following tasks.
Patches:
1. https://review.gluster.org/#/c/19857/
-#####posix xlator:
+##### posix xlator:
Following tasks are done in posix xlator:
1. Provides APIs to set and get the xattr from backend. It also caches the xattr in inode context. During get, it updates time attributes stored in xattr into iatt structure.
@@ -61,7 +61,7 @@ Following tasks are done in posix xlator:
2. https://review.gluster.org/#/c/19795/
3. https://review.gluster.org/#/c/19796/
-####Pending Work:
+#### Pending Work:
1. Handling of time related mount options (noatime, realatime,etc)
2. flag based create (depending on flags in open, create behaviour might change)
3. Changes in dht for direcotory sync acrosss multiple subvolumes
diff --git a/doc/gluster.8 b/doc/gluster.8
index da6472d6005..ba595edca15 100644
--- a/doc/gluster.8
+++ b/doc/gluster.8
@@ -38,10 +38,10 @@ Display information about all volumes, or the specified volume.
\fB\ volume list \fR
List all volumes in cluster
.TP
-\fB\ volume status [all | <VOLNAME> [nfs|shd|<BRICK>|quotad|tierd]] [detail|clients|mem|inode|fd|callpool|tasks|client-list] \fR
+\fB\ volume status [all | <VOLNAME> [nfs|shd|<BRICK>|quotad]] [detail|clients|mem|inode|fd|callpool|tasks|client-list] \fR
Display status of all or specified volume(s)/brick
.TP
-\fB\ volume create <NEW-VOLNAME> [stripe <COUNT>] [replica <COUNT>] [disperse [<COUNT>]] [redundancy <COUNT>] [transport <tcp|rdma|tcp,rdma>] <NEW-BRICK> ... \fR
+\fB\ volume create <NEW-VOLNAME> [stripe <COUNT>] [[replica <COUNT> [arbiter <COUNT>]]|[replica 2 thin-arbiter 1]] [disperse [<COUNT>]] [disperse-data <COUNT>] [redundancy <COUNT>] [transport <tcp|rdma|tcp,rdma>] <NEW-BRICK> ... <TA-BRICK> \fR
Create a new volume of the specified type using the specified bricks and transport type (the default transport type is tcp).
To create a volume with both transports (tcp and rdma), give 'transport tcp,rdma' as an option.
.TP
@@ -113,6 +113,9 @@ Rotate the log file for corresponding volume/brick.
\fB\ volume profile <VOLNAME> {start|info [peek|incremental [peek]|cumulative|clear]|stop} [nfs] \fR
Profile operations on the volume. Once started, volume profile <volname> info provides cumulative statistics of the FOPs performed.
.TP
+\fB\ volume top <VOLNAME> {open|read|write|opendir|readdir|clear} [nfs|brick <brick>] [list-cnt <value>] | {read-perf|write-perf} [bs <size> count <count>] [brick <brick>] [list-cnt <value>] \fR
+Generates a profile of a volume representing the performance and bottlenecks/hotspots of each brick.
+.TP
\fB\ volume statedump <VOLNAME> [[nfs|quotad] [all|mem|iobuf|callpool|priv|fd|inode|history]... | [client <hostname:process-id>]] \fR
Dumps the in memory state of the specified process or the bricks of the volume.
.TP
@@ -134,32 +137,6 @@ List all the nodes in the pool (including localhost)
.TP
\fB\ peer help \fR
Display help for the peer command.
-.SS "Tier Commands"
-.TP
-\fB\ volume tier <VOLNAME> attach [<replica COUNT>] <NEW-BRICK>... \fR
-Attach to an existing volume a tier of specified type using the specified bricks.
-.TP
-\fB\ volume tier <VOLNAME> start [force] \fR
-Start the tier service for <VOLNAME>
-.TP
-\fB\ volume tier <VOLNAME> status \fR
-Display statistics on data migration between the hot and cold tiers.
-.TP
-\fB\ volume tier <VOLNAME> stop [force] \fR
-Stop the tier service for <VOLNAME>
-.TP
-\fB\ volume tier <VOLNAME> detach start\fR
-Begin detaching the hot tier from the volume. Data will be moved from the hot tier to the cold tier.
-.TP
-\fB\ volume tier <VOLNAME> detach commit [force]\fR
-Commit detaching the hot tier from the volume. The volume will revert to its original state before the hot tier was attached.
-.TP
-\fB\ volume tier <VOLNAME> detach status\fR
-Check status of data movement from the hot to cold tier.
-.TP
-\fB\ volume tier <VOLNAME> detach stop\fR
-Stop detaching the hot tier from the volume.
-
.SS "Quota Commands"
.TP
\fB\ volume quota <VOLNAME> enable \fR
@@ -241,6 +218,12 @@ Use "!<OPTION>" to reset option <OPTION> to default value.
\fB\ volume bitrot <VOLNAME> {enable|disable} \fR
Enable/disable bitrot for volume <VOLNAME>
.TP
+\fB\ volume bitrot <VOLNAME> signing-time <time-in-secs> \fR
+Waiting time for an object after last fd is closed to start signing process.
+.TP
+\fB\ volume bitrot <VOLNAME> signer-threads <count> \fR
+Number of signing process threads. Usually set to number of available cores.
+.TP
\fB\ volume bitrot <VOLNAME> scrub-throttle {lazy|normal|aggressive} \fR
Scrub-throttle value is a measure of how fast or slow the scrubber scrubs the filesystem for volume <VOLNAME>
.TP
diff --git a/doc/glusterd.8 b/doc/glusterd.8
index 3ef7c2b72d1..e3768c78761 100644
--- a/doc/glusterd.8
+++ b/doc/glusterd.8
@@ -30,8 +30,8 @@ File to use for logging.
\fB\-L <LOGLEVEL>, \fB\-\-log\-level=<LOGLEVEL>\fR
Logging severity. Valid options are TRACE, DEBUG, INFO, WARNING, ERROR and CRITICAL (the default is INFO).
.TP
-\fB\-L, \fB\-\-localtime\-logging=on|off\fR
-Enable or disable localtime log timestamps. Valid options are on and off (the default is off).
+\fB\-\-localtime\-logging\fR
+Enable localtime log timestamps.
.TP
\fB\-\-debug\fR
Run the program in debug mode. This option sets \fB\-\-no\-daemon\fR, \fB\-\-log\-level\fR to DEBUG
diff --git a/doc/glusterfs.8 b/doc/glusterfs.8
index 592dedb6759..3d359ea85e4 100644
--- a/doc/glusterfs.8
+++ b/doc/glusterfs.8
@@ -53,8 +53,8 @@ Maximum number of connect attempts to server. This option should be provided wit
\fB\-\-acl\fR
Mount the filesystem with POSIX ACL support.
.TP
-\fB\-L, \fB\-\-localtime\-logging=on|off\fR
-Enable or disable localtime log timestamps. Valid options are on and off (the default is off).
+\fB\-\-localtime\-logging\fR
+Enable localtime log timestamps.
.TP
\fB\-\-debug\fR
Run in debug mode. This option sets \fB\-\-no\-daemon\fR, \fB\-\-log\-level\fR to DEBUG,
@@ -63,8 +63,8 @@ and \fB\-\-log\-file\fR to console.
\fB\-\-enable\-ino32=BOOL\fR
Use 32-bit inodes when mounting to workaround application that doesn't support 64-bit inodes.
.TP
-\fB\-\-fopen\-keep\-cache\fR
-Do not purge the cache on file open.
+\fB\-\-fopen\-keep\-cache[=BOOL]\fR
+Do not purge the cache on file open (default: false).
.TP
\fB\-\-mac\-compat=BOOL\fR
Provide stubs for attributes needed for seamless operation on Macs (the default is off).
@@ -139,6 +139,11 @@ Enable fuse in-kernel writeback cache.
\fB\-\-negative\-timeout=SECONDS\fR
Set negative timeout to SECONDS in fuse kernel module (the default is 0).
.TP
+\fB\-\-auto\-invalidation=BOOL\fR
+controls whether fuse-kernel can auto-invalidate attribute, dentry and
+page-cache. Disable this only if same files/directories are not
+accessed across two different mounts concurrently [default: on].
+.TP
\fB\-\-volfile-check\fR
Enable strict volume file checking.
diff --git a/doc/glusterfsd.8 b/doc/glusterfsd.8
index c5a95d1611f..bc1de2a8c80 100644
--- a/doc/glusterfsd.8
+++ b/doc/glusterfsd.8
@@ -51,8 +51,8 @@ Server to get the volume from. This option overrides \fB\-\-volfile option
.PP
.TP
-\fB\-L, \fB\-\-localtime\-logging=on|off\fR
-Enable or disable localtime log timestamps. Valid options are on and off (the default is off).
+\fB\-\-localtime\-logging\fR
+Enable localtime log timestamps.
.TP
\fB\-\-debug\fR
Run in debug mode. This option sets \fB\-\-no\-daemon\fR, \fB\-\-log\-level\fR to DEBUG
@@ -107,6 +107,11 @@ Enable/Disable direct-io mode in fuse module [default: enable]
.TP
\fB\-\-resolve-gids\fR
Resolve all auxiliary groups in fuse translator (max 32 otherwise)
+.TP
+\fB\-\-auto\-invalidation=BOOL\fR
+controls whether fuse-kernel can auto-invalidate attribute, dentry and
+page-cache. Disable this only if same files/directories are not
+accessed across two different mounts concurrently [default: on]
.SS "Miscellaneous Options"
.PP
diff --git a/doc/mount.glusterfs.8 b/doc/mount.glusterfs.8
index 367f02d9b1a..ce16e9e40b7 100644
--- a/doc/mount.glusterfs.8
+++ b/doc/mount.glusterfs.8
@@ -44,8 +44,8 @@ INFO and NONE [default: INFO]
\fBacl
Mount the filesystem with POSIX ACL support
.TP
-\fBfopen\-keep\-cache
-Do not purge the cache on file open
+\fBfopen\-keep\-cache[=BOOL]
+Do not purge the cache on file open (default: false)
.TP
\fBworm
Mount the filesystem in 'worm' mode
@@ -122,6 +122,15 @@ Provide list of backup volfile servers in the following format [default: None]
\fBDeprecated\fR option - placed here for backward compatibility [default: 1]
.TP
.TP
+\fBlru-limit=\fRN
+Set fuse module's limit for number of inodes kept in LRU list to N [default: 65536]
+.TP
+.TP
+\fBinvalidate-limit=\fRN
+Suspend fuse invalidations implied by 'lru-limit' if number of outstanding
+invalidations reaches N
+.TP
+.TP
\fBbackground-qlen=\fRN
Set fuse module's background queue length to N [default: 64]
.TP
@@ -142,6 +151,11 @@ Enable fuse in-kernel writeback cache [default: off]
.TP
\fBattr\-times\-granularity=\fRNS
Declare supported granularity of file attribute [default: 0]
+.TP
+\fBauto\-invalidation=\fRBOOL
+controls whether fuse-kernel can auto-invalidate attribute, dentry and
+page-cache. Disable this only if same files/directories are not
+accessed across two different mounts concurrently [default: on]
.PP
.SH FILES
.TP
diff --git a/events/src/Makefile.am b/events/src/Makefile.am
index 4e83a469cc2..3b229691897 100644
--- a/events/src/Makefile.am
+++ b/events/src/Makefile.am
@@ -5,7 +5,7 @@ EXTRA_DIST = glustereventsd.py __init__.py eventsapiconf.py.in \
BUILT_SOURCES = eventtypes.py
CLEANFILES = eventtypes.py
-eventsdir = $(GLUSTERFS_LIBEXECDIR)/events
+eventsdir = $(GLUSTERFS_LIBEXECDIR)/gfevents
if BUILD_EVENTS
events_PYTHON = __init__.py gf_event.py eventsapiconf.py eventtypes.py \
utils.py
@@ -28,7 +28,7 @@ eventspeerscript_SCRIPTS = peer_eventsapi.py
install-exec-hook:
$(mkdir_p) $(DESTDIR)$(sbindir)
rm -f $(DESTDIR)$(sbindir)/glustereventsd
- ln -s $(GLUSTERFS_LIBEXECDIR)/events/glustereventsd.py \
+ ln -s $(GLUSTERFS_LIBEXECDIR)/gfevents/glustereventsd.py \
$(DESTDIR)$(sbindir)/glustereventsd
rm -f $(DESTDIR)$(sbindir)/gluster-eventsapi
ln -s $(GLUSTERFS_LIBEXECDIR)/peer_eventsapi.py \
diff --git a/events/src/eventsapiconf.py.in b/events/src/eventsapiconf.py.in
index 76b5954d325..700093bee60 100644
--- a/events/src/eventsapiconf.py.in
+++ b/events/src/eventsapiconf.py.in
@@ -28,6 +28,8 @@ def get_glusterd_workdir():
return glusterd_workdir
SERVER_ADDRESS = "0.0.0.0"
+SERVER_ADDRESSv4 = "0.0.0.0"
+SERVER_ADDRESSv6 = "::1"
DEFAULT_CONFIG_FILE = "@SYSCONF_DIR@/glusterfs/eventsconfig.json"
CUSTOM_CONFIG_FILE_TO_SYNC = "/events/config.json"
CUSTOM_CONFIG_FILE = get_glusterd_workdir() + CUSTOM_CONFIG_FILE_TO_SYNC
diff --git a/events/src/gf_event.py b/events/src/gf_event.py
index 0c7b7ec92d7..260b0d9aa48 100644
--- a/events/src/gf_event.py
+++ b/events/src/gf_event.py
@@ -12,10 +12,10 @@
import socket
import time
-from .eventsapiconf import SERVER_ADDRESS, EVENTS_ENABLED
-from .eventtypes import all_events
+from gfevents.eventsapiconf import SERVER_ADDRESS, EVENTS_ENABLED
+from gfevents.eventtypes import all_events
-from .utils import logger, setup_logger, get_config
+from gfevents.utils import logger, setup_logger, get_config
# Run this when this lib loads
setup_logger()
diff --git a/events/src/glustereventsd.py b/events/src/glustereventsd.py
index 324695ecea3..341a3b60947 100644
--- a/events/src/glustereventsd.py
+++ b/events/src/glustereventsd.py
@@ -13,6 +13,7 @@
from __future__ import print_function
import sys
import signal
+import threading
try:
import socketserver
except ImportError:
@@ -23,15 +24,25 @@ from argparse import ArgumentParser, RawDescriptionHelpFormatter
from eventtypes import all_events
import handlers
import utils
-from eventsapiconf import SERVER_ADDRESS, PID_FILE
+from eventsapiconf import SERVER_ADDRESSv4, SERVER_ADDRESSv6, PID_FILE
from eventsapiconf import AUTO_BOOL_ATTRIBUTES, AUTO_INT_ATTRIBUTES
from utils import logger, PidFile, PidFileLockFailed, boolify
+# Subclass so that specifically IPv4 packets are captured
+class UDPServerv4(socketserver.ThreadingUDPServer):
+ address_family = socket.AF_INET
-class GlusterEventsRequestHandler(SocketServer.BaseRequestHandler):
+# Subclass so that specifically IPv6 packets are captured
+class UDPServerv6(socketserver.ThreadingUDPServer):
+ address_family = socket.AF_INET6
+
+class GlusterEventsRequestHandler(socketserver.BaseRequestHandler):
def handle(self):
data = self.request[0].strip()
+ if sys.version_info >= (3,):
+ data = self.request[0].strip().decode("utf-8")
+
logger.debug("EVENT: {0} from {1}".format(repr(data),
self.client_address[0]))
try:
@@ -86,6 +97,10 @@ def signal_handler_sigusr2(sig, frame):
utils.restart_webhook_pool()
+def UDP_server_thread(sock):
+ sock.serve_forever()
+
+
def init_event_server():
utils.setup_logger()
utils.load_all()
@@ -96,15 +111,26 @@ def init_event_server():
sys.stderr.write("Unable to get Port details from Config\n")
sys.exit(1)
- # Start the Eventing Server, UDP Server
+ # Creating the Eventing Server, UDP Server for IPv4 packets
+ try:
+ serverv4 = UDPServerv4((SERVER_ADDRESSv4, port),
+ GlusterEventsRequestHandler)
+ except socket.error as e:
+ sys.stderr.write("Failed to start Eventsd for IPv4: {0}\n".format(e))
+ sys.exit(1)
+ # Creating the Eventing Server, UDP Server for IPv6 packets
try:
- server = SocketServer.ThreadingUDPServer(
- (SERVER_ADDRESS, port),
- GlusterEventsRequestHandler)
+ serverv6 = UDPServerv6((SERVER_ADDRESSv6, port),
+ GlusterEventsRequestHandler)
except socket.error as e:
- sys.stderr.write("Failed to start Eventsd: {0}\n".format(e))
+ sys.stderr.write("Failed to start Eventsd for IPv6: {0}\n".format(e))
sys.exit(1)
- server.serve_forever()
+ server_thread1 = threading.Thread(target=UDP_server_thread,
+ args=(serverv4,))
+ server_thread2 = threading.Thread(target=UDP_server_thread,
+ args=(serverv6,))
+ server_thread1.start()
+ server_thread2.start()
def get_args():
diff --git a/events/src/peer_eventsapi.py b/events/src/peer_eventsapi.py
index fbfb1d66bb2..4d2e5f35b1c 100644
--- a/events/src/peer_eventsapi.py
+++ b/events/src/peer_eventsapi.py
@@ -27,28 +27,28 @@ from gluster.cliutils import (Cmd, node_output_ok, node_output_notok,
sync_file_to_peers, GlusterCmdException,
output_error, execute_in_peers, runcli,
set_common_args_func)
-from events.utils import LockedOpen, get_jwt_token, save_https_cert
-
-from events.eventsapiconf import (WEBHOOKS_FILE_TO_SYNC,
- WEBHOOKS_FILE,
- DEFAULT_CONFIG_FILE,
- CUSTOM_CONFIG_FILE,
- CUSTOM_CONFIG_FILE_TO_SYNC,
- EVENTSD,
- CONFIG_KEYS,
- BOOL_CONFIGS,
- INT_CONFIGS,
- PID_FILE,
- RESTART_CONFIGS,
- ERROR_INVALID_CONFIG,
- ERROR_WEBHOOK_NOT_EXISTS,
- ERROR_CONFIG_SYNC_FAILED,
- ERROR_WEBHOOK_ALREADY_EXISTS,
- ERROR_PARTIAL_SUCCESS,
- ERROR_ALL_NODES_STATUS_NOT_OK,
- ERROR_SAME_CONFIG,
- ERROR_WEBHOOK_SYNC_FAILED,
- CERTS_DIR)
+from gfevents.utils import LockedOpen, get_jwt_token, save_https_cert
+
+from gfevents.eventsapiconf import (WEBHOOKS_FILE_TO_SYNC,
+ WEBHOOKS_FILE,
+ DEFAULT_CONFIG_FILE,
+ CUSTOM_CONFIG_FILE,
+ CUSTOM_CONFIG_FILE_TO_SYNC,
+ EVENTSD,
+ CONFIG_KEYS,
+ BOOL_CONFIGS,
+ INT_CONFIGS,
+ PID_FILE,
+ RESTART_CONFIGS,
+ ERROR_INVALID_CONFIG,
+ ERROR_WEBHOOK_NOT_EXISTS,
+ ERROR_CONFIG_SYNC_FAILED,
+ ERROR_WEBHOOK_ALREADY_EXISTS,
+ ERROR_PARTIAL_SUCCESS,
+ ERROR_ALL_NODES_STATUS_NOT_OK,
+ ERROR_SAME_CONFIG,
+ ERROR_WEBHOOK_SYNC_FAILED,
+ CERTS_DIR)
def handle_output_error(err, errcode=1, json_output=False):
@@ -173,8 +173,10 @@ def sync_to_peers(args):
try:
sync_file_to_peers(WEBHOOKS_FILE_TO_SYNC)
except GlusterCmdException as e:
+ # Print stdout if stderr is empty
+ errmsg = e.message[2] if e.message[2] else e.message[1]
handle_output_error("Failed to sync Webhooks file: [Error: {0}]"
- "{1}".format(e[0], e[2]),
+ "{1}".format(e.message[0], errmsg),
errcode=ERROR_WEBHOOK_SYNC_FAILED,
json_output=args.json)
@@ -182,8 +184,10 @@ def sync_to_peers(args):
try:
sync_file_to_peers(CUSTOM_CONFIG_FILE_TO_SYNC)
except GlusterCmdException as e:
+ # Print stdout if stderr is empty
+ errmsg = e.message[2] if e.message[2] else e.message[1]
handle_output_error("Failed to sync Config file: [Error: {0}]"
- "{1}".format(e[0], e[2]),
+ "{1}".format(e.message[0], errmsg),
errcode=ERROR_CONFIG_SYNC_FAILED,
json_output=args.json)
@@ -349,8 +353,7 @@ class WebhookModCmd(Cmd):
errcode=ERROR_WEBHOOK_NOT_EXISTS,
json_output=args.json)
- if isinstance(data[args.url], str) or \
- isinstance(data[args.url], unicode):
+ if isinstance(data[args.url], str):
data[args.url]["token"] = data[args.url]
if args.bearer_token != "":
diff --git a/events/src/utils.py b/events/src/utils.py
index fcdcbd47acc..6d4e0791a2b 100644
--- a/events/src/utils.py
+++ b/events/src/utils.py
@@ -9,9 +9,11 @@
# cases as published by the Free Software Foundation.
#
+import sys
import json
import os
import logging
+import logging.handlers
import fcntl
from errno import EBADF
from threading import Thread
@@ -26,13 +28,15 @@ import hmac
from hashlib import sha256
from calendar import timegm
-from .eventsapiconf import (LOG_FILE,
- WEBHOOKS_FILE,
- DEFAULT_CONFIG_FILE,
- CUSTOM_CONFIG_FILE,
- UUID_FILE,
- CERTS_DIR)
-from . import eventtypes
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from gfevents.eventsapiconf import (LOG_FILE,
+ WEBHOOKS_FILE,
+ DEFAULT_CONFIG_FILE,
+ CUSTOM_CONFIG_FILE,
+ UUID_FILE,
+ CERTS_DIR)
+from gfevents import eventtypes
# Webhooks list
@@ -95,7 +99,7 @@ def setup_logger():
logger.setLevel(logging.INFO)
# create the logging file handler
- fh = logging.FileHandler(LOG_FILE)
+ fh = logging.handlers.WatchedFileHandler(LOG_FILE)
formatter = logging.Formatter("[%(asctime)s] %(levelname)s "
"[%(module)s - %(lineno)s:%(funcName)s] "
@@ -387,7 +391,7 @@ class PidFile(object):
def webhook_monitor(proc_queue, webhooks):
queues = {}
for url, data in webhooks.items():
- if isinstance(data, str) or isinstance(data, unicode):
+ if isinstance(data, str):
token = data
secret = None
else:
diff --git a/extras/Makefile.am b/extras/Makefile.am
index a1c657659f3..983f014cca6 100644
--- a/extras/Makefile.am
+++ b/extras/Makefile.am
@@ -11,23 +11,25 @@ EditorModedir = $(docdir)
EditorMode_DATA = glusterfs-mode.el glusterfs.vim
SUBDIRS = init.d systemd benchmarking hook-scripts $(OCF_SUBDIR) LinuxRPM \
- $(GEOREP_EXTRAS_SUBDIR) snap_scheduler firewalld cliutils
+ $(GEOREP_EXTRAS_SUBDIR) snap_scheduler firewalld cliutils python \
+ ganesha
confdir = $(sysconfdir)/glusterfs
if WITH_SERVER
conf_DATA = glusterfs-logrotate gluster-rsyslog-7.2.conf gluster-rsyslog-5.8.conf \
logger.conf.example glusterfs-georep-logrotate group-virt.example \
group-metadata-cache group-gluster-block group-nl-cache \
- group-db-workload
+ group-db-workload group-distributed-virt group-samba
endif
voldir = $(sysconfdir)/glusterfs
+vol_DATA = thin-arbiter/thin-arbiter.vol
if WITH_SERVER
-vol_DATA = glusterd.vol
+vol_DATA += glusterd.vol
endif
scriptsdir = $(datadir)/glusterfs/scripts
-scripts_SCRIPTS =
+scripts_SCRIPTS = thin-arbiter/setup-thin-arbiter.sh
if WITH_SERVER
scripts_SCRIPTS += post-upgrade-script-for-quota.sh \
pre-upgrade-script-for-quota.sh stop-all-gluster-processes.sh
@@ -40,14 +42,15 @@ endif
EXTRA_DIST = glusterfs-logrotate gluster-rsyslog-7.2.conf gluster-rsyslog-5.8.conf \
logger.conf.example glusterfs-georep-logrotate group-virt.example \
group-metadata-cache group-gluster-block group-nl-cache \
- group-db-workload specgen.scm glusterfs-mode.el glusterfs.vim \
+ group-db-workload group-samba specgen.scm glusterfs-mode.el glusterfs.vim \
migrate-unify-to-distribute.sh backend-xattr-sanitize.sh \
backend-cleanup.sh disk_usage_sync.sh clear_xattrs.sh \
glusterd-sysconfig glusterd.vol post-upgrade-script-for-quota.sh \
pre-upgrade-script-for-quota.sh command-completion/gluster.bash \
command-completion/Makefile command-completion/README \
stop-all-gluster-processes.sh clang-checker.sh mount-shared-storage.sh \
- control-cpu-load.sh control-mem.sh
+ control-cpu-load.sh control-mem.sh group-distributed-virt \
+ thin-arbiter/thin-arbiter.vol thin-arbiter/setup-thin-arbiter.sh
if WITH_SERVER
install-data-local:
@@ -67,4 +70,8 @@ install-data-local:
$(DESTDIR)$(GLUSTERD_WORKDIR)/groups/nl-cache
$(INSTALL_DATA) $(top_srcdir)/extras/group-db-workload \
$(DESTDIR)$(GLUSTERD_WORKDIR)/groups/db-workload
+ $(INSTALL_DATA) $(top_srcdir)/extras/group-distributed-virt \
+ $(DESTDIR)$(GLUSTERD_WORKDIR)/groups/distributed-virt
+ $(INSTALL_DATA) $(top_srcdir)/extras/group-samba \
+ $(DESTDIR)$(GLUSTERD_WORKDIR)/groups/samba
endif
diff --git a/extras/cliutils/README.md b/extras/cliutils/README.md
index e11166774e3..309beb1ca25 100644
--- a/extras/cliutils/README.md
+++ b/extras/cliutils/README.md
@@ -221,7 +221,7 @@ required.(Under `%files` section)
- gluster-mountbroker http://review.gluster.org/14544
- gluster-eventsapi http://review.gluster.org/14248
- gluster-georep-sshkey http://review.gluster.org/14732
-- gluster-restapi https://github.com/aravindavk/glusterfs-restapi
+- gluster-restapi https://github.com/gluster/restapi
## Limitations/TODOs
- Not yet possible to create CLI without any subcommand, For example
diff --git a/extras/cliutils/cliutils.py b/extras/cliutils/cliutils.py
index 0095586827d..55fbaf56704 100644
--- a/extras/cliutils/cliutils.py
+++ b/extras/cliutils/cliutils.py
@@ -20,7 +20,14 @@ _common_args_func = lambda p: True
class GlusterCmdException(Exception):
- pass
+ def __init__(self, message):
+ self.message = message
+ try:
+ # Python 3
+ super().__init__(message)
+ except TypeError:
+ # Python 2
+ super(GlusterCmdException, self).__init__(message)
def get_node_uuid():
diff --git a/extras/control-cpu-load.sh b/extras/control-cpu-load.sh
index b739c821055..52dcf62fd9f 100755
--- a/extras/control-cpu-load.sh
+++ b/extras/control-cpu-load.sh
@@ -104,7 +104,7 @@ echo "Setting $quota_value to cpu.cfs_quota_us for gluster_cgroup."
echo ${quota_value} > ${LOC}/${cgroup_name}/cpu.cfs_quota_us
if ps -T -p ${daemon_pid} | grep gluster > /dev/null; then
- for thid in `ps -T -p ${daemon_pid} | grep gluster | awk -F " " '{print $2}'`;
+ for thid in `ps -T -p ${daemon_pid} | grep -v SPID | awk -F " " '{print $2}'`;
do
echo ${thid} > ${LOC}/${cgroup_name}/tasks ;
done
diff --git a/extras/control-mem.sh b/extras/control-mem.sh
index 38aa2a08748..91b36f8107a 100755
--- a/extras/control-mem.sh
+++ b/extras/control-mem.sh
@@ -116,7 +116,7 @@ else
fi
if ps -T -p ${daemon_pid} | grep gluster > /dev/null; then
- for thid in `ps -T -p ${daemon_pid} | grep gluster | awk -F " " '{print $2}'`;
+ for thid in `ps -T -p ${daemon_pid} | grep -v SPID | awk -F " " '{print $2}'`;
do
echo ${thid} > ${LOC}/${cgroup_name}/tasks ;
done
diff --git a/extras/create_new_xlator/generate_xlator.py b/extras/create_new_xlator/generate_xlator.py
index e334b0d7642..983868c04db 100755
--- a/extras/create_new_xlator/generate_xlator.py
+++ b/extras/create_new_xlator/generate_xlator.py
@@ -169,7 +169,7 @@ def load_fragments ():
cur_value = ""
result = {}
basepath = os.path.abspath(os.path.dirname(__file__))
- fragpath = basepath + "/new-xlator-tmpl.c"
+ fragpath = basepath + "/new-xlator.c.tmpl"
for line in open(fragpath, "r").readlines():
m = pragma_re.search(line)
if m:
diff --git a/extras/create_new_xlator/new-xlator-tmpl.c b/extras/create_new_xlator/new-xlator.c.tmpl
index caa10b374d1..fe9735bfcf1 100644
--- a/extras/create_new_xlator/new-xlator-tmpl.c
+++ b/extras/create_new_xlator/new-xlator.c.tmpl
@@ -1,32 +1,30 @@
#pragma fragment CBK_TEMPLATE
-int32_t @FOP_PREFIX @_ @NAME
- @_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, @UNWIND_PARAMS @)
+int32_t @FOP_PREFIX@_@NAME@_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, @UNWIND_PARAMS@)
{
- STACK_UNWIND_STRICT(@NAME @, frame, op_ret, op_errno, @UNWIND_ARGS @);
+ STACK_UNWIND_STRICT(@NAME@, frame, op_ret, op_errno, @UNWIND_ARGS@);
return 0;
}
#pragma fragment COMMENT
If you are generating the leaf xlators, remove the STACK_WIND and replace the
- @ERROR_ARGS @to @UNWIND_ARGS @ if necessary
+ @ERROR_ARGS@ to @UNWIND_ARGS@ if necessary
#pragma fragment FOP_TEMPLATE
- int32_t @FOP_PREFIX @_ @NAME
- @(call_frame_t *frame, xlator_t *this, @WIND_PARAMS @)
+ int32_t @FOP_PREFIX@_@NAME@(call_frame_t *frame, xlator_t *this, @WIND_PARAMS@)
{
- STACK_WIND(frame, @FOP_PREFIX @_ @NAME @_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->@NAME @, @WIND_ARGS @);
+ STACK_WIND(frame, @FOP_PREFIX@_@NAME@_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->@NAME@, @WIND_ARGS@);
return 0;
err:
- STACK_UNWIND_STRICT(@NAME @, frame, -1, errno, @ERROR_ARGS @);
+ STACK_UNWIND_STRICT(@NAME@, frame, -1, errno, @ERROR_ARGS@);
return 0;
}
#pragma fragment FUNC_TEMPLATE
-@RET_TYPE @ @FOP_PREFIX @_ @NAME @(@FUNC_PARAMS @)
+@RET_TYPE@ @FOP_PREFIX@_@NAME@(@FUNC_PARAMS@)
{
- return @RET_VAR @;
+ return @RET_VAR@;
}
#pragma fragment CP
@@ -45,40 +43,40 @@ err:
#pragma fragment XLATOR_METHODS
-static int32_t @FOP_PREFIX @_init(xlator_t *this)
+static int32_t @FOP_PREFIX@_init(xlator_t *this)
{
return 0;
}
-static void @FOP_PREFIX @_fini(xlator_t *this)
+static void @FOP_PREFIX@_fini(xlator_t *this)
{
return;
}
-static int32_t @FOP_PREFIX @_reconfigure(xlator_t *this, dict_t *dict)
+static int32_t @FOP_PREFIX@_reconfigure(xlator_t *this, dict_t *dict)
{
return 0;
}
-static int @FOP_PREFIX @_notify(xlator_t *this, int event, void *data, ...)
+static int @FOP_PREFIX@_notify(xlator_t *this, int event, void *data, ...)
{
return default_notify(this, event, data);
}
-static int32_t @FOP_PREFIX @_mem_acct_init(xlator_t *this)
+static int32_t @FOP_PREFIX@_mem_acct_init(xlator_t *this)
{
int ret = -1;
- ret = xlator_mem_acct_init(this, gf_ @FOP_PREFIX @_mt_end + 1);
+ ret = xlator_mem_acct_init(this, gf_@FOP_PREFIX@_mt_end + 1);
return ret;
}
-static int32_t @FOP_PREFIX @_dump_metrics(xlator_t *this, int fd)
+static int32_t @FOP_PREFIX@_dump_metrics(xlator_t *this, int fd)
{
return 0;
}
-struct volume_options @FOP_PREFIX @_options[] = {
+struct volume_options @FOP_PREFIX@_options[] = {
/*{ .key = {""},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "",
@@ -93,50 +91,50 @@ struct volume_options @FOP_PREFIX @_options[] = {
};
xlator_api_t xlator_api = {
- .init = @FOP_PREFIX @_init,
- .fini = @FOP_PREFIX @_fini,
- .notify = @FOP_PREFIX @_notify,
- .reconfigure = @FOP_PREFIX @_reconfigure,
- .mem_acct_init = @FOP_PREFIX @_mem_acct_init,
- .dump_metrics = @FOP_PREFIX @_dump_metrics,
+ .init = @FOP_PREFIX@_init,
+ .fini = @FOP_PREFIX@_fini,
+ .notify = @FOP_PREFIX@_notify,
+ .reconfigure = @FOP_PREFIX@_reconfigure,
+ .mem_acct_init = @FOP_PREFIX@_mem_acct_init,
+ .dump_metrics = @FOP_PREFIX@_dump_metrics,
.op_version = {GD_OP_VERSION_},
- .dumpops = &@FOP_PREFIX @_dumpops,
- .fops = &@FOP_PREFIX @_fops,
+ .dumpops = &@FOP_PREFIX@_dumpops,
+ .fops = &@FOP_PREFIX@_fops,
.cbks = &@FOP_PREFIX @_cbks,
- .options = @FOP_PREFIX @_options,
+ .options = @FOP_PREFIX@_options,
.identifier = "@XL_NAME@",
.category = GF_EXPERIMENTAL,
};
#pragma fragment HEADER_FMT
-#ifndef __ @HFL_NAME @_H__
-#define __ @HFL_NAME @_H__
+#ifndef __ @HFL_NAME@_H__
+#define __ @HFL_NAME@_H__
#include "@XL_NAME@-mem-types.h"
#include "@XL_NAME@-messages.h"
-#include "glusterfs.h"
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#endif /* __@HFL_NAME@_H__ */
#pragma fragment MEM_HEADER_FMT
-#ifndef __ @HFL_NAME @_H__
-#define __ @HFL_NAME @_H__
+#ifndef __ @HFL_NAME@_H__
+#define __ @HFL_NAME@_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_mdc_mem_types_ {
- gf_ @FOP_PREFIX @_mt_ = gf_common_mt_end + 1,
- gf_ @FOP_PREFIX @_mt_end
+ gf_@FOP_PREFIX@_mt_ = gf_common_mt_end + 1,
+ gf_@FOP_PREFIX@_mt_end
};
#endif /* __@HFL_NAME@_H__ */
#pragma fragment MSG_HEADER_FMT
-#ifndef __ @HFL_NAME @_H__
-#define __ @HFL_NAME @_H__
+#ifndef __@HFL_NAME@_H__
+#define __@HFL_NAME@_H__
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
@@ -148,6 +146,6 @@ enum gf_mdc_mem_types_ {
* glfs-message-id.h.
*/
-GLFS_MSGID(@FOP_PREFIX @, @FOP_PREFIX @_MSG_NO_MEMORY);
+GLFS_MSGID(@FOP_PREFIX@, @FOP_PREFIX@_MSG_NO_MEMORY);
#endif /* __@HFL_NAME@_H__ */
diff --git a/extras/distributed-testing/distributed-test-env b/extras/distributed-testing/distributed-test-env
index 0d57d5237dd..36fdd82e5dd 100644
--- a/extras/distributed-testing/distributed-test-env
+++ b/extras/distributed-testing/distributed-test-env
@@ -32,7 +32,6 @@ BROKEN_TESTS="\
tests/bugs/gfapi/bug-1093594.t\
tests/bugs/replicate/bug-1473026.t\
tests/bugs/replicate/bug-802417.t\
- tests/encryption/crypt.t\
tests/basic/inode-leak.t\
tests/basic/distribute/force-migration.t\
tests/basic/ec/heal-info.t\
diff --git a/extras/distributed-testing/distributed-test-runner.py b/extras/distributed-testing/distributed-test-runner.py
index 7bfb6c9652a..5a07e2feab1 100755
--- a/extras/distributed-testing/distributed-test-runner.py
+++ b/extras/distributed-testing/distributed-test-runner.py
@@ -383,14 +383,17 @@ class Handlers:
return self.shell.call("make install") == 0
@synchronized
- def prove(self, id, test, timeout, valgrind=False, asan_noleaks=True):
+ def prove(self, id, test, timeout, valgrind="no", asan_noleaks=True):
assert id == self.client_id
self.shell.cd(self.gluster_root)
env = "DEBUG=1 "
- if valgrind:
+ if valgrind == "memcheck" or valgrind == "yes":
cmd = "valgrind"
cmd += " --tool=memcheck --leak-check=full --track-origins=yes"
cmd += " --show-leak-kinds=all -v prove -v"
+ elif valgrind == "drd":
+ cmd = "valgrind"
+ cmd += " --tool=drd -v prove -v"
elif asan_noleaks:
cmd = "prove -v"
env += "ASAN_OPTIONS=detect_leaks=0 "
@@ -827,8 +830,9 @@ parser.add_argument("--port", help="server port to listen",
type=int, default=DEFAULT_PORT)
# test role
parser.add_argument("--tester", help="start tester", action="store_true")
-parser.add_argument("--valgrind", help="run tests under valgrind",
- action="store_true")
+parser.add_argument("--valgrind[=memcheck,drd]",
+ help="run tests with valgrind tool 'memcheck' or 'drd'",
+ default="no")
parser.add_argument("--asan", help="test with asan enabled",
action="store_true")
parser.add_argument("--asan-noleaks", help="test with asan but no mem leaks",
diff --git a/extras/ec-heal-script/README.md b/extras/ec-heal-script/README.md
new file mode 100644
index 00000000000..aaefd6681f6
--- /dev/null
+++ b/extras/ec-heal-script/README.md
@@ -0,0 +1,69 @@
+# gluster-heal-scripts
+Scripts to correct extended attributes of fragments of files to make them healble.
+
+Following are the guidelines/suggestions to use these scripts.
+
+1 - Passwordless ssh should be setup for all the nodes of the cluster.
+
+2 - Scripts should be executed from one of these nodes.
+
+3 - Make sure NO "IO" is going on for the files for which we are running
+these two scripts.
+
+4 - There should be no heal going on for the file for which xattrs are being
+set by correct_pending_heals.sh. Disable the self heal while running this script.
+
+5 - All the bricks of the volume should be UP to identify good and bad fragments
+and to decide if an entry is healble or not.
+
+6 - If correct_pending_heals.sh is stopped in the middle while it was processing
+healble entries, it is suggested to re-run gfid_needing_heal_parallel.sh to create
+latest list of healble and non healble entries and "potential_heal" "can_not_heal" files.
+
+7 - Based on the number of entries, these files might take time to get and set the
+stats and xattrs of entries.
+
+8 - A backup of the fragments will be taken on <brick path>/.glusterfs/correct_pending_heals
+ directory with a file name same as gfid.
+
+9 - Once the correctness of the file gets verified by user, these backup should be removed.
+
+10 - Make sure we have enough space on bricks to take these backups.
+
+11 - At the end this will create two files -
+ 1 - modified_and_backedup_files - Contains list of files which have been modified and should be healed.
+ 2 - can_not_heal - Contains list of files which can not be healed.
+
+12 - It is suggested that the integrity of the data of files, which were modified and healed,
+ should be checked by the user.
+
+
+Usage:
+
+Following are the sequence of steps to use these scripts -
+
+1 - ./gfid_needing_heal_parallel.sh <volume name>
+
+ Execute gfid_needing_heal_parallel.sh with volume name to create list of files which could
+ be healed and can not be healed. It creates "potential_heal" and "can_not_heal" files.
+ During execution, it also displays the list of files on consol with the verdict.
+
+2 - ./correct_pending_heals.sh
+
+ Execute correct_pending_heals.sh without any argument. This script processes entries present
+ in "heal" file. It asks user to enter how many files we want to process in one attempt.
+ Once the count is provided, this script will fetch the entries one by one from "potential_heal" file and takes necessary action.
+ If at this point also a file can not be healed, it will be pushed to "can_not_heal" file.
+ If a file can be healed, this script will modify the xattrs of that file fragments and create an entry in "modified_and_backedup_files" file
+
+3 - At the end, all the entries of "potential_heal" will be processed and based on the processing only two files will be left.
+
+ 1 - modified_and_backedup_files - Contains list of files which have been modified and should be healed.
+ 2 - can_not_heal - Contains list of files which can not be healed.
+
+Logs and other files -
+
+1 - modified_and_backedup_files - It contains all the files which could be healed and the location of backup of each fragments.
+2 - can_not_heal - It contains all the files which can not be healed.
+3 - potential_heal - List of files which could be healed and should be processed by "correct_pending_heals.sh"
+4 - /var/log/glusterfs/ec-heal-script.log - It contains logs of both the files.
diff --git a/extras/ec-heal-script/correct_pending_heals.sh b/extras/ec-heal-script/correct_pending_heals.sh
new file mode 100755
index 00000000000..c9f19dd7c89
--- /dev/null
+++ b/extras/ec-heal-script/correct_pending_heals.sh
@@ -0,0 +1,415 @@
+#!/bin/bash
+# Copyright (c) 2019-2020 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+# This script finally resets the xattrs of all the fragments of a file
+# which can be healed as per gfid_needing_heal_parallel.sh.
+# gfid_needing_heal_parallel.sh will produce two files, potential_heal and can_not_heal.
+# This script takes potential_heal as input and resets xattrs of all the fragments
+# of those files present in this file and which could be healed as per
+# trusted.ec.size xattar of the file else it will place the entry in can_not_heal
+# file. Those entries which must be healed will be place in must_heal file
+# after setting xattrs so that user can track those files.
+
+
+MOD_BACKUP_FILES="modified_and_backedup_files"
+CAN_NOT_HEAL="can_not_heal"
+LOG_DIR="/var/log/glusterfs"
+LOG_FILE="$LOG_DIR/ec-heal-script.log"
+LINE_SEP="==================================================="
+
+function heal_log()
+{
+ echo "$1" >> "$LOG_FILE"
+}
+
+function desc ()
+{
+ echo ""
+ echo "This script finally resets the xattrs of all the fragments of a file
+which can be healed as per gfid_needing_heal_parallel.sh.
+gfid_needing_heal_parallel.sh will produce two files, potential_heal and can_not_heal.
+This script takes potential_heal as input and resets xattrs of all the fragments
+of those files present in this file and which could be healed as per
+trusted.ec.size xattar of the file else it will place the entry in can_not_heal
+file. Those entries which must be healed will be place in must_heal file
+after setting xattrs so that user can track those files."
+}
+
+function _init ()
+{
+ if [ $# -ne 0 ]
+ then
+ echo "usage: $0"
+ desc
+ exit 2
+ fi
+
+ if [ ! -f "potential_heal" ]
+ then
+ echo "Nothing to correct. File "potential_heal" does not exist"
+ echo ""
+ desc
+ exit 2
+ fi
+}
+
+function total_file_size_in_hex()
+{
+ local frag_size=$1
+ local size=0
+ local hex_size=""
+
+ size=$((frag_size * 4))
+ hex_size=$(printf '0x%016x' $size)
+ echo "$hex_size"
+}
+
+function backup_file_fragment()
+{
+ local file_host=$1
+ local file_entry=$2
+ local gfid_actual_paths=$3
+ local brick_root=""
+ local temp=""
+ local backup_dir=""
+ local cmd=""
+ local gfid=""
+
+ brick_root=$(echo "$file_entry" | cut -d "#" -f 1)
+ temp=$(echo "$(basename "$BASH_SOURCE")" | cut -d '.' -f 1)
+ backup_dir=$(echo "${brick_root}/.glusterfs/${temp}")
+ file_entry=${file_entry//#}
+
+ gfid=$(echo "${gfid_actual_paths}" | cut -d '|' -f 1 | cut -d '/' -f 5)
+ echo "${file_host}:${backup_dir}/${gfid}" >> "$MOD_BACKUP_FILES"
+
+ cmd="mkdir -p ${backup_dir} && yes | cp -af ${file_entry} ${backup_dir}/${gfid} 2>/dev/null"
+ ssh -n "${file_host}" "${cmd}"
+}
+
+function set_frag_xattr ()
+{
+ local file_host=$1
+ local file_entry=$2
+ local good=$3
+ local cmd1=""
+ local cmd2=""
+ local cmd=""
+ local version="0x00000000000000010000000000000001"
+ local dirty="0x00000000000000010000000000000001"
+
+ if [[ $good -eq 0 ]]
+ then
+ version="0x00000000000000000000000000000000"
+ fi
+
+ cmd1=" setfattr -n trusted.ec.version -v ${version} ${file_entry} &&"
+ cmd2=" setfattr -n trusted.ec.dirty -v ${dirty} ${file_entry}"
+ cmd=${cmd1}${cmd2}
+ ssh -n "${file_host}" "${cmd}"
+}
+
+function set_version_dirty_xattr ()
+{
+ local file_paths=$1
+ local good=$2
+ local gfid_actual_paths=$3
+ local file_entry=""
+ local file_host=""
+ local bpath=""
+
+ for bpath in ${file_paths//,/ }
+ do
+ file_host=$(echo "$bpath" | cut -d ":" -f 1)
+ file_entry=$(echo "$bpath" | cut -d ":" -f 2)
+ backup_file_fragment "$file_host" "$file_entry" "$gfid_actual_paths"
+ file_entry=${file_entry//#}
+ set_frag_xattr "$file_host" "$file_entry" "$good"
+ done
+}
+
+function match_size_xattr_quorum ()
+{
+ local file_paths=$1
+ local file_entry=""
+ local file_host=""
+ local cmd=""
+ local size_xattr=""
+ local bpath=""
+ declare -A xattr_count
+
+ for bpath in ${file_paths//,/ }
+ do
+ size_xattr=""
+ file_host=$(echo "$bpath" | cut -d ":" -f 1)
+ file_entry=$(echo "$bpath" | cut -d ":" -f 2)
+ file_entry=${file_entry//#}
+
+ cmd="getfattr -n trusted.ec.size -d -e hex ${file_entry} 2>/dev/null | grep -w "trusted.ec.size" | cut -d '=' -f 2"
+ size_xattr=$(ssh -n "${file_host}" "${cmd}")
+ if [[ -n $size_xattr ]]
+ then
+ count=$((xattr_count["$size_xattr"] + 1))
+ xattr_count["$size_xattr"]=${count}
+ if [[ $count -ge 4 ]]
+ then
+ echo "${size_xattr}"
+ return
+ fi
+ fi
+ done
+ echo "False"
+}
+
+function match_version_xattr ()
+{
+ local file_paths=$1
+ local file_entry=""
+ local file_host=""
+ local cmd=""
+ local version=""
+ local bpath=""
+ declare -A ver_count
+
+ for bpath in ${file_paths//,/ }
+ do
+ version=""
+ file_host=$(echo "$bpath" | cut -d ":" -f 1)
+ file_entry=$(echo "$bpath" | cut -d ":" -f 2)
+ file_entry=${file_entry//#}
+
+ cmd="getfattr -n trusted.ec.version -d -e hex ${file_entry} 2>/dev/null | grep -w "trusted.ec.version" | cut -d '=' -f 2"
+ version=$(ssh -n "${file_host}" "${cmd}")
+ ver_count["$version"]=$((ver_count["$version"] + 1))
+ done
+ for key in "${ver_count[@]}"
+ do
+ if [[ $key -ge 4 ]]
+ then
+ echo "True"
+ return
+ else
+ echo "False"
+ return
+ fi
+ done
+}
+
+function match_stat_size_with_xattr ()
+{
+ local bpath=$1
+ local size=$2
+ local file_stat=$3
+ local xattr=$4
+ local file_entry=""
+ local file_host=""
+ local cmd=""
+ local stat_output=""
+ local hex_size=""
+
+ file_host=$(echo "$bpath" | cut -d ":" -f 1)
+ file_entry=$(echo "$bpath" | cut -d ":" -f 2)
+
+ file_entry=${file_entry//#}
+ cmd="stat --format=%F:%B:%s $file_entry 2>/dev/null"
+ stat_output=$(ssh -n "${file_host}" "${cmd}")
+ echo "$stat_output" | grep -w "${file_stat}" > /dev/null
+
+ if [[ $? -eq 0 ]]
+ then
+ cmd="getfattr -n trusted.ec.size -d -e hex ${file_entry} 2>/dev/null | grep -w "trusted.ec.size" | cut -d '=' -f 2"
+ hex_size=$(ssh -n "${file_host}" "${cmd}")
+
+ if [[ -z $hex_size || "$hex_size" != "$xattr" ]]
+ then
+ echo "False"
+ return
+ fi
+ size_diff=$(printf '%d' $(( size - hex_size )))
+ if [[ $size_diff -gt 2047 ]]
+ then
+ echo "False"
+ return
+ else
+ echo "True"
+ return
+ fi
+ else
+ echo "False"
+ return
+ fi
+}
+
+function find_file_paths ()
+{
+ local bpath=$1
+ local file_entry=""
+ local file_host=""
+ local cmd=""
+ local brick_root=""
+ local gfid=""
+ local actual_path=""
+ local gfid_path=""
+
+ file_host=$(echo "$bpath" | cut -d ":" -f 1)
+ file_entry=$(echo "$bpath" | cut -d ":" -f 2)
+ brick_root=$(echo "$file_entry" | cut -d "#" -f 1)
+
+ gfid=$(echo "${file_entry}" | grep ".glusterfs")
+ if [[ -n "$gfid" ]]
+ then
+ gfid_path=$(echo "$file_entry" | cut -d "#" -f 2)
+ file_entry=${file_entry//#}
+ cmd="find -L '$brick_root' -samefile '$file_entry' 2>/dev/null | grep -v '.glusterfs' "
+ actual_path=$(ssh -n "${file_host}" "${cmd}")
+ #removing absolute path so that user can refer this from mount point
+ actual_path=${actual_path#"$brick_root"}
+ else
+ actual_path=$(echo "$file_entry" | cut -d "#" -f 2)
+ file_entry=${file_entry//#}
+ cmd="find -L '$brick_root' -samefile '$file_entry' 2>/dev/null | grep '.glusterfs' "
+ gfid_path=$(ssh -n "${file_host}" "${cmd}")
+ gfid_path=${gfid_path#"$brick_root"}
+ fi
+
+ echo "${gfid_path}|${actual_path}"
+}
+
+function log_can_not_heal ()
+{
+ local gfid_actual_paths=$1
+ local file_paths=$2
+ file_paths=${file_paths//#}
+
+ echo "${LINE_SEP}" >> "$CAN_NOT_HEAL"
+ echo "Can Not Heal : $(echo "$gfid_actual_paths" | cut -d '|' -f 2)" >> "$CAN_NOT_HEAL"
+ for bpath in ${file_paths//,/ }
+ do
+ echo "${bpath}" >> "$CAN_NOT_HEAL"
+ done
+}
+
+function check_all_frag_and_set_xattr ()
+{
+ local file_paths=$1
+ local total_size=$2
+ local file_stat=$3
+ local bpath=""
+ local healthy_count=0
+ local match="False"
+ local matching_bricks=""
+ local bad_bricks=""
+ local gfid_actual_paths=""
+
+ for bpath in ${file_paths//,/ }
+ do
+ if [[ -n "$gfid_actual_paths" ]]
+ then
+ break
+ fi
+ gfid_actual_paths=$(find_file_paths "$bpath")
+ done
+
+ match=$(match_size_xattr_quorum "$file_paths")
+
+# echo "${match} : $bpath" >> "$MOD_BACKUP_FILES"
+
+ if [[ "$match" != "False" ]]
+ then
+ xattr="$match"
+ for bpath in ${file_paths//,/ }
+ do
+ match="False"
+ match=$(match_stat_size_with_xattr "$bpath" "$total_size" "$file_stat" "$xattr")
+ if [[ "$match" == "True" ]]
+ then
+ matching_bricks="${bpath},${matching_bricks}"
+ healthy_count=$((healthy_count + 1))
+ else
+ bad_bricks="${bpath},${bad_bricks}"
+ fi
+ done
+ fi
+
+ if [[ $healthy_count -ge 4 ]]
+ then
+ match="True"
+ echo "${LINE_SEP}" >> "$MOD_BACKUP_FILES"
+ echo "Modified : $(echo "$gfid_actual_paths" | cut -d '|' -f 2)" >> "$MOD_BACKUP_FILES"
+ set_version_dirty_xattr "$matching_bricks" 1 "$gfid_actual_paths"
+ set_version_dirty_xattr "$bad_bricks" 0 "$gfid_actual_paths"
+ else
+ log_can_not_heal "$gfid_actual_paths" "${file_paths}"
+ fi
+
+ echo "$match"
+}
+function set_xattr()
+{
+ local count=$1
+ local heal_entry=""
+ local file_stat=""
+ local frag_size=""
+ local total_size=""
+ local file_paths=""
+ local num=""
+ local can_heal_count=0
+
+ heal_log "Started $(basename $BASH_SOURCE) on $(date) "
+
+ while read -r heal_entry
+ do
+ heal_log "$LINE_SEP"
+ heal_log "${heal_entry}"
+
+ file_stat=$(echo "$heal_entry" | cut -d "|" -f 1)
+ frag_size=$(echo "$file_stat" | rev | cut -d ":" -f 1 | rev)
+ total_size="$(total_file_size_in_hex "$frag_size")"
+ file_paths=$(echo "$heal_entry" | cut -d "|" -f 2)
+ match=$(check_all_frag_and_set_xattr "$file_paths" "$total_size" "$file_stat")
+ if [[ "$match" == "True" ]]
+ then
+ can_heal_count=$((can_heal_count + 1))
+ fi
+
+ sed -i '1d' potential_heal
+ count=$((count - 1))
+ if [ $count == 0 ]
+ then
+ num=$(cat potential_heal | wc -l)
+ heal_log "$LINE_SEP"
+ heal_log "${1} : Processed"
+ heal_log "${can_heal_count} : Modified to Heal"
+ heal_log "$((${1} - can_heal_count)) : Moved to can_not_heal."
+ heal_log "${num} : Pending as Potential Heal"
+ exit 0
+ fi
+
+ done < potential_heal
+}
+
+function main ()
+{
+ local count=0
+
+ read -p "Number of files to correct: [choose between 1-1000] (0 for All):" count
+ if [[ $count -lt 0 || $count -gt 1000 ]]
+ then
+ echo "Provide correct value:"
+ exit 2
+ fi
+
+ if [[ $count -eq 0 ]]
+ then
+ count=$(cat potential_heal | wc -l)
+ fi
+ set_xattr "$count"
+}
+
+_init "$@" && main "$@"
diff --git a/extras/ec-heal-script/gfid_needing_heal_parallel.sh b/extras/ec-heal-script/gfid_needing_heal_parallel.sh
new file mode 100755
index 00000000000..d7f53c97c33
--- /dev/null
+++ b/extras/ec-heal-script/gfid_needing_heal_parallel.sh
@@ -0,0 +1,278 @@
+#!/bin/bash
+# Copyright (c) 2019-2020 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+# This script provides a list of all the files which can be healed or not healed.
+# It also generates two files, potential_heal and can_not_heal, which contains the information
+# of all theose files. These files could be used by correct_pending_heals.sh to correct
+# the fragmnets so that files could be healed by shd.
+
+CAN_NOT_HEAL="can_not_heal"
+CAN_HEAL="potential_heal"
+LINE_SEP="==================================================="
+LOG_DIR="/var/log/glusterfs"
+LOG_FILE="$LOG_DIR/ec-heal-script.log"
+
+function heal_log()
+{
+ echo "$1" >> "$LOG_FILE"
+}
+
+function _init ()
+{
+ if [ $# -ne 1 ]; then
+ echo "usage: $0 <gluster volume name>";
+ echo "This script provides a list of all the files which can be healed or not healed.
+It also generates two files, potential_heal and can_not_heal, which contains the information
+of all theose files. These files could be used by correct_pending_heals.sh to correct
+the fragmnets so that files could be healed by shd."
+ exit 2;
+ fi
+
+ volume=$1;
+}
+
+function get_pending_entries ()
+{
+ local volume_name=$1
+
+ gluster volume heal "$volume_name" info | grep -v ":/" | grep -v "Number of entries" | grep -v "Status:" | sort -u | sed '/^$/d'
+}
+
+function get_entry_path_on_brick()
+{
+ local path="$1"
+ local gfid_string=""
+ if [[ "${path:0:1}" == "/" ]];
+ then
+ echo "$path"
+ else
+ gfid_string="$(echo "$path" | cut -f2 -d':' | cut -f1 -d '>')"
+ echo "/.glusterfs/${gfid_string:0:2}/${gfid_string:2:2}/$gfid_string"
+ fi
+}
+
+function run_command_on_server()
+{
+ local subvolume="$1"
+ local host="$2"
+ local cmd="$3"
+ local output
+ output=$(ssh -n "${host}" "${cmd}")
+ if [ -n "$output" ]
+ then
+ echo "$subvolume:$output"
+ fi
+}
+
+function get_entry_path_all_bricks ()
+{
+ local entry="$1"
+ local bricks="$2"
+ local cmd=""
+ for brick in $bricks
+ do
+ echo "${brick}#$(get_entry_path_on_brick "$entry")"
+ done | tr '\n' ','
+}
+
+function get_stat_for_entry_from_all_bricks ()
+{
+ local entry="$1"
+ local bricks="$2"
+ local subvolume=0
+ local host=""
+ local bpath=""
+ local cmd=""
+
+ for brick in $bricks
+ do
+ if [[ "$((subvolume % 6))" == "0" ]]
+ then
+ subvolume=$((subvolume+1))
+ fi
+ host=$(echo "$brick" | cut -f1 -d':')
+ bpath=$(echo "$brick" | cut -f2 -d':')
+
+ cmd="stat --format=%F:%B:%s $bpath$(get_entry_path_on_brick "$entry") 2>/dev/null"
+ run_command_on_server "$subvolume" "${host}" "${cmd}" &
+ done | sort | uniq -c | sort -rnk1
+}
+
+function get_bricks_from_volume()
+{
+ local v=$1
+ gluster volume info "$v" | grep -E "^Brick[0-9][0-9]*:" | cut -f2- -d':'
+}
+
+function print_entry_gfid()
+{
+ local host="$1"
+ local dirpath="$2"
+ local entry="$3"
+ local gfid
+ gfid="$(ssh -n "${host}" "getfattr -d -m. -e hex $dirpath/$entry 2>/dev/null | grep trusted.gfid=|cut -f2 -d'='")"
+ echo "$entry" - "$gfid"
+}
+
+function print_brick_directory_info()
+{
+ local h="$1"
+ local dirpath="$2"
+ while read -r e
+ do
+ print_entry_gfid "${h}" "${dirpath}" "${e}"
+ done < <(ssh -n "${h}" "ls $dirpath 2>/dev/null")
+}
+
+function print_directory_info()
+{
+ local entry="$1"
+ local bricks="$2"
+ local h
+ local b
+ local gfid
+ for brick in $bricks;
+ do
+ h="$(echo "$brick" | cut -f1 -d':')"
+ b="$(echo "$brick" | cut -f2 -d':')"
+ dirpath="$b$(get_entry_path_on_brick "$entry")"
+ print_brick_directory_info "${h}" "${dirpath}" &
+ done | sort | uniq -c
+}
+
+function print_entries_needing_heal()
+{
+ local quorum=0
+ local entry="$1"
+ local bricks="$2"
+ while read -r line
+ do
+ quorum=$(echo "$line" | awk '{print $1}')
+ if [[ "$quorum" -lt 4 ]]
+ then
+ echo "$line - Not in Quorum"
+ else
+ echo "$line - In Quorum"
+ fi
+ done < <(print_directory_info "$entry" "$bricks")
+}
+
+function find_file_paths ()
+{
+ local bpath=$1
+ local file_entry=""
+ local file_host=""
+ local cmd=""
+ local brick_root=""
+ local gfid=""
+ local actual_path=""
+ local gfid_path=""
+
+ file_host=$(echo "$bpath" | cut -d ":" -f 1)
+ file_entry=$(echo "$bpath" | cut -d ":" -f 2)
+ brick_root=$(echo "$file_entry" | cut -d "#" -f 1)
+
+ gfid=$(echo "${file_entry}" | grep ".glusterfs")
+
+ if [[ -n "$gfid" ]]
+ then
+ gfid_path=$(echo "$file_entry" | cut -d "#" -f 2)
+ file_entry=${file_entry//#}
+ cmd="find -L '$brick_root' -samefile '$file_entry' 2>/dev/null | grep -v '.glusterfs' "
+ actual_path=$(ssh -n "${file_host}" "${cmd}")
+ #removing absolute path so that user can refer this from mount point
+ actual_path=${actual_path#"$brick_root"}
+ else
+ actual_path=$(echo "$file_entry" | cut -d "#" -f 2)
+ file_entry=${file_entry//#}
+ cmd="find -L '$brick_root' -samefile '$file_entry' 2>/dev/null | grep '.glusterfs' "
+ gfid_path=$(ssh -n "${file_host}" "${cmd}")
+ gfid_path=${gfid_path#"$brick_root"}
+ fi
+
+ echo "${gfid_path}|${actual_path}"
+}
+
+function log_can_not_heal ()
+{
+ local gfid_actual_paths=$1
+ local file_paths=$2
+ file_paths=${file_paths//#}
+
+ echo "${LINE_SEP}" >> "$CAN_NOT_HEAL"
+ echo "Can Not Heal : $(echo "$gfid_actual_paths" | cut -d '|' -f 2)" >> "$CAN_NOT_HEAL"
+ for bpath in ${file_paths//,/ }
+ do
+ echo "${bpath}" >> "$CAN_NOT_HEAL"
+ done
+}
+
+function main ()
+{
+ local bricks=""
+ local quorum=0
+ local stat_info=""
+ local file_type=""
+ local gfid_actual_paths=""
+ local bpath=""
+ local file_paths=""
+ local good=0
+ local bad=0
+ bricks=$(get_bricks_from_volume "$volume")
+ rm -f "$CAN_HEAL"
+ rm -f "$CAN_NOT_HEAL"
+ mkdir "$LOG_DIR" -p
+
+ heal_log "Started $(basename "$BASH_SOURCE") on $(date) "
+ while read -r heal_entry
+ do
+ heal_log "------------------------------------------------------------------"
+ heal_log "$heal_entry"
+
+ gfid_actual_paths=""
+ file_paths="$(get_entry_path_all_bricks "$heal_entry" "$bricks")"
+ stat_info="$(get_stat_for_entry_from_all_bricks "$heal_entry" "$bricks")"
+ heal_log "$stat_info"
+
+ quorum=$(echo "$stat_info" | head -1 | awk '{print $1}')
+ good_stat=$(echo "$stat_info" | head -1 | awk '{print $3}')
+ file_type="$(echo "$stat_info" | head -1 | cut -f2 -d':')"
+ if [[ "$file_type" == "directory" ]]
+ then
+ print_entries_needing_heal "$heal_entry" "$bricks"
+ else
+ if [[ "$quorum" -ge 4 ]]
+ then
+ good=$((good + 1))
+ heal_log "Verdict: Healable"
+
+ echo "${good_stat}|$file_paths" >> "$CAN_HEAL"
+ else
+ bad=$((bad + 1))
+ heal_log "Verdict: Not Healable"
+ for bpath in ${file_paths//,/ }
+ do
+ if [[ -z "$gfid_actual_paths" ]]
+ then
+ gfid_actual_paths=$(find_file_paths "$bpath")
+ else
+ break
+ fi
+ done
+ log_can_not_heal "$gfid_actual_paths" "${file_paths}"
+ fi
+ fi
+ done < <(get_pending_entries "$volume")
+ heal_log "========================================="
+ heal_log "Total number of potential heal : ${good}"
+ heal_log "Total number of can not heal : ${bad}"
+ heal_log "========================================="
+}
+
+_init "$@" && main "$@"
diff --git a/extras/ganesha/Makefile.am b/extras/ganesha/Makefile.am
new file mode 100644
index 00000000000..9eaa401b6c8
--- /dev/null
+++ b/extras/ganesha/Makefile.am
@@ -0,0 +1,2 @@
+SUBDIRS = scripts config ocf
+CLEANFILES =
diff --git a/extras/ganesha/config/Makefile.am b/extras/ganesha/config/Makefile.am
new file mode 100644
index 00000000000..c729273096e
--- /dev/null
+++ b/extras/ganesha/config/Makefile.am
@@ -0,0 +1,4 @@
+EXTRA_DIST= ganesha-ha.conf.sample
+
+confdir = $(sysconfdir)/ganesha
+conf_DATA = ganesha-ha.conf.sample
diff --git a/extras/ganesha/config/ganesha-ha.conf.sample b/extras/ganesha/config/ganesha-ha.conf.sample
new file mode 100644
index 00000000000..c22892bde56
--- /dev/null
+++ b/extras/ganesha/config/ganesha-ha.conf.sample
@@ -0,0 +1,19 @@
+# Name of the HA cluster created.
+# must be unique within the subnet
+HA_NAME="ganesha-ha-360"
+#
+# N.B. you may use short names or long names; you may not use IP addrs.
+# Once you select one, stay with it as it will be mildly unpleasant to
+# clean up if you switch later on. Ensure that all names - short and/or
+# long - are in DNS or /etc/hosts on all machines in the cluster.
+#
+# The subset of nodes of the Gluster Trusted Pool that form the ganesha
+# HA cluster. Hostname is specified.
+HA_CLUSTER_NODES="server1,server2,..."
+#HA_CLUSTER_NODES="server1.lab.redhat.com,server2.lab.redhat.com,..."
+#
+# Virtual IPs for each of the nodes specified above.
+VIP_server1="10.0.2.1"
+VIP_server2="10.0.2.2"
+#VIP_server1_lab_redhat_com="10.0.2.1"
+#VIP_server2_lab_redhat_com="10.0.2.2"
diff --git a/extras/ganesha/ocf/Makefile.am b/extras/ganesha/ocf/Makefile.am
new file mode 100644
index 00000000000..990a609f254
--- /dev/null
+++ b/extras/ganesha/ocf/Makefile.am
@@ -0,0 +1,11 @@
+EXTRA_DIST= ganesha_grace ganesha_mon ganesha_nfsd
+
+# The root of the OCF resource agent hierarchy
+# Per the OCF standard, it's always "lib",
+# not "lib64" (even on 64-bit platforms).
+ocfdir = $(prefix)/lib/ocf
+
+# The provider directory
+radir = $(ocfdir)/resource.d/heartbeat
+
+ra_SCRIPTS = ganesha_grace ganesha_mon ganesha_nfsd
diff --git a/extras/ganesha/ocf/ganesha_grace b/extras/ganesha/ocf/ganesha_grace
new file mode 100644
index 00000000000..825f7164597
--- /dev/null
+++ b/extras/ganesha/ocf/ganesha_grace
@@ -0,0 +1,221 @@
+#!/bin/bash
+#
+# Copyright (c) 2014 Anand Subramanian anands@redhat.com
+# Copyright (c) 2015 Red Hat Inc.
+# All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like. Any license provided herein, whether implied or
+# otherwise, applies only to this software file. Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+#
+#
+
+# Initialization:
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+if [ -n "$OCF_DEBUG_LIBRARY" ]; then
+ . $OCF_DEBUG_LIBRARY
+else
+ : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+ . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+fi
+
+OCF_RESKEY_grace_active_default="grace-active"
+: ${OCF_RESKEY_grace_active=${OCF_RESKEY_grace_active_default}}
+
+ganesha_meta_data() {
+ cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="ganesha_grace">
+<version>1.0</version>
+
+<longdesc lang="en">
+This Linux-specific resource agent acts as a dummy
+resource agent for nfs-ganesha.
+</longdesc>
+
+<shortdesc lang="en">Manages the user-space nfs-ganesha NFS server</shortdesc>
+
+<parameters>
+<parameter name="grace_active">
+<longdesc lang="en">NFS-Ganesha grace active attribute</longdesc>
+<shortdesc lang="en">NFS-Ganesha grace active attribute</shortdesc>
+<content type="string" default="grace-active" />
+</parameter>
+</parameters>
+
+<actions>
+<action name="start" timeout="40s" />
+<action name="stop" timeout="40s" />
+<action name="status" timeout="20s" interval="60s" />
+<action name="monitor" depth="0" timeout="10s" interval="5s" />
+<action name="notify" timeout="10s" />
+<action name="meta-data" timeout="20s" />
+</actions>
+</resource-agent>
+END
+
+return ${OCF_SUCCESS}
+}
+
+ganesha_grace_usage() {
+ echo "ganesha.nfsd USAGE"
+}
+
+# Make sure meta-data and usage always succeed
+case $__OCF_ACTION in
+ meta-data) ganesha_meta_data
+ exit ${OCF_SUCCESS}
+ ;;
+ usage|help) ganesha_usage
+ exit ${OCF_SUCCESS}
+ ;;
+ *)
+ ;;
+esac
+
+ganesha_grace_start()
+{
+ local rc=${OCF_ERR_GENERIC}
+ local host=$(hostname -s)
+
+ ocf_log debug "ganesha_grace_start()"
+ # give ganesha_mon RA a chance to set the crm_attr first
+ # I mislike the sleep, but it's not clear that looping
+ # with a small sleep is necessarily better
+ # start has a 40sec timeout, so a 5sec sleep here is okay
+ sleep 5
+ attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null)
+ if [ $? -ne 0 ]; then
+ host=$(hostname)
+ attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null )
+ if [ $? -ne 0 ]; then
+ ocf_log info "grace start: crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} failed"
+ fi
+ fi
+
+ # Three possibilities:
+ # 1. There is no attribute at all and attr_updater returns
+ # a zero length string. This happens when
+ # ganesha_mon::monitor hasn't run at least once to set
+ # the attribute. The assumption here is that the system
+ # is coming up. We pretend, for now, that the node is
+ # healthy, to allow the system to continue coming up.
+ # It will cure itself in a few seconds
+ # 2. There is an attribute, and it has the value "1"; this
+ # node is healthy.
+ # 3. There is an attribute, but it has no value or the value
+ # "0"; this node is not healthy.
+
+ # case 1
+ if [[ -z "${attr}" ]]; then
+ return ${OCF_SUCCESS}
+ fi
+
+ # case 2
+ if [[ "${attr}" = *"value=1" ]]; then
+ return ${OCF_SUCCESS}
+ fi
+
+ # case 3
+ return ${OCF_NOT_RUNNING}
+}
+
+ganesha_grace_stop()
+{
+
+ ocf_log debug "ganesha_grace_stop()"
+ return ${OCF_SUCCESS}
+}
+
+ganesha_grace_notify()
+{
+ # since this is a clone RA we should only ever see pre-start
+ # or post-stop
+ mode="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}"
+ case "${mode}" in
+ pre-start | post-stop)
+ dbus-send --print-reply --system --dest=org.ganesha.nfsd /org/ganesha/nfsd/admin org.ganesha.nfsd.admin.grace string:${OCF_RESKEY_CRM_meta_notify_stop_uname}
+ if [ $? -ne 0 ]; then
+ ocf_log info "dbus-send --print-reply --system --dest=org.ganesha.nfsd /org/ganesha/nfsd/admin org.ganesha.nfsd.admin.grace string:${OCF_RESKEY_CRM_meta_notify_stop_uname} failed"
+ fi
+ ;;
+ esac
+
+ return ${OCF_SUCCESS}
+}
+
+ganesha_grace_monitor()
+{
+ local host=$(hostname -s)
+
+ ocf_log debug "monitor"
+
+ attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null)
+ if [ $? -ne 0 ]; then
+ host=$(hostname)
+ attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null)
+ if [ $? -ne 0 ]; then
+ ocf_log info "crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} failed"
+ fi
+ fi
+
+ # if there is no attribute (yet), maybe it's because
+ # this RA started before ganesha_mon (nfs-mon) has had
+ # chance to create it. In which case we'll pretend
+ # everything is okay this time around
+ if [[ -z "${attr}" ]]; then
+ return ${OCF_SUCCESS}
+ fi
+
+ if [[ "${attr}" = *"value=1" ]]; then
+ return ${OCF_SUCCESS}
+ fi
+
+ return ${OCF_NOT_RUNNING}
+}
+
+ganesha_grace_validate()
+{
+ return ${OCF_SUCCESS}
+}
+
+ganesha_grace_validate
+
+# Translate each action into the appropriate function call
+case $__OCF_ACTION in
+start) ganesha_grace_start
+ ;;
+stop) ganesha_grace_stop
+ ;;
+status|monitor) ganesha_grace_monitor
+ ;;
+notify) ganesha_grace_notify
+ ;;
+*) ganesha_grace_usage
+ exit ${OCF_ERR_UNIMPLEMENTED}
+ ;;
+esac
+
+rc=$?
+
+# The resource agent may optionally log a debug message
+ocf_log debug "${OCF_RESOURCE_INSTANCE} ${__OCF_ACTION} returned $rc"
+exit $rc
diff --git a/extras/ganesha/ocf/ganesha_mon b/extras/ganesha/ocf/ganesha_mon
new file mode 100644
index 00000000000..2b4a9d6da84
--- /dev/null
+++ b/extras/ganesha/ocf/ganesha_mon
@@ -0,0 +1,234 @@
+#!/bin/bash
+#
+# Copyright (c) 2014 Anand Subramanian anands@redhat.com
+# Copyright (c) 2015 Red Hat Inc.
+# All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like. Any license provided herein, whether implied or
+# otherwise, applies only to this software file. Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+#
+#
+
+# Initialization:
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+if [ -n "${OCF_DEBUG_LIBRARY}" ]; then
+ . ${OCF_DEBUG_LIBRARY}
+else
+ : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+ . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+fi
+
+# Defaults
+OCF_RESKEY_ganesha_active_default="ganesha-active"
+OCF_RESKEY_grace_active_default="grace-active"
+OCF_RESKEY_grace_delay_default="5"
+
+: ${OCF_RESKEY_ganesha_active=${OCF_RESKEY_ganesha_active_default}}
+: ${OCF_RESKEY_grace_active=${OCF_RESKEY_grace_active_default}}
+: ${OCF_RESKEY_grace_delay=${OCF_RESKEY_grace_delay_default}}
+
+ganesha_meta_data() {
+ cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="ganesha_mon">
+<version>1.0</version>
+
+<longdesc lang="en">
+This Linux-specific resource agent acts as a dummy
+resource agent for nfs-ganesha.
+</longdesc>
+
+<shortdesc lang="en">Manages the user-space nfs-ganesha NFS server</shortdesc>
+
+<parameters>
+<parameter name="ganesha_active">
+<longdesc lang="en">NFS-Ganesha daemon active attribute</longdesc>
+<shortdesc lang="en">NFS-Ganesha daemon active attribute</shortdesc>
+<content type="string" default="ganesha-active" />
+</parameter>
+<parameter name="grace_active">
+<longdesc lang="en">NFS-Ganesha grace active attribute</longdesc>
+<shortdesc lang="en">NFS-Ganesha grace active attribute</shortdesc>
+<content type="string" default="grace-active" />
+</parameter>
+<parameter name="grace_delay">
+<longdesc lang="en">
+NFS-Ganesha grace delay.
+When changing this, adjust the ganesha_grace RA's monitor interval to match.
+</longdesc>
+<shortdesc lang="en">NFS-Ganesha grace delay</shortdesc>
+<content type="string" default="5" />
+</parameter>
+</parameters>
+
+<actions>
+<action name="start" timeout="40s" />
+<action name="stop" timeout="40s" />
+<action name="status" timeout="20s" interval="60s" />
+<action name="monitor" depth="0" timeout="10s" interval="10s" />
+<action name="meta-data" timeout="20s" />
+</actions>
+</resource-agent>
+END
+
+return ${OCF_SUCCESS}
+}
+
+ganesha_mon_usage() {
+ echo "ganesha.nfsd USAGE"
+}
+
+# Make sure meta-data and usage always succeed
+case ${__OCF_ACTION} in
+ meta-data) ganesha_meta_data
+ exit ${OCF_SUCCESS}
+ ;;
+ usage|help) ganesha_usage
+ exit ${OCF_SUCCESS}
+ ;;
+ *)
+ ;;
+esac
+
+ganesha_mon_start()
+{
+ ocf_log debug "ganesha_mon_start"
+ ganesha_mon_monitor
+ return $OCF_SUCCESS
+}
+
+ganesha_mon_stop()
+{
+ ocf_log debug "ganesha_mon_stop"
+ return $OCF_SUCCESS
+}
+
+ganesha_mon_monitor()
+{
+ local host=$(hostname -s)
+ local pid_file="/var/run/ganesha.pid"
+ local rhel6_pid_file="/var/run/ganesha.nfsd.pid"
+ local proc_pid="/proc/"
+
+ # RHEL6 /etc/init.d/nfs-ganesha adds -p /var/run/ganesha.nfsd.pid
+ # RHEL7 systemd does not. Would be nice if all distros used the
+ # same pid file.
+ if [ -e ${rhel6_pid_file} ]; then
+ pid_file=${rhel6_pid_file}
+ fi
+ if [ -e ${pid_file} ]; then
+ proc_pid="${proc_pid}$(cat ${pid_file})"
+ fi
+
+ if [ "x${proc_pid}" != "x/proc/" -a -d ${proc_pid} ]; then
+
+ attrd_updater -n ${OCF_RESKEY_ganesha_active} -v 1
+ if [ $? -ne 0 ]; then
+ ocf_log info "warning: attrd_updater -n ${OCF_RESKEY_ganesha_active} -v 1 failed"
+ fi
+
+ # ganesha_grace (nfs-grace) RA follows grace-active attr
+ # w/ constraint location
+ attrd_updater -n ${OCF_RESKEY_grace_active} -v 1
+ if [ $? -ne 0 ]; then
+ ocf_log info "warning: attrd_updater -n ${OCF_RESKEY_grace_active} -v 1 failed"
+ fi
+
+ # ganesha_mon (nfs-mon) and ganesha_grace (nfs-grace)
+ # track grace-active crm_attr (attr != crm_attr)
+ # we can't just use the attr as there's no way to query
+ # its value in RHEL6 pacemaker
+
+ crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 2> /dev/null
+ if [ $? -ne 0 ]; then
+ host=$(hostname)
+ crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 2> /dev/null
+ if [ $? -ne 0 ]; then
+ ocf_log info "mon monitor warning: crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 failed"
+ fi
+ fi
+
+ return ${OCF_SUCCESS}
+ fi
+
+ # VIP fail-over is triggered by clearing the
+ # ganesha-active node attribute on this node.
+ #
+ # Meanwhile the ganesha_grace notify() runs when its
+ # nfs-grace resource is disabled on a node; which
+ # is triggered by clearing the grace-active attribute
+ # on this node.
+ #
+ # We need to allow time for it to run and put
+ # the remaining ganesha.nfsds into grace before
+ # initiating the VIP fail-over.
+
+ attrd_updater -D -n ${OCF_RESKEY_grace_active}
+ if [ $? -ne 0 ]; then
+ ocf_log info "warning: attrd_updater -D -n ${OCF_RESKEY_grace_active} failed"
+ fi
+
+ host=$(hostname -s)
+ crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 2> /dev/null
+ if [ $? -ne 0 ]; then
+ host=$(hostname)
+ crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 2> /dev/null
+ if [ $? -ne 0 ]; then
+ ocf_log info "mon monitor warning: crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 failed"
+ fi
+ fi
+
+ sleep ${OCF_RESKEY_grace_delay}
+
+ attrd_updater -D -n ${OCF_RESKEY_ganesha_active}
+ if [ $? -ne 0 ]; then
+ ocf_log info "warning: attrd_updater -D -n ${OCF_RESKEY_ganesha_active} failed"
+ fi
+
+ return ${OCF_SUCCESS}
+}
+
+ganesha_mon_validate()
+{
+ return ${OCF_SUCCESS}
+}
+
+ganesha_mon_validate
+
+# Translate each action into the appropriate function call
+case ${__OCF_ACTION} in
+start) ganesha_mon_start
+ ;;
+stop) ganesha_mon_stop
+ ;;
+status|monitor) ganesha_mon_monitor
+ ;;
+*) ganesha_mon_usage
+ exit ${OCF_ERR_UNIMPLEMENTED}
+ ;;
+esac
+
+rc=$?
+
+# The resource agent may optionally log a debug message
+ocf_log debug "${OCF_RESOURCE_INSTANCE} ${__OCF_ACTION} returned $rc"
+exit $rc
diff --git a/extras/ganesha/ocf/ganesha_nfsd b/extras/ganesha/ocf/ganesha_nfsd
new file mode 100644
index 00000000000..f91e8b6b8f7
--- /dev/null
+++ b/extras/ganesha/ocf/ganesha_nfsd
@@ -0,0 +1,167 @@
+#!/bin/bash
+#
+# Copyright (c) 2014 Anand Subramanian anands@redhat.com
+# Copyright (c) 2015 Red Hat Inc.
+# All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like. Any license provided herein, whether implied or
+# otherwise, applies only to this software file. Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+#
+#
+
+# Initialization:
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+if [ -n "${OCF_DEBUG_LIBRARY}" ]; then
+ . ${OCF_DEBUG_LIBRARY}
+else
+ : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+ . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+fi
+
+OCF_RESKEY_ha_vol_mnt_default="/run/gluster/shared_storage"
+: ${OCF_RESKEY_ha_vol_mnt=${OCF_RESKEY_ha_vol_mnt_default}}
+
+ganesha_meta_data() {
+ cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="ganesha_nfsd">
+<version>1.0</version>
+
+<longdesc lang="en">
+This Linux-specific resource agent acts as a dummy
+resource agent for nfs-ganesha.
+</longdesc>
+
+<shortdesc lang="en">Manages the user-space nfs-ganesha NFS server</shortdesc>
+
+<parameters>
+<parameter name="ha_vol_mnt">
+<longdesc lang="en">HA State Volume Mount Point</longdesc>
+<shortdesc lang="en">HA_State Volume Mount Point</shortdesc>
+<content type="string" default="" />
+</parameter>
+</parameters>
+
+<actions>
+<action name="start" timeout="5s" />
+<action name="stop" timeout="5s" />
+<action name="status" depth="0" timeout="5s" interval="0" />
+<action name="monitor" depth="0" timeout="5s" interval="0" />
+<action name="meta-data" timeout="20s" />
+</actions>
+</resource-agent>
+END
+
+return ${OCF_SUCCESS}
+}
+
+ganesha_nfsd_usage() {
+ echo "ganesha.nfsd USAGE"
+}
+
+# Make sure meta-data and usage always succeed
+case $__OCF_ACTION in
+ meta-data) ganesha_meta_data
+ exit ${OCF_SUCCESS}
+ ;;
+ usage|help) ganesha_usage
+ exit ${OCF_SUCCESS}
+ ;;
+ *)
+ ;;
+esac
+
+ganesha_nfsd_start()
+{
+ local long_host=$(hostname)
+
+ if [[ -d /var/lib/nfs ]]; then
+ mv /var/lib/nfs /var/lib/nfs.backup
+ if [ $? -ne 0 ]; then
+ ocf_log notice "mv /var/lib/nfs /var/lib/nfs.backup failed"
+ fi
+ ln -s ${OCF_RESKEY_ha_vol_mnt}/nfs-ganesha/${long_host}/nfs /var/lib/nfs
+ if [ $? -ne 0 ]; then
+ ocf_log notice "ln -s ${OCF_RESKEY_ha_vol_mnt}/nfs-ganesha/${long_host}/nfs /var/lib/nfs failed"
+ fi
+ fi
+
+ return ${OCF_SUCCESS}
+}
+
+ganesha_nfsd_stop()
+{
+
+ if [ -L /var/lib/nfs -a -d /var/lib/nfs.backup ]; then
+ rm -f /var/lib/nfs
+ if [ $? -ne 0 ]; then
+ ocf_log notice "rm -f /var/lib/nfs failed"
+ fi
+ mv /var/lib/nfs.backup /var/lib/nfs
+ if [ $? -ne 0 ]; then
+ ocf_log notice "mv /var/lib/nfs.backup /var/lib/nfs failed"
+ fi
+ fi
+
+ return ${OCF_SUCCESS}
+}
+
+ganesha_nfsd_monitor()
+{
+ # pacemaker checks to see if RA is already running before starting it.
+ # if we return success, then it's presumed it's already running and
+ # doesn't need to be started, i.e. invoke the start action.
+ # return something other than success to make pacemaker invoke the
+ # start action
+ if [[ -L /var/lib/nfs ]]; then
+ return ${OCF_SUCCESS}
+ fi
+ return ${OCF_NOT_RUNNING}
+}
+
+ganesha_nfsd_validate()
+{
+ return ${OCF_SUCCESS}
+}
+
+ganesha_nfsd_validate
+
+# ocf_log notice "ganesha_nfsd ${OCF_RESOURCE_INSTANCE} $__OCF_ACTION"
+
+# Translate each action into the appropriate function call
+case $__OCF_ACTION in
+start) ganesha_nfsd_start
+ ;;
+stop) ganesha_nfsd_stop
+ ;;
+status|monitor) ganesha_nfsd_monitor
+ ;;
+*) ganesha_nfsd_usage
+ exit ${OCF_ERR_UNIMPLEMENTED}
+ ;;
+esac
+
+rc=$?
+
+# The resource agent may optionally log a debug message
+ocf_log debug "${OCF_RESOURCE_INSTANCE} ${__OCF_ACTION} returned $rc"
+exit $rc
diff --git a/extras/ganesha/scripts/Makefile.am b/extras/ganesha/scripts/Makefile.am
new file mode 100644
index 00000000000..7e345fd5f19
--- /dev/null
+++ b/extras/ganesha/scripts/Makefile.am
@@ -0,0 +1,6 @@
+EXTRA_DIST= create-export-ganesha.sh generate-epoch.py dbus-send.sh \
+ ganesha-ha.sh
+
+scriptsdir = $(libexecdir)/ganesha
+scripts_SCRIPTS = create-export-ganesha.sh dbus-send.sh generate-epoch.py \
+ ganesha-ha.sh
diff --git a/extras/ganesha/scripts/create-export-ganesha.sh b/extras/ganesha/scripts/create-export-ganesha.sh
new file mode 100755
index 00000000000..3040e8138b0
--- /dev/null
+++ b/extras/ganesha/scripts/create-export-ganesha.sh
@@ -0,0 +1,92 @@
+#!/bin/bash
+
+#This script is called by glusterd when the user
+#tries to export a volume via NFS-Ganesha.
+#An export file specific to a volume
+#is created in GANESHA_DIR/exports.
+
+# Try loading the config from any of the distro
+# specific configuration locations
+if [ -f /etc/sysconfig/ganesha ]
+ then
+ . /etc/sysconfig/ganesha
+fi
+if [ -f /etc/conf.d/ganesha ]
+ then
+ . /etc/conf.d/ganesha
+fi
+if [ -f /etc/default/ganesha ]
+ then
+ . /etc/default/ganesha
+fi
+
+GANESHA_DIR=${1%/}
+OPTION=$2
+VOL=$3
+CONF=$GANESHA_DIR"/ganesha.conf"
+declare -i EXPORT_ID
+
+function check_cmd_status()
+{
+ if [ "$1" != "0" ]
+ then
+ rm -rf $GANESHA_DIR/exports/export.$VOL.conf
+ sed -i /$VOL.conf/d $CONF
+ exit 1
+ fi
+}
+
+
+if [ ! -d "$GANESHA_DIR/exports" ];
+ then
+ mkdir $GANESHA_DIR/exports
+ check_cmd_status `echo $?`
+fi
+
+function write_conf()
+{
+echo -e "# WARNING : Using Gluster CLI will overwrite manual
+# changes made to this file. To avoid it, edit the
+# file and run ganesha-ha.sh --refresh-config."
+
+echo "EXPORT{"
+echo " Export_Id = 2;"
+echo " Path = \"/$VOL\";"
+echo " FSAL {"
+echo " name = "GLUSTER";"
+echo " hostname=\"localhost\";"
+echo " volume=\"$VOL\";"
+echo " }"
+echo " Access_type = RW;"
+echo " Disable_ACL = true;"
+echo ' Squash="No_root_squash";'
+echo " Pseudo=\"/$VOL\";"
+echo ' Protocols = "3", "4" ;'
+echo ' Transports = "UDP","TCP";'
+echo ' SecType = "sys";'
+echo ' Security_Label = False;'
+echo " }"
+}
+if [ "$OPTION" = "on" ];
+then
+ if ! (cat $CONF | grep $VOL.conf\"$ )
+ then
+ write_conf $@ > $GANESHA_DIR/exports/export.$VOL.conf
+ echo "%include \"$GANESHA_DIR/exports/export.$VOL.conf\"" >> $CONF
+ count=`ls -l $GANESHA_DIR/exports/*.conf | wc -l`
+ if [ "$count" = "1" ] ; then
+ EXPORT_ID=2
+ else
+ EXPORT_ID=`cat $GANESHA_DIR/.export_added`
+ check_cmd_status `echo $?`
+ EXPORT_ID=EXPORT_ID+1
+ sed -i s/Export_Id.*/"Export_Id= $EXPORT_ID ;"/ \
+ $GANESHA_DIR/exports/export.$VOL.conf
+ check_cmd_status `echo $?`
+ fi
+ echo $EXPORT_ID > $GANESHA_DIR/.export_added
+ fi
+else
+ rm -rf $GANESHA_DIR/exports/export.$VOL.conf
+ sed -i /$VOL.conf/d $CONF
+fi
diff --git a/extras/ganesha/scripts/dbus-send.sh b/extras/ganesha/scripts/dbus-send.sh
new file mode 100755
index 00000000000..9d613a0e7ad
--- /dev/null
+++ b/extras/ganesha/scripts/dbus-send.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+# Try loading the config from any of the distro
+# specific configuration locations
+if [ -f /etc/sysconfig/ganesha ]
+ then
+ . /etc/sysconfig/ganesha
+fi
+if [ -f /etc/conf.d/ganesha ]
+ then
+ . /etc/conf.d/ganesha
+fi
+if [ -f /etc/default/ganesha ]
+ then
+ . /etc/default/ganesha
+fi
+
+GANESHA_DIR=${1%/}
+OPTION=$2
+VOL=$3
+CONF=$GANESHA_DIR"/ganesha.conf"
+
+function check_cmd_status()
+{
+ if [ "$1" != "0" ]
+ then
+ logger "dynamic export failed on node :${hostname -s}"
+ fi
+}
+
+#This function keeps track of export IDs and increments it with every new entry
+function dynamic_export_add()
+{
+ dbus-send --system \
+--dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \
+org.ganesha.nfsd.exportmgr.AddExport string:$GANESHA_DIR/exports/export.$VOL.conf \
+string:"EXPORT(Path=/$VOL)"
+ check_cmd_status `echo $?`
+}
+
+#This function removes an export dynamically(uses the export_id of the export)
+function dynamic_export_remove()
+{
+ # Below bash fetch all the export from ShowExport command and search
+ # export entry based on path and then get its export entry.
+ # There are two possiblities for path, either entire volume will be
+ # exported or subdir. It handles both cases. But it remove only first
+ # entry from the list based on assumption that entry exported via cli
+ # has lowest export id value
+ removed_id=$(dbus-send --type=method_call --print-reply --system \
+ --dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \
+ org.ganesha.nfsd.exportmgr.ShowExports | grep -B 1 -we \
+ "/"$VOL -e "/"$VOL"/" | grep uint16 | awk '{print $2}' \
+ | head -1)
+
+ dbus-send --print-reply --system \
+--dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \
+org.ganesha.nfsd.exportmgr.RemoveExport uint16:$removed_id
+ check_cmd_status `echo $?`
+}
+
+if [ "$OPTION" = "on" ];
+then
+ dynamic_export_add $@
+fi
+
+if [ "$OPTION" = "off" ];
+then
+ dynamic_export_remove $@
+fi
diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
new file mode 100644
index 00000000000..9790a719e10
--- /dev/null
+++ b/extras/ganesha/scripts/ganesha-ha.sh
@@ -0,0 +1,1199 @@
+#!/bin/bash
+
+# Copyright 2015-2016 Red Hat Inc. All Rights Reserved
+#
+# Pacemaker+Corosync High Availability for NFS-Ganesha
+#
+# setup, teardown, add, delete, refresh-config, and status
+#
+# Each participating node in the cluster is assigned a virtual IP (VIP)
+# which fails over to another node when its associated ganesha.nfsd dies
+# for any reason. After the VIP is moved to another node all the
+# ganesha.nfsds are send a signal using DBUS to put them into NFS GRACE.
+#
+# There are six resource agent types used: ganesha_mon, ganesha_grace,
+# ganesha_nfsd, IPaddr, and Dummy. ganesha_mon is used to monitor the
+# ganesha.nfsd. ganesha_grace is used to send the DBUS signal to put
+# the remaining ganesha.nfsds into grace. ganesha_nfsd is used to start
+# and stop the ganesha.nfsd during setup and teardown. IPaddr manages
+# the VIP. A Dummy resource named $hostname-trigger_ip-1 is used to
+# ensure that the NFS GRACE DBUS signal is sent after the VIP moves to
+# the new host.
+
+GANESHA_HA_SH=$(realpath $0)
+HA_NUM_SERVERS=0
+HA_SERVERS=""
+HA_VOL_NAME="gluster_shared_storage"
+HA_VOL_MNT="/run/gluster/shared_storage"
+HA_CONFDIR=$HA_VOL_MNT"/nfs-ganesha"
+SERVICE_MAN="DISTRO_NOT_FOUND"
+
+# rhel, fedora id, version
+ID=""
+VERSION_ID=""
+
+PCS9OR10_PCS_CNAME_OPTION=""
+PCS9OR10_PCS_CLONE_OPTION="clone"
+SECRET_PEM="/var/lib/glusterd/nfs/secret.pem"
+
+# UNBLOCK RA uses shared_storage which may become unavailable
+# during any of the nodes reboot. Hence increase timeout value.
+PORTBLOCK_UNBLOCK_TIMEOUT="60s"
+
+# Try loading the config from any of the distro
+# specific configuration locations
+if [ -f /etc/sysconfig/ganesha ]
+ then
+ . /etc/sysconfig/ganesha
+fi
+if [ -f /etc/conf.d/ganesha ]
+ then
+ . /etc/conf.d/ganesha
+fi
+if [ -f /etc/default/ganesha ]
+ then
+ . /etc/default/ganesha
+fi
+
+GANESHA_CONF=
+
+function find_rhel7_conf
+{
+ while [[ $# > 0 ]]
+ do
+ key="$1"
+ case $key in
+ -f)
+ CONFFILE="$2"
+ break;
+ ;;
+ *)
+ ;;
+ esac
+ shift
+ done
+}
+
+if [ -z ${CONFFILE} ]
+ then
+ find_rhel7_conf ${OPTIONS}
+
+fi
+
+GANESHA_CONF=${CONFFILE:-/etc/ganesha/ganesha.conf}
+
+usage() {
+
+ echo "Usage : add|delete|refresh-config|status"
+ echo "Add-node : ganesha-ha.sh --add <HA_CONF_DIR> \
+<NODE-HOSTNAME> <NODE-VIP>"
+ echo "Delete-node: ganesha-ha.sh --delete <HA_CONF_DIR> \
+<NODE-HOSTNAME>"
+ echo "Refresh-config : ganesha-ha.sh --refresh-config <HA_CONFDIR> \
+<volume>"
+ echo "Status : ganesha-ha.sh --status <HA_CONFDIR>"
+}
+
+determine_service_manager () {
+
+ if [ -e "/bin/systemctl" ];
+ then
+ SERVICE_MAN="/bin/systemctl"
+ elif [ -e "/sbin/invoke-rc.d" ];
+ then
+ SERVICE_MAN="/sbin/invoke-rc.d"
+ elif [ -e "/sbin/service" ];
+ then
+ SERVICE_MAN="/sbin/service"
+ fi
+ if [[ "${SERVICE_MAN}X" == "DISTRO_NOT_FOUNDX" ]]
+ then
+ logger "Service manager not recognized, exiting"
+ exit 1
+ fi
+}
+
+manage_service ()
+{
+ local action=${1}
+ local new_node=${2}
+ local option=
+
+ if [[ "${action}" == "start" ]]; then
+ option="yes"
+ else
+ option="no"
+ fi
+ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \
+${SECRET_PEM} root@${new_node} "${GANESHA_HA_SH} --setup-ganesha-conf-files $HA_CONFDIR $option"
+
+ if [[ "${SERVICE_MAN}" == "/bin/systemctl" ]]
+ then
+ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \
+${SECRET_PEM} root@${new_node} "${SERVICE_MAN} ${action} nfs-ganesha"
+ else
+ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \
+${SECRET_PEM} root@${new_node} "${SERVICE_MAN} nfs-ganesha ${action}"
+ fi
+}
+
+
+check_cluster_exists()
+{
+ local name=${1}
+ local cluster_name=""
+
+ if [ -e /var/run/corosync.pid ]; then
+ cluster_name=$(pcs status | grep "Cluster name:" | cut -d ' ' -f 3)
+ if [[ "${cluster_name}X" == "${name}X" ]]; then
+ logger "$name already exists, exiting"
+ exit 0
+ fi
+ fi
+}
+
+
+determine_servers()
+{
+ local cmd=${1}
+ local num_servers=0
+ local tmp_ifs=${IFS}
+ local ha_servers=""
+
+ if [ "${cmd}X" != "setupX" -a "${cmd}X" != "statusX" ]; then
+ ha_servers=$(pcs status | grep "Online:" | grep -o '\[.*\]' | sed -e 's/\[//' | sed -e 's/\]//')
+ IFS=$' '
+ for server in ${ha_servers} ; do
+ num_servers=$(expr ${num_servers} + 1)
+ done
+ IFS=${tmp_ifs}
+ HA_NUM_SERVERS=${num_servers}
+ HA_SERVERS="${ha_servers}"
+ else
+ IFS=$','
+ for server in ${HA_CLUSTER_NODES} ; do
+ num_servers=$(expr ${num_servers} + 1)
+ done
+ IFS=${tmp_ifs}
+ HA_NUM_SERVERS=${num_servers}
+ HA_SERVERS="${HA_CLUSTER_NODES//,/ }"
+ fi
+}
+
+stop_ganesha_all()
+{
+ local serverlist=${1}
+ for node in ${serverlist} ; do
+ manage_service "stop" ${node}
+ done
+}
+
+setup_cluster()
+{
+ local name=${1}
+ local num_servers=${2}
+ local servers=${3}
+ local unclean=""
+ local quorum_policy="stop"
+
+ logger "setting up cluster ${name} with the following ${servers}"
+
+ # pcs cluster setup --force ${PCS9OR10_PCS_CNAME_OPTION} ${name} ${servers}
+ pcs cluster setup --force ${PCS9OR10_PCS_CNAME_OPTION} ${name} --enable ${servers}
+ if [ $? -ne 0 ]; then
+ logger "pcs cluster setup ${PCS9OR10_PCS_CNAME_OPTION} ${name} --enable ${servers} failed, shutting down ganesha and bailing out"
+ #set up failed stop all ganesha process and clean up symlinks in cluster
+ stop_ganesha_all "${servers}"
+ exit 1;
+ fi
+
+ # pcs cluster auth ${servers}
+ pcs cluster auth
+ if [ $? -ne 0 ]; then
+ logger "pcs cluster auth failed"
+ fi
+
+ pcs cluster start --all
+ if [ $? -ne 0 ]; then
+ logger "pcs cluster start failed"
+ exit 1;
+ fi
+
+ sleep 1
+ # wait for the cluster to elect a DC before querying or writing
+ # to the CIB. BZ 1334092
+ crmadmin --dc_lookup --timeout=5000 > /dev/null 2>&1
+ while [ $? -ne 0 ]; do
+ crmadmin --dc_lookup --timeout=5000 > /dev/null 2>&1
+ done
+
+ unclean=$(pcs status | grep -u "UNCLEAN")
+ while [[ "${unclean}X" == "UNCLEANX" ]]; do
+ sleep 1
+ unclean=$(pcs status | grep -u "UNCLEAN")
+ done
+ sleep 1
+
+ if [ ${num_servers} -lt 3 ]; then
+ quorum_policy="ignore"
+ fi
+ pcs property set no-quorum-policy=${quorum_policy}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs property set no-quorum-policy=${quorum_policy} failed"
+ fi
+
+ pcs property set stonith-enabled=false
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs property set stonith-enabled=false failed"
+ fi
+}
+
+
+setup_finalize_ha()
+{
+ local cibfile=${1}
+ local stopped=""
+
+ stopped=$(pcs status | grep -u "Stopped")
+ while [[ "${stopped}X" == "StoppedX" ]]; do
+ sleep 1
+ stopped=$(pcs status | grep -u "Stopped")
+ done
+}
+
+
+refresh_config ()
+{
+ local short_host=$(hostname -s)
+ local VOL=${1}
+ local HA_CONFDIR=${2}
+ local short_host=$(hostname -s)
+
+ local export_id=$(grep ^[[:space:]]*Export_Id $HA_CONFDIR/exports/export.$VOL.conf |\
+ awk -F"[=,;]" '{print $2}' | tr -d '[[:space:]]')
+
+
+ if [ -e ${SECRET_PEM} ]; then
+ while [[ ${3} ]]; do
+ current_host=`echo ${3} | cut -d "." -f 1`
+ if [[ ${short_host} != ${current_host} ]]; then
+ output=$(ssh -oPasswordAuthentication=no \
+-oStrictHostKeyChecking=no -i ${SECRET_PEM} root@${current_host} \
+"dbus-send --print-reply --system --dest=org.ganesha.nfsd \
+/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.UpdateExport \
+string:$HA_CONFDIR/exports/export.$VOL.conf \
+string:\"EXPORT(Export_Id=$export_id)\" 2>&1")
+ ret=$?
+ logger <<< "${output}"
+ if [ ${ret} -ne 0 ]; then
+ echo "Refresh-config failed on ${current_host}. Please check logs on ${current_host}"
+ else
+ echo "Refresh-config completed on ${current_host}."
+ fi
+
+ fi
+ shift
+ done
+ else
+ echo "Error: refresh-config failed. Passwordless ssh is not enabled."
+ exit 1
+ fi
+
+ # Run the same command on the localhost,
+ output=$(dbus-send --print-reply --system --dest=org.ganesha.nfsd \
+/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.UpdateExport \
+string:$HA_CONFDIR/exports/export.$VOL.conf \
+string:"EXPORT(Export_Id=$export_id)" 2>&1)
+ ret=$?
+ logger <<< "${output}"
+ if [ ${ret} -ne 0 ] ; then
+ echo "Refresh-config failed on localhost."
+ else
+ echo "Success: refresh-config completed."
+ fi
+}
+
+
+teardown_cluster()
+{
+ local name=${1}
+
+ for server in ${HA_SERVERS} ; do
+ if [[ ${HA_CLUSTER_NODES} != *${server}* ]]; then
+ logger "info: ${server} is not in config, removing"
+
+ pcs cluster stop ${server} --force
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs cluster stop ${server} failed"
+ fi
+
+ pcs cluster node remove ${server}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs cluster node remove ${server} failed"
+ fi
+ fi
+ done
+
+ # BZ 1193433 - pcs doesn't reload cluster.conf after modification
+ # after teardown completes, a subsequent setup will appear to have
+ # 'remembered' the deleted node. You can work around this by
+ # issuing another `pcs cluster node remove $node`,
+ # `crm_node -f -R $server`, or
+ # `cibadmin --delete --xml-text '<node id="$server"
+ # uname="$server"/>'
+
+ pcs cluster stop --all
+ if [ $? -ne 0 ]; then
+ logger "warning pcs cluster stop --all failed"
+ fi
+
+ pcs cluster destroy
+ if [ $? -ne 0 ]; then
+ logger "error pcs cluster destroy failed"
+ exit 1
+ fi
+}
+
+
+cleanup_ganesha_config ()
+{
+ rm -f /etc/corosync/corosync.conf
+ rm -rf /etc/cluster/cluster.conf*
+ rm -rf /var/lib/pacemaker/cib/*
+ sed -r -i -e '/^%include[[:space:]]+".+\.conf"$/d' $HA_CONFDIR/ganesha.conf
+}
+
+do_create_virt_ip_constraints()
+{
+ local cibfile=${1}; shift
+ local primary=${1}; shift
+ local weight="1000"
+
+ # first a constraint location rule that says the VIP must be where
+ # there's a ganesha.nfsd running
+ pcs -f ${cibfile} constraint location ${primary}-group rule score=-INFINITY ganesha-active ne 1
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs constraint location ${primary}-group rule score=-INFINITY ganesha-active ne 1 failed"
+ fi
+
+ # then a set of constraint location prefers to set the prefered order
+ # for where a VIP should move
+ while [[ ${1} ]]; do
+ pcs -f ${cibfile} constraint location ${primary}-group prefers ${1}=${weight}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs constraint location ${primary}-group prefers ${1}=${weight} failed"
+ fi
+ weight=$(expr ${weight} + 1000)
+ shift
+ done
+ # and finally set the highest preference for the VIP to its home node
+ # default weight when created is/was 100.
+ # on Fedora setting appears to be additive, so to get the desired
+ # value we adjust the weight
+ # weight=$(expr ${weight} - 100)
+ pcs -f ${cibfile} constraint location ${primary}-group prefers ${primary}=${weight}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs constraint location ${primary}-group prefers ${primary}=${weight} failed"
+ fi
+}
+
+
+wrap_create_virt_ip_constraints()
+{
+ local cibfile=${1}; shift
+ local primary=${1}; shift
+ local head=""
+ local tail=""
+
+ # build a list of peers, e.g. for a four node cluster, for node1,
+ # the result is "node2 node3 node4"; for node2, "node3 node4 node1"
+ # and so on.
+ while [[ ${1} ]]; do
+ if [[ ${1} == ${primary} ]]; then
+ shift
+ while [[ ${1} ]]; do
+ tail=${tail}" "${1}
+ shift
+ done
+ else
+ head=${head}" "${1}
+ fi
+ shift
+ done
+ do_create_virt_ip_constraints ${cibfile} ${primary} ${tail} ${head}
+}
+
+
+create_virt_ip_constraints()
+{
+ local cibfile=${1}; shift
+
+ while [[ ${1} ]]; do
+ wrap_create_virt_ip_constraints ${cibfile} ${1} ${HA_SERVERS}
+ shift
+ done
+}
+
+
+setup_create_resources()
+{
+ local cibfile=$(mktemp -u)
+
+ # fixup /var/lib/nfs
+ logger "pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION}"
+ pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION} failed"
+ fi
+
+ pcs resource create nfs-mon ocf:heartbeat:ganesha_mon ${PCS9OR10_PCS_CLONE_OPTION}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs resource create nfs-mon ocf:heartbeat:ganesha_mon ${PCS9OR10_PCS_CLONE_OPTION} failed"
+ fi
+
+ # see comment in (/usr/lib/ocf/resource.d/heartbeat/ganesha_grace
+ # start method. Allow time for ganesha_mon to start and set the
+ # ganesha-active crm_attribute
+ sleep 5
+
+ pcs resource create nfs-grace ocf:heartbeat:ganesha_grace ${PCS9OR10_PCS_CLONE_OPTION} notify=true
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs resource create nfs-grace ocf:heartbeat:ganesha_grace ${PCS9OR10_PCS_CLONE_OPTION} failed"
+ fi
+
+ pcs constraint location nfs-grace-clone rule score=-INFINITY grace-active ne 1
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs constraint location nfs-grace-clone rule score=-INFINITY grace-active ne 1"
+ fi
+
+ pcs cluster cib ${cibfile}
+
+ while [[ ${1} ]]; do
+
+ # this is variable indirection
+ # from a nvs like 'VIP_host1=10.7.6.5' or 'VIP_host1="10.7.6.5"'
+ # (or VIP_host-1=..., or VIP_host-1.my.domain.name=...)
+ # a variable 'clean_name' is created (e.g. w/ value 'VIP_host_1')
+ # and a clean nvs (e.g. w/ value 'VIP_host_1="10_7_6_5"')
+ # after the `eval ${clean_nvs}` there is a variable VIP_host_1
+ # with the value '10_7_6_5', and the following \$$ magic to
+ # reference it, i.e. `eval tmp_ipaddr=\$${clean_name}` gives us
+ # ${tmp_ipaddr} with 10_7_6_5 and then convert the _s back to .s
+ # to give us ipaddr="10.7.6.5". whew!
+ name="VIP_${1}"
+ clean_name=${name//[-.]/_}
+ nvs=$(grep "^${name}=" ${HA_CONFDIR}/ganesha-ha.conf)
+ clean_nvs=${nvs//[-.]/_}
+ eval ${clean_nvs}
+ eval tmp_ipaddr=\$${clean_name}
+ ipaddr=${tmp_ipaddr//_/.}
+
+ pcs -f ${cibfile} resource create ${1}-nfs_block ocf:heartbeat:portblock protocol=tcp \
+ portno=2049 action=block ip=${ipaddr} --group ${1}-group
+ if [ $? -ne 0 ]; then
+ logger "warning pcs resource create ${1}-nfs_block failed"
+ fi
+ pcs -f ${cibfile} resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \
+ cidr_netmask=32 op monitor interval=15s --group ${1}-group --after ${1}-nfs_block
+ if [ $? -ne 0 ]; then
+ logger "warning pcs resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \
+ cidr_netmask=32 op monitor interval=15s failed"
+ fi
+
+ pcs -f ${cibfile} constraint order nfs-grace-clone then ${1}-cluster_ip-1
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs constraint order nfs-grace-clone then ${1}-cluster_ip-1 failed"
+ fi
+
+ pcs -f ${cibfile} resource create ${1}-nfs_unblock ocf:heartbeat:portblock protocol=tcp \
+ portno=2049 action=unblock ip=${ipaddr} reset_local_on_unblock_stop=true \
+ tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${1}-group --after ${1}-cluster_ip-1 \
+ op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} \
+ op monitor interval=10s timeout=${PORTBLOCK_UNBLOCK_TIMEOUT}
+ if [ $? -ne 0 ]; then
+ logger "warning pcs resource create ${1}-nfs_unblock failed"
+ fi
+
+
+ shift
+ done
+
+ create_virt_ip_constraints ${cibfile} ${HA_SERVERS}
+
+ pcs cluster cib-push ${cibfile}
+ if [ $? -ne 0 ]; then
+ logger "warning pcs cluster cib-push ${cibfile} failed"
+ fi
+ rm -f ${cibfile}
+}
+
+
+teardown_resources()
+{
+ # local mntpt=$(grep ha-vol-mnt ${HA_CONFIG_FILE} | cut -d = -f 2)
+
+ # restore /var/lib/nfs
+ logger "notice: pcs resource delete nfs_setup-clone"
+ pcs resource delete nfs_setup-clone
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs resource delete nfs_setup-clone failed"
+ fi
+
+ # delete -clone resource agents
+ # in particular delete the ganesha monitor so we don't try to
+ # trigger anything when we shut down ganesha next.
+ pcs resource delete nfs-mon-clone
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs resource delete nfs-mon-clone failed"
+ fi
+
+ pcs resource delete nfs-grace-clone
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs resource delete nfs-grace-clone failed"
+ fi
+
+ while [[ ${1} ]]; do
+ pcs resource delete ${1}-group
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs resource delete ${1}-group failed"
+ fi
+ shift
+ done
+
+}
+
+
+recreate_resources()
+{
+ local cibfile=${1}; shift
+
+ while [[ ${1} ]]; do
+ # this is variable indirection
+ # see the comment on the same a few lines up
+ name="VIP_${1}"
+ clean_name=${name//[-.]/_}
+ nvs=$(grep "^${name}=" ${HA_CONFDIR}/ganesha-ha.conf)
+ clean_nvs=${nvs//[-.]/_}
+ eval ${clean_nvs}
+ eval tmp_ipaddr=\$${clean_name}
+ ipaddr=${tmp_ipaddr//_/.}
+
+ pcs -f ${cibfile} resource create ${1}-nfs_block ocf:heartbeat:portblock protocol=tcp \
+ portno=2049 action=block ip=${ipaddr} --group ${1}-group
+ if [ $? -ne 0 ]; then
+ logger "warning pcs resource create ${1}-nfs_block failed"
+ fi
+ pcs -f ${cibfile} resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \
+ cidr_netmask=32 op monitor interval=15s --group ${1}-group --after ${1}-nfs_block
+ if [ $? -ne 0 ]; then
+ logger "warning pcs resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \
+ cidr_netmask=32 op monitor interval=15s failed"
+ fi
+
+ pcs -f ${cibfile} constraint order nfs-grace-clone then ${1}-cluster_ip-1
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs constraint order nfs-grace-clone then ${1}-cluster_ip-1 failed"
+ fi
+
+ pcs -f ${cibfile} resource create ${1}-nfs_unblock ocf:heartbeat:portblock protocol=tcp \
+ portno=2049 action=unblock ip=${ipaddr} reset_local_on_unblock_stop=true \
+ tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${1}-group --after ${1}-cluster_ip-1 \
+ op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} \
+ op monitor interval=10s timeout=${PORTBLOCK_UNBLOCK_TIMEOUT}
+ if [ $? -ne 0 ]; then
+ logger "warning pcs resource create ${1}-nfs_unblock failed"
+ fi
+
+ shift
+ done
+}
+
+
+addnode_recreate_resources()
+{
+ local cibfile=${1}; shift
+ local add_node=${1}; shift
+ local add_vip=${1}; shift
+
+ recreate_resources ${cibfile} ${HA_SERVERS}
+
+ pcs -f ${cibfile} resource create ${add_node}-nfs_block ocf:heartbeat:portblock \
+ protocol=tcp portno=2049 action=block ip=${add_vip} --group ${add_node}-group
+ if [ $? -ne 0 ]; then
+ logger "warning pcs resource create ${add_node}-nfs_block failed"
+ fi
+ pcs -f ${cibfile} resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr \
+ ip=${add_vip} cidr_netmask=32 op monitor interval=15s --group ${add_node}-group \
+ --after ${add_node}-nfs_block
+ if [ $? -ne 0 ]; then
+ logger "warning pcs resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr \
+ ip=${add_vip} cidr_netmask=32 op monitor interval=15s failed"
+ fi
+
+ pcs -f ${cibfile} constraint order nfs-grace-clone then ${add_node}-cluster_ip-1
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs constraint order nfs-grace-clone then ${add_node}-cluster_ip-1 failed"
+ fi
+ pcs -f ${cibfile} resource create ${add_node}-nfs_unblock ocf:heartbeat:portblock \
+ protocol=tcp portno=2049 action=unblock ip=${add_vip} reset_local_on_unblock_stop=true \
+ tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${add_node}-group --after \
+ ${add_node}-cluster_ip-1 op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start \
+ timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op monitor interval=10s \
+ timeout=${PORTBLOCK_UNBLOCK_TIMEOUT}
+ if [ $? -ne 0 ]; then
+ logger "warning pcs resource create ${add_node}-nfs_unblock failed"
+ fi
+}
+
+
+clear_resources()
+{
+ local cibfile=${1}; shift
+
+ while [[ ${1} ]]; do
+ pcs -f ${cibfile} resource delete ${1}-group
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs -f ${cibfile} resource delete ${1}-group"
+ fi
+
+ shift
+ done
+}
+
+
+addnode_create_resources()
+{
+ local add_node=${1}; shift
+ local add_vip=${1}; shift
+ local cibfile=$(mktemp -u)
+
+ # start HA on the new node
+ pcs cluster start ${add_node}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs cluster start ${add_node} failed"
+ fi
+
+ pcs cluster cib ${cibfile}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs cluster cib ${cibfile} failed"
+ fi
+
+ # delete all the -cluster_ip-1 resources, clearing
+ # their constraints, then create them again so we can
+ # recompute their constraints
+ clear_resources ${cibfile} ${HA_SERVERS}
+ addnode_recreate_resources ${cibfile} ${add_node} ${add_vip}
+
+ HA_SERVERS="${HA_SERVERS} ${add_node}"
+ create_virt_ip_constraints ${cibfile} ${HA_SERVERS}
+
+ pcs cluster cib-push ${cibfile}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs cluster cib-push ${cibfile} failed"
+ fi
+ rm -f ${cibfile}
+}
+
+
+deletenode_delete_resources()
+{
+ local node=${1}; shift
+ local ha_servers=$(echo "${HA_SERVERS}" | sed s/${node}//)
+ local cibfile=$(mktemp -u)
+
+ pcs cluster cib ${cibfile}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs cluster cib ${cibfile} failed"
+ fi
+
+ # delete all the -cluster_ip-1 and -trigger_ip-1 resources,
+ # clearing their constraints, then create them again so we can
+ # recompute their constraints
+ clear_resources ${cibfile} ${HA_SERVERS}
+ recreate_resources ${cibfile} ${ha_servers}
+ HA_SERVERS=$(echo "${ha_servers}" | sed -e "s/ / /")
+
+ create_virt_ip_constraints ${cibfile} ${HA_SERVERS}
+
+ pcs cluster cib-push ${cibfile}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs cluster cib-push ${cibfile} failed"
+ fi
+ rm -f ${cibfile}
+
+}
+
+
+deletenode_update_haconfig()
+{
+ local name="VIP_${1}"
+ local clean_name=${name//[-.]/_}
+
+ ha_servers=$(echo ${HA_SERVERS} | sed -e "s/ /,/")
+ sed -i -e "s/^HA_CLUSTER_NODES=.*$/HA_CLUSTER_NODES=\"${ha_servers// /,}\"/" -e "s/^${name}=.*$//" -e "/^$/d" ${HA_CONFDIR}/ganesha-ha.conf
+}
+
+
+setup_state_volume()
+{
+ local mnt=${HA_VOL_MNT}
+ local longname=""
+ local shortname=""
+ local dname=""
+ local dirname=""
+
+ longname=$(hostname)
+ dname=${longname#$(hostname -s)}
+
+ while [[ ${1} ]]; do
+
+ if [[ ${1} == *${dname} ]]; then
+ dirname=${1}
+ else
+ dirname=${1}${dname}
+ fi
+
+ if [ ! -d ${mnt}/nfs-ganesha/tickle_dir ]; then
+ mkdir ${mnt}/nfs-ganesha/tickle_dir
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname} ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd
+ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd
+ fi
+ if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/state ]; then
+ touch ${mnt}/nfs-ganesha/${dirname}/nfs/state
+ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/state
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm
+ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak
+ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak
+ fi
+ if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state ]; then
+ touch ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state
+ fi
+ for server in ${HA_SERVERS} ; do
+ if [[ ${server} != ${dirname} ]]; then
+ ln -s ${mnt}/nfs-ganesha/${server}/nfs/ganesha ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/${server}
+ ln -s ${mnt}/nfs-ganesha/${server}/nfs/statd ${mnt}/nfs-ganesha/${dirname}/nfs/statd/${server}
+ fi
+ done
+ shift
+ done
+
+}
+
+
+enable_pacemaker()
+{
+ while [[ ${1} ]]; do
+ if [[ "${SERVICE_MAN}" == "/bin/systemctl" ]]; then
+ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \
+${SECRET_PEM} root@${1} "${SERVICE_MAN} enable pacemaker"
+ else
+ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \
+${SECRET_PEM} root@${1} "${SERVICE_MAN} pacemaker enable"
+ fi
+ shift
+ done
+}
+
+
+addnode_state_volume()
+{
+ local newnode=${1}; shift
+ local mnt=${HA_VOL_MNT}
+ local longname=""
+ local dname=""
+ local dirname=""
+
+ longname=$(hostname)
+ dname=${longname#$(hostname -s)}
+
+ if [[ ${newnode} == *${dname} ]]; then
+ dirname=${newnode}
+ else
+ dirname=${newnode}${dname}
+ fi
+
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname} ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd
+ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd
+ fi
+ if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/state ]; then
+ touch ${mnt}/nfs-ganesha/${dirname}/nfs/state
+ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/state
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm
+ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak
+ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak
+ fi
+ if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state ]; then
+ touch ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state
+ fi
+
+ for server in ${HA_SERVERS} ; do
+
+ if [[ ${server} != ${dirname} ]]; then
+ ln -s ${mnt}/nfs-ganesha/${server}/nfs/ganesha ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/${server}
+ ln -s ${mnt}/nfs-ganesha/${server}/nfs/statd ${mnt}/nfs-ganesha/${dirname}/nfs/statd/${server}
+
+ ln -s ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ${mnt}/nfs-ganesha/${server}/nfs/ganesha/${dirname}
+ ln -s ${mnt}/nfs-ganesha/${dirname}/nfs/statd ${mnt}/nfs-ganesha/${server}/nfs/statd/${dirname}
+ fi
+ done
+
+}
+
+
+delnode_state_volume()
+{
+ local delnode=${1}; shift
+ local mnt=${HA_VOL_MNT}
+ local longname=""
+ local dname=""
+ local dirname=""
+
+ longname=$(hostname)
+ dname=${longname#$(hostname -s)}
+
+ if [[ ${delnode} == *${dname} ]]; then
+ dirname=${delnode}
+ else
+ dirname=${delnode}${dname}
+ fi
+
+ rm -rf ${mnt}/nfs-ganesha/${dirname}
+
+ for server in ${HA_SERVERS} ; do
+ if [[ ${server} != ${dirname} ]]; then
+ rm -f ${mnt}/nfs-ganesha/${server}/nfs/ganesha/${dirname}
+ rm -f ${mnt}/nfs-ganesha/${server}/nfs/statd/${dirname}
+ fi
+ done
+}
+
+
+status()
+{
+ local scratch=$(mktemp)
+ local regex_str="^${1}-cluster_ip-1"
+ local healthy=0
+ local index=1
+ local nodes
+
+ # change tabs to spaces, strip leading spaces, including any
+ # new '*' at the beginning of a line introduced in pcs-0.10.x
+ pcs status | sed -e "s/\t/ /g" -e "s/^[ ]*\*//" -e "s/^[ ]*//" > ${scratch}
+
+ nodes[0]=${1}; shift
+
+ # make a regex of the configured nodes
+ # and initalize the nodes array for later
+ while [[ ${1} ]]; do
+
+ regex_str="${regex_str}|^${1}-cluster_ip-1"
+ nodes[${index}]=${1}
+ ((index++))
+ shift
+ done
+
+ # print the nodes that are expected to be online
+ grep -E "Online:" ${scratch}
+
+ echo
+
+ # print the VIPs and which node they are on
+ grep -E "${regex_str}" < ${scratch} | cut -d ' ' -f 1,4
+
+ echo
+
+ # check if the VIP and port block/unblock RAs are on the expected nodes
+ for n in ${nodes[*]}; do
+
+ grep -E -x "${n}-nfs_block \(ocf::heartbeat:portblock\): Started ${n}" > /dev/null 2>&1 ${scratch}
+ result=$?
+ ((healthy+=${result}))
+ grep -E -x "${n}-cluster_ip-1 \(ocf::heartbeat:IPaddr\): Started ${n}" > /dev/null 2>&1 ${scratch}
+ result=$?
+ ((healthy+=${result}))
+ grep -E -x "${n}-nfs_unblock \(ocf::heartbeat:portblock\): Started ${n}" > /dev/null 2>&1 ${scratch}
+ result=$?
+ ((healthy+=${result}))
+ done
+
+ grep -E "\):\ Stopped|FAILED" > /dev/null 2>&1 ${scratch}
+ result=$?
+
+ if [ ${result} -eq 0 ]; then
+ echo "Cluster HA Status: BAD"
+ elif [ ${healthy} -eq 0 ]; then
+ echo "Cluster HA Status: HEALTHY"
+ else
+ echo "Cluster HA Status: FAILOVER"
+ fi
+
+ rm -f ${scratch}
+}
+
+create_ganesha_conf_file()
+{
+ if [[ "$1" == "yes" ]];
+ then
+ if [ -e $GANESHA_CONF ];
+ then
+ rm -rf $GANESHA_CONF
+ fi
+ # The symlink /etc/ganesha/ganesha.conf need to be
+ # created using ganesha conf file mentioned in the
+ # shared storage. Every node will only have this
+ # link and actual file will stored in shared storage,
+ # so that ganesha conf editing of ganesha conf will
+ # be easy as well as it become more consistent.
+
+ ln -s $HA_CONFDIR/ganesha.conf $GANESHA_CONF
+ else
+ # Restoring previous file
+ rm -rf $GANESHA_CONF
+ cp $HA_CONFDIR/ganesha.conf $GANESHA_CONF
+ sed -r -i -e '/^%include[[:space:]]+".+\.conf"$/d' $GANESHA_CONF
+ fi
+}
+
+set_quorum_policy()
+{
+ local quorum_policy="stop"
+ local num_servers=${1}
+
+ if [ ${num_servers} -lt 3 ]; then
+ quorum_policy="ignore"
+ fi
+ pcs property set no-quorum-policy=${quorum_policy}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs property set no-quorum-policy=${quorum_policy} failed"
+ fi
+}
+
+main()
+{
+
+ local cmd=${1}; shift
+ if [[ ${cmd} == *help ]]; then
+ usage
+ exit 0
+ fi
+
+ if (selinuxenabled) ;then
+ semanage boolean -m gluster_use_execmem --on
+ fi
+
+ local osid=""
+
+ osid=$(grep ^ID= /etc/os-release)
+ eval $(echo ${osid} | grep -F ID=)
+ osid=$(grep ^VERSION_ID= /etc/os-release)
+ eval $(echo ${osid} | grep -F VERSION_ID=)
+
+ HA_CONFDIR=${1%/}; shift
+ local ha_conf=${HA_CONFDIR}/ganesha-ha.conf
+ local node=""
+ local vip=""
+
+ # ignore any comment lines
+ cfgline=$(grep ^HA_NAME= ${ha_conf})
+ eval $(echo ${cfgline} | grep -F HA_NAME=)
+ cfgline=$(grep ^HA_CLUSTER_NODES= ${ha_conf})
+ eval $(echo ${cfgline} | grep -F HA_CLUSTER_NODES=)
+
+ case "${cmd}" in
+
+ setup | --setup)
+ logger "setting up ${HA_NAME}"
+
+ check_cluster_exists ${HA_NAME}
+
+ determine_servers "setup"
+
+ # Fedora 29+ and rhel/centos 8 has PCS-0.10.x
+ # default is pcs-0.10.x options but check for
+ # rhel/centos 7 (pcs-0.9.x) and adjust accordingly
+ if [[ ! ${ID} =~ {rhel,centos} ]]; then
+ if [[ ${VERSION_ID} == 7.* ]]; then
+ PCS9OR10_PCS_CNAME_OPTION="--name"
+ PCS9OR10_PCS_CLONE_OPTION="--clone"
+ fi
+ fi
+
+ if [[ "${HA_NUM_SERVERS}X" != "1X" ]]; then
+
+ determine_service_manager
+
+ setup_cluster ${HA_NAME} ${HA_NUM_SERVERS} "${HA_SERVERS}"
+
+ setup_create_resources ${HA_SERVERS}
+
+ setup_finalize_ha
+
+ setup_state_volume ${HA_SERVERS}
+
+ enable_pacemaker ${HA_SERVERS}
+
+ else
+
+ logger "insufficient servers for HA, aborting"
+ fi
+ ;;
+
+ teardown | --teardown)
+ logger "tearing down ${HA_NAME}"
+
+ determine_servers "teardown"
+
+ teardown_resources ${HA_SERVERS}
+
+ teardown_cluster ${HA_NAME}
+
+ cleanup_ganesha_config ${HA_CONFDIR}
+ ;;
+
+ cleanup | --cleanup)
+ cleanup_ganesha_config ${HA_CONFDIR}
+ ;;
+
+ add | --add)
+ node=${1}; shift
+ vip=${1}; shift
+
+ logger "adding ${node} with ${vip} to ${HA_NAME}"
+
+ determine_service_manager
+
+ manage_service "start" ${node}
+
+ determine_servers "add"
+
+ pcs cluster node add ${node}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs cluster node add ${node} failed"
+ fi
+
+ addnode_create_resources ${node} ${vip}
+ # Subsequent add-node recreates resources for all the nodes
+ # that already exist in the cluster. The nodes are picked up
+ # from the entries in the ganesha-ha.conf file. Adding the
+ # newly added node to the file so that the resources specfic
+ # to this node is correctly recreated in the future.
+ clean_node=${node//[-.]/_}
+ echo "VIP_${node}=\"${vip}\"" >> ${HA_CONFDIR}/ganesha-ha.conf
+
+ NEW_NODES="$HA_CLUSTER_NODES,${node}"
+
+ sed -i s/HA_CLUSTER_NODES.*/"HA_CLUSTER_NODES=\"$NEW_NODES\""/ \
+$HA_CONFDIR/ganesha-ha.conf
+
+ addnode_state_volume ${node}
+
+ # addnode_create_resources() already appended ${node} to
+ # HA_SERVERS, so only need to increment HA_NUM_SERVERS
+ # and set quorum policy
+ HA_NUM_SERVERS=$(expr ${HA_NUM_SERVERS} + 1)
+ set_quorum_policy ${HA_NUM_SERVERS}
+ ;;
+
+ delete | --delete)
+ node=${1}; shift
+
+ logger "deleting ${node} from ${HA_NAME}"
+
+ determine_servers "delete"
+
+ deletenode_delete_resources ${node}
+
+ pcs cluster node remove ${node}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs cluster node remove ${node} failed"
+ fi
+
+ deletenode_update_haconfig ${node}
+
+ delnode_state_volume ${node}
+
+ determine_service_manager
+
+ manage_service "stop" ${node}
+
+ HA_NUM_SERVERS=$(expr ${HA_NUM_SERVERS} - 1)
+ set_quorum_policy ${HA_NUM_SERVERS}
+ ;;
+
+ status | --status)
+ determine_servers "status"
+
+ status ${HA_SERVERS}
+ ;;
+
+ refresh-config | --refresh-config)
+ VOL=${1}
+
+ determine_servers "refresh-config"
+
+ refresh_config ${VOL} ${HA_CONFDIR} ${HA_SERVERS}
+ ;;
+
+ setup-ganesha-conf-files | --setup-ganesha-conf-files)
+
+ create_ganesha_conf_file ${1}
+ ;;
+
+ *)
+ # setup and teardown are not intended to be used by a
+ # casual user
+ usage
+ logger "Usage: ganesha-ha.sh add|delete|status"
+ ;;
+
+ esac
+
+ if (selinuxenabled) ;then
+ semanage boolean -m gluster_use_execmem --off
+ fi
+}
+
+main $*
diff --git a/extras/ganesha/scripts/generate-epoch.py b/extras/ganesha/scripts/generate-epoch.py
new file mode 100755
index 00000000000..77af014bab9
--- /dev/null
+++ b/extras/ganesha/scripts/generate-epoch.py
@@ -0,0 +1,48 @@
+#!/usr/bin/python3
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+# Generates unique epoch value on each gluster node to be used by
+# nfs-ganesha service on that node.
+#
+# Configure 'EPOCH_EXEC' option to this script path in
+# '/etc/sysconfig/ganesha' file used by nfs-ganesha service.
+#
+# Construct epoch as follows -
+# first 32-bit contains the now() time
+# rest 32-bit value contains the local glusterd node uuid
+
+import time
+import binascii
+
+# Calculate the now() time into a 64-bit integer value
+def epoch_now():
+ epoch_time = int(time.mktime(time.localtime())) << 32
+ return epoch_time
+
+# Read glusterd UUID and extract first 32-bit of it
+def epoch_uuid():
+ file_name = '/var/lib/glusterd/glusterd.info'
+
+ for line in open(file_name):
+ if "UUID" in line:
+ glusterd_uuid = line.split('=')[1].strip()
+
+ uuid_bin = binascii.unhexlify(glusterd_uuid.replace("-",""))
+
+ epoch_uuid = int(binascii.hexlify(uuid_bin), 32) & 0xFFFF0000
+ return epoch_uuid
+
+# Construct epoch as follows -
+# first 32-bit contains the now() time
+# rest 32-bit value contains the local glusterd node uuid
+epoch = (epoch_now() | epoch_uuid())
+print((str(epoch)))
+
+exit(0)
diff --git a/extras/geo-rep/Makefile.am b/extras/geo-rep/Makefile.am
index e4603ae80b8..09eff308ac4 100644
--- a/extras/geo-rep/Makefile.am
+++ b/extras/geo-rep/Makefile.am
@@ -1,4 +1,4 @@
-scriptsdir = $(datadir)/glusterfs/scripts
+scriptsdir = $(libexecdir)/glusterfs/scripts
scripts_SCRIPTS = gsync-upgrade.sh generate-gfid-file.sh get-gfid.sh \
slave-upgrade.sh schedule_georep.py
diff --git a/extras/geo-rep/gsync-sync-gfid.c b/extras/geo-rep/gsync-sync-gfid.c
index b0c5d31ba0b..47dca0413e9 100644
--- a/extras/geo-rep/gsync-sync-gfid.c
+++ b/extras/geo-rep/gsync-sync-gfid.c
@@ -7,8 +7,8 @@
#include <libgen.h>
#include <ctype.h>
#include <stdlib.h>
-#include "glusterfs.h"
-#include "syscall.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/syscall.h>
#ifndef UUID_CANONICAL_FORM_LEN
#define UUID_CANONICAL_FORM_LEN 36
diff --git a/extras/geo-rep/schedule_georep.py.in b/extras/geo-rep/schedule_georep.py.in
index f29ae020b8f..48b2b507060 100644
--- a/extras/geo-rep/schedule_georep.py.in
+++ b/extras/geo-rep/schedule_georep.py.in
@@ -352,7 +352,7 @@ def get_summary(mastervol, slave_url):
def touch_mount_root(mastervol):
# Create a Mount and Touch the Mount point root,
# Hack to make sure some event available after
- # setting Checkpoint. Without this their is a chance of
+ # setting Checkpoint. Without this there is a chance of
# Checkpoint never completes.
with glustermount("localhost", mastervol) as mnt:
execute(["touch", mnt])
@@ -459,8 +459,8 @@ if __name__ == "__main__":
description=__doc__)
parser.add_argument("mastervol", help="Master Volume Name")
parser.add_argument("slave",
- help="SLAVEHOST or root@SLAVEHOST "
- "or user@SLAVEHOST",
+ help="Slave hostname "
+ "(<username>@SLAVEHOST or SLAVEHOST)",
metavar="SLAVE")
parser.add_argument("slavevol", help="Slave Volume Name")
parser.add_argument("--interval", help="Interval in Seconds. "
diff --git a/extras/glusterd.vol.in b/extras/glusterd.vol.in
index e59b17efcac..5d7bad0e4c8 100644
--- a/extras/glusterd.vol.in
+++ b/extras/glusterd.vol.in
@@ -1,16 +1,15 @@
volume management
type mgmt/glusterd
option working-directory @GLUSTERD_WORKDIR@
- option transport-type socket,rdma
+ option transport-type socket
option transport.socket.keepalive-time 10
option transport.socket.keepalive-interval 2
option transport.socket.read-fail-log off
option transport.socket.listen-port 24007
- option transport.rdma.listen-port 24008
option ping-timeout 0
option event-threads 1
# option lock-timer 180
# option transport.address-family inet6
# option base-port 49152
-# option max-port 65535
+ option max-port 60999
end-volume
diff --git a/extras/glusterfs-georep-upgrade.py b/extras/glusterfs-georep-upgrade.py
new file mode 100755
index 00000000000..634576058d6
--- /dev/null
+++ b/extras/glusterfs-georep-upgrade.py
@@ -0,0 +1,77 @@
+#!/usr/bin/python3
+"""
+
+Copyright (c) 2020 Red Hat, Inc. <http://www.redhat.com>
+This file is part of GlusterFS.
+
+This file is licensed to you under your choice of the GNU Lesser
+General Public License, version 3 or any later version (LGPLv3 or
+later), or the GNU General Public License, version 2 (GPLv2), in all
+cases as published by the Free Software Foundation.
+
+"""
+
+import argparse
+import errno
+import os, sys
+import shutil
+from datetime import datetime
+
+def find_htime_path(brick_path):
+ dirs = []
+ htime_dir = os.path.join(brick_path, '.glusterfs/changelogs/htime')
+ for file in os.listdir(htime_dir):
+ if os.path.isfile(os.path.join(htime_dir,file)) and file.startswith("HTIME"):
+ dirs.append(os.path.join(htime_dir, file))
+ else:
+ raise FileNotFoundError("%s unavailable" % (os.path.join(htime_dir, file)))
+ return dirs
+
+def modify_htime_file(brick_path):
+ htime_file_path_list = find_htime_path(brick_path)
+
+ for htime_file_path in htime_file_path_list:
+ changelog_path = os.path.join(brick_path, '.glusterfs/changelogs')
+ temp_htime_path = os.path.join(changelog_path, 'htime/temp_htime_file')
+ with open(htime_file_path, 'r') as htime_file, open(temp_htime_path, 'w') as temp_htime_file:
+ #extract epoch times from htime file
+ paths = htime_file.read().split("\x00")
+
+ for pth in paths:
+ epoch_no = pth.split(".")[-1]
+ changelog = os.path.basename(pth)
+ #convert epoch time to year, month and day
+ if epoch_no != '':
+ date=(datetime.fromtimestamp(float(int(epoch_no))).strftime("%Y/%m/%d"))
+ #update paths in temp htime file
+ temp_htime_file.write("%s/%s/%s\x00" % (changelog_path, date, changelog))
+ #create directory in the format year/month/days
+ path = os.path.join(changelog_path, date)
+
+ if changelog.startswith("CHANGELOG."):
+ try:
+ os.makedirs(path, mode = 0o600);
+ except OSError as exc:
+ if exc.errno == errno.EEXIST:
+ pass
+ else:
+ raise
+
+ #copy existing changelogs to new directory structure, delete old changelog files
+ shutil.copyfile(pth, os.path.join(path, changelog))
+ os.remove(pth)
+
+ #rename temp_htime_file with htime file
+ os.rename(htime_file_path, os.path.join('%s.bak'%htime_file_path))
+ os.rename(temp_htime_path, htime_file_path)
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument('brick_path', help="This upgrade script, which is to be run on\
+ server side, takes brick path as the argument, \
+ updates paths inside htime file and alters the directory structure \
+ above the changelog files inorder to support new optimised format \
+ of the directory structure as per \
+ https://review.gluster.org/#/c/glusterfs/+/23733/")
+ args = parser.parse_args()
+ modify_htime_file(args.brick_path)
diff --git a/extras/glusterfs-logrotate b/extras/glusterfs-logrotate
index 75f700e6459..6ba6ef18e9f 100644
--- a/extras/glusterfs-logrotate
+++ b/extras/glusterfs-logrotate
@@ -45,3 +45,24 @@
compress
delaycompress
}
+
+# Rotate snapd log
+/var/log/glusterfs/snaps/*/*.log {
+ sharedscripts
+ weekly
+ maxsize 10M
+ minsize 100k
+
+ # 6 months of logs are good enough
+ rotate 26
+
+ missingok
+ compress
+ delaycompress
+ notifempty
+ postrotate
+ for pid in `ps -aef | grep glusterfs | egrep "snapd" | awk '{print $2}'`; do
+ /usr/bin/kill -HUP $pid > /dev/null 2>&1 || true
+ done
+ endscript
+}
diff --git a/extras/group-distributed-virt b/extras/group-distributed-virt
new file mode 100644
index 00000000000..a960b76c694
--- /dev/null
+++ b/extras/group-distributed-virt
@@ -0,0 +1,10 @@
+performance.quick-read=off
+performance.read-ahead=off
+performance.io-cache=off
+performance.low-prio-threads=32
+network.remote-dio=enable
+features.shard=on
+user.cifs=off
+client.event-threads=4
+server.event-threads=4
+performance.client-io-threads=on
diff --git a/extras/group-gluster-block b/extras/group-gluster-block
index 56b406e3641..1e398019e6b 100644
--- a/extras/group-gluster-block
+++ b/extras/group-gluster-block
@@ -5,6 +5,14 @@ performance.stat-prefetch=off
performance.open-behind=off
performance.readdir-ahead=off
performance.strict-o-direct=on
+performance.client-io-threads=on
+performance.io-thread-count=32
+performance.high-prio-threads=32
+performance.normal-prio-threads=32
+performance.low-prio-threads=32
+performance.least-prio-threads=4
+client.event-threads=8
+server.event-threads=8
network.remote-dio=disable
cluster.eager-lock=enable
cluster.quorum-type=auto
diff --git a/extras/group-samba b/extras/group-samba
new file mode 100644
index 00000000000..eeee6e06031
--- /dev/null
+++ b/extras/group-samba
@@ -0,0 +1,11 @@
+features.cache-invalidation=on
+features.cache-invalidation-timeout=600
+performance.cache-samba-metadata=on
+performance.stat-prefetch=on
+performance.cache-invalidation=on
+performance.md-cache-timeout=600
+network.inode-lru-limit=200000
+performance.nl-cache=on
+performance.nl-cache-timeout=600
+performance.readdir-ahead=on
+performance.parallel-readdir=on
diff --git a/extras/group-virt.example b/extras/group-virt.example
index c2ce89d7b9c..cc37c98a25c 100644
--- a/extras/group-virt.example
+++ b/extras/group-virt.example
@@ -2,7 +2,8 @@ performance.quick-read=off
performance.read-ahead=off
performance.io-cache=off
performance.low-prio-threads=32
-network.remote-dio=enable
+network.remote-dio=disable
+performance.strict-o-direct=on
cluster.eager-lock=enable
cluster.quorum-type=auto
cluster.server-quorum-type=server
@@ -16,3 +17,8 @@ cluster.choose-local=off
client.event-threads=4
server.event-threads=4
performance.client-io-threads=on
+network.ping-timeout=20
+server.tcp-user-timeout=20
+server.keepalive-time=10
+server.keepalive-interval=2
+server.keepalive-count=5
diff --git a/extras/hook-scripts/S56glusterd-geo-rep-create-post.sh b/extras/hook-scripts/S56glusterd-geo-rep-create-post.sh
index d8707a8fb4d..7d6052315bb 100755
--- a/extras/hook-scripts/S56glusterd-geo-rep-create-post.sh
+++ b/extras/hook-scripts/S56glusterd-geo-rep-create-post.sh
@@ -92,11 +92,13 @@ if [ -f $pub_file ]; then
ssh -p ${SSH_PORT} ${SSH_OPT} $slave_ip "mv $pub_file_tmp ${pub_file_dname}/${mastervol}_${slavevol}_${pub_file_bname}"
ssh -p ${SSH_PORT} ${SSH_OPT} $slave_ip "gluster system:: copy file /geo-replication/${mastervol}_${slavevol}_common_secret.pem.pub > /dev/null"
ssh -p ${SSH_PORT} ${SSH_OPT} $slave_ip "gluster system:: execute add_secret_pub root geo-replication/${mastervol}_${slavevol}_common_secret.pem.pub > /dev/null"
+ ssh -p ${SSH_PORT} ${SSH_OPT} $slave_ip "gluster vol set ${slavevol} features.read-only on"
else
scp -P ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} ${SSH_OPT} $pub_file $slave_ip:$pub_file_tmp
ssh -p ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} ${SSH_OPT} $slave_ip "mv $pub_file_tmp ${pub_file_dname}/${mastervol}_${slavevol}_${pub_file_bname}"
ssh -p ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} ${SSH_OPT} $slave_ip "gluster system:: copy file /geo-replication/${mastervol}_${slavevol}_common_secret.pem.pub > /dev/null"
ssh -p ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} ${SSH_OPT} $slave_ip "gluster system:: execute add_secret_pub root geo-replication/${mastervol}_${slavevol}_common_secret.pem.pub > /dev/null"
+ ssh -p ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} ${SSH_OPT} $slave_ip "gluster vol set ${slavevol} features.read-only on"
fi
fi
fi
diff --git a/extras/hook-scripts/add-brick/post/Makefile.am b/extras/hook-scripts/add-brick/post/Makefile.am
index bfc0c1cf080..9b236df096d 100644
--- a/extras/hook-scripts/add-brick/post/Makefile.am
+++ b/extras/hook-scripts/add-brick/post/Makefile.am
@@ -1,6 +1,6 @@
-EXTRA_DIST = disabled-quota-root-xattr-heal.sh S13create-subdir-mounts.sh
+EXTRA_DIST = disabled-quota-root-xattr-heal.sh S10selinux-label-brick.sh S13create-subdir-mounts.sh
hookdir = $(GLUSTERD_WORKDIR)/hooks/1/add-brick/post/
if WITH_SERVER
-hook_SCRIPTS = disabled-quota-root-xattr-heal.sh S13create-subdir-mounts.sh
+hook_SCRIPTS = disabled-quota-root-xattr-heal.sh S10selinux-label-brick.sh S13create-subdir-mounts.sh
endif
diff --git a/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh b/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh
new file mode 100755
index 00000000000..4a17c993a77
--- /dev/null
+++ b/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh
@@ -0,0 +1,100 @@
+#!/bin/bash
+#
+# Install to hooks/<HOOKS_VER>/add-brick/post
+#
+# Add an SELinux file context for each brick using the glusterd_brick_t type.
+# This ensures that the brick is relabeled correctly on an SELinux restart or
+# restore. Subsequently, run a restore on the brick path to set the selinux
+# labels.
+#
+###
+
+PROGNAME="Sselinux"
+OPTSPEC="volname:,version:,gd-workdir:,volume-op:"
+VOL=
+
+parse_args () {
+ ARGS=$(getopt -o '' -l ${OPTSPEC} -n ${PROGNAME} -- "$@")
+ eval set -- "${ARGS}"
+
+ while true; do
+ case ${1} in
+ --volname)
+ shift
+ VOL=${1}
+ ;;
+ --gd-workdir)
+ shift
+ GLUSTERD_WORKDIR=$1
+ ;;
+ --version)
+ shift
+ ;;
+ --volume-op)
+ shift
+ ;;
+ *)
+ shift
+ break
+ ;;
+ esac
+ shift
+ done
+}
+
+set_brick_labels()
+{
+ local volname="${1}"
+ local fctx
+ local list=()
+
+ fctx="$(semanage fcontext --list -C)"
+
+ # wait for new brick path to be updated under
+ # ${GLUSTERD_WORKDIR}/vols/${volname}/bricks/
+ sleep 5
+
+ # grab the path for each local brick
+ brickpath="${GLUSTERD_WORKDIR}/vols/${volname}/bricks/"
+ brickdirs=$(
+ find "${brickpath}" -type f -exec grep '^path=' {} \; | \
+ cut -d= -f 2 | \
+ sort -u
+ )
+
+ # create a list of bricks for which custom SELinux
+ # label doesn't exist
+ for b in ${brickdirs}; do
+ pattern="${b}(/.*)?"
+ echo "${fctx}" | grep "^${pattern}\s" >/dev/null
+ if [[ $? -ne 0 ]]; then
+ list+=("${pattern}")
+ fi
+ done
+
+ # Add a file context for each brick path in the list and associate with the
+ # glusterd_brick_t SELinux type.
+ for p in ${list[@]}
+ do
+ semanage fcontext --add -t glusterd_brick_t -r s0 "${p}"
+ done
+
+ # Set the labels for which SELinux label was added above
+ for b in ${brickdirs}
+ do
+ echo "${list[@]}" | grep "${b}" >/dev/null
+ if [[ $? -eq 0 ]]; then
+ restorecon -R "${b}"
+ fi
+ done
+}
+
+SELINUX_STATE=$(which getenforce && getenforce)
+[ "${SELINUX_STATE}" = 'Disabled' ] && exit 0
+
+parse_args "$@"
+[ -z "${VOL}" ] && exit 1
+
+set_brick_labels "${VOL}"
+
+exit 0
diff --git a/extras/hook-scripts/create/post/S10selinux-label-brick.sh b/extras/hook-scripts/create/post/S10selinux-label-brick.sh
index de242d20af4..f9b4b1a57e3 100755
--- a/extras/hook-scripts/create/post/S10selinux-label-brick.sh
+++ b/extras/hook-scripts/create/post/S10selinux-label-brick.sh
@@ -34,18 +34,21 @@ parse_args () {
set_brick_labels()
{
- volname=${1}
+ volname="${1}"
# grab the path for each local brick
- brickpath="/var/lib/glusterd/vols/${volname}/bricks/*"
- brickdirs=$(grep '^path=' "${brickpath}" | cut -d= -f 2 | sort -u)
+ brickpath="/var/lib/glusterd/vols/${volname}/bricks/"
+ brickdirs=$(
+ find "${brickpath}" -type f -exec grep '^path=' {} \; | \
+ cut -d= -f 2 | \
+ sort -u
+ )
for b in ${brickdirs}; do
# Add a file context for each brick path and associate with the
# glusterd_brick_t SELinux type.
- pattern="${b}\(/.*\)?"
+ pattern="${b}(/.*)?"
semanage fcontext --add -t glusterd_brick_t -r s0 "${pattern}"
-
# Set the labels on the new brick path.
restorecon -R "${b}"
done
diff --git a/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh b/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh
index 6eba66fbe07..056b52afe76 100755
--- a/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh
+++ b/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh
@@ -15,45 +15,59 @@ OPTSPEC="volname:"
VOL=
function parse_args () {
- ARGS=$(getopt -o '' -l $OPTSPEC -n $PROGNAME -- "$@")
- eval set -- "$ARGS"
-
- while true; do
- case $1 in
- --volname)
- shift
- VOL=$1
- ;;
- *)
- shift
- break
- ;;
- esac
+ ARGS=$(getopt -o '' -l ${OPTSPEC} -n ${PROGNAME} -- "$@")
+ eval set -- "${ARGS}"
+
+ while true; do
+ case ${1} in
+ --volname)
+ shift
+ VOL=${1}
+ ;;
+ *)
shift
- done
+ break
+ ;;
+ esac
+ shift
+ done
}
function delete_brick_fcontext()
{
- volname=$1
-
- # grab the path for each local brick
- brickdirs=$(grep '^path=' /var/lib/glusterd/vols/${volname}/bricks/* | cut -d= -f 2)
+ local volname=$1
+ local fctx
+ local list=()
- for b in $brickdirs
- do
- # remove the file context associated with the brick path
- semanage fcontext --delete $b\(/.*\)?
- done
+ fctx="$(semanage fcontext --list -C)"
+ # grab the path for each local brick
+ brickpath="/var/lib/glusterd/vols/${volname}/bricks/"
+ brickdirs=$(find "${brickpath}" -type f -exec grep '^path=' {} \; | \
+ cut -d= -f 2 | sort -u)
+ for b in ${brickdirs}
+ do
+ pattern="${b}(/.*)?"
+ echo "${fctx}" | grep "^${pattern}\s" >/dev/null
+ if [[ $? -eq 0 ]]; then
+ list+=("${pattern}")
+ fi
+ done
+ if [[ ${#list[@]} -gt 0 ]]; then
+ printf 'fcontext --delete %s\n' "${list[@]}" | semanage -i -
+ fi
+ for b in ${brickdirs}
+ do
+ restorecon -R "${b}"
+ done
}
SELINUX_STATE=$(which getenforce && getenforce)
[ "${SELINUX_STATE}" = 'Disabled' ] && exit 0
parse_args "$@"
-[ -z "$VOL" ] && exit 1
+[ -z "${VOL}" ] && exit 1
-delete_brick_fcontext $VOL
+delete_brick_fcontext "${VOL}"
# failure to delete the fcontext is not fatal
exit 0
diff --git a/extras/hook-scripts/set/post/S30samba-set.sh b/extras/hook-scripts/set/post/S30samba-set.sh
index c596db02381..854f131f6c8 100755
--- a/extras/hook-scripts/set/post/S30samba-set.sh
+++ b/extras/hook-scripts/set/post/S30samba-set.sh
@@ -89,7 +89,7 @@ function add_samba_share () {
STRING+="glusterfs:loglevel = 7\n"
STRING+="path = /\n"
STRING+="read only = no\n"
- STRING+="guest ok = yes\n"
+ STRING+="kernel share modes = no\n"
printf "$STRING" >> ${CONFIGFILE}
}
@@ -123,9 +123,18 @@ function get_smb () {
usersmbvalue=$(grep user.smb $GLUSTERD_WORKDIR/vols/"$volname"/info |\
cut -d"=" -f2)
- if [ $usercifsvalue = "disable" ] || [ $usersmbvalue = "disable" ]; then
- uservalue="disable"
+ if [ -n "$usercifsvalue" ]; then
+ if [ "$usercifsvalue" = "disable" ] || [ "$usercifsvalue" = "off" ]; then
+ uservalue="disable"
+ fi
fi
+
+ if [ -n "$usersmbvalue" ]; then
+ if [ "$usersmbvalue" = "disable" ] || [ "$usersmbvalue" = "off" ]; then
+ uservalue="disable"
+ fi
+ fi
+
echo "$uservalue"
}
diff --git a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh
index 885ed03ad5b..1f2564b44ff 100755
--- a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh
+++ b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh
@@ -79,9 +79,9 @@ done
if [ "$option" == "disable" ]; then
# Unmount the volume on all the nodes
- umount /var/run/gluster/shared_storage
- cat /etc/fstab | grep -v "gluster_shared_storage /var/run/gluster/shared_storage/" > /var/run/gluster/fstab.tmp
- mv /var/run/gluster/fstab.tmp /etc/fstab
+ umount /run/gluster/shared_storage
+ cat /etc/fstab | grep -v "gluster_shared_storage /run/gluster/shared_storage/" > /run/gluster/fstab.tmp
+ mv /run/gluster/fstab.tmp /etc/fstab
fi
if [ "$is_originator" == 1 ]; then
@@ -104,8 +104,15 @@ function check_volume_status()
echo $status
}
-mount_cmd="mount -t glusterfs $local_node_hostname:/gluster_shared_storage \
- /var/run/gluster/shared_storage"
+key=`echo $5 | cut -d '=' -f 1`
+val=`echo $5 | cut -d '=' -f 2`
+if [ "$key" == "transport.address-family" ]; then
+ mount_cmd="mount -t glusterfs -o xlator-option=transport.address-family=inet6 \
+ $local_node_hostname:/gluster_shared_storage /run/gluster/shared_storage"
+else
+ mount_cmd="mount -t glusterfs $local_node_hostname:/gluster_shared_storage \
+ /run/gluster/shared_storage"
+fi
if [ "$option" == "enable" ]; then
retry=0;
@@ -120,10 +127,10 @@ if [ "$option" == "enable" ]; then
status=$(check_volume_status)
done
# Mount the volume on all the nodes
- umount /var/run/gluster/shared_storage
- mkdir -p /var/run/gluster/shared_storage
+ umount /run/gluster/shared_storage
+ mkdir -p /run/gluster/shared_storage
$mount_cmd
- cp /etc/fstab /var/run/gluster/fstab.tmp
- echo "$local_node_hostname:/gluster_shared_storage /var/run/gluster/shared_storage/ glusterfs defaults 0 0" >> /var/run/gluster/fstab.tmp
- mv /var/run/gluster/fstab.tmp /etc/fstab
+ cp /etc/fstab /run/gluster/fstab.tmp
+ echo "$local_node_hostname:/gluster_shared_storage /run/gluster/shared_storage/ glusterfs defaults 0 0" >> /run/gluster/fstab.tmp
+ mv /run/gluster/fstab.tmp /etc/fstab
fi
diff --git a/extras/hook-scripts/start/post/Makefile.am b/extras/hook-scripts/start/post/Makefile.am
index e32546dc999..792019d3c9f 100644
--- a/extras/hook-scripts/start/post/Makefile.am
+++ b/extras/hook-scripts/start/post/Makefile.am
@@ -1,4 +1,4 @@
-EXTRA_DIST = S29CTDBsetup.sh S30samba-start.sh
+EXTRA_DIST = S29CTDBsetup.sh S30samba-start.sh S31ganesha-start.sh
hookdir = $(GLUSTERD_WORKDIR)/hooks/1/start/post/
if WITH_SERVER
diff --git a/extras/hook-scripts/start/post/S30samba-start.sh b/extras/hook-scripts/start/post/S30samba-start.sh
index dfd9c1b46ab..cac0cbf1464 100755
--- a/extras/hook-scripts/start/post/S30samba-start.sh
+++ b/extras/hook-scripts/start/post/S30samba-start.sh
@@ -88,7 +88,7 @@ function add_samba_share () {
STRING+="glusterfs:loglevel = 7\n"
STRING+="path = /\n"
STRING+="read only = no\n"
- STRING+="guest ok = yes\n"
+ STRING+="kernel share modes = no\n"
printf "$STRING" >> "${CONFIGFILE}"
}
@@ -111,14 +111,26 @@ function get_smb () {
usersmbvalue=$(grep user.smb "$GLUSTERD_WORKDIR"/vols/"$volname"/info |\
cut -d"=" -f2)
- if [ "$usercifsvalue" = "disable" ] || [ "$usersmbvalue" = "disable" ]; then
- uservalue="disable"
+ if [ -n "$usercifsvalue" ]; then
+ if [ "$usercifsvalue" = "enable" ] || [ "$usercifsvalue" = "on" ]; then
+ uservalue="enable"
+ fi
fi
+
+ if [ -n "$usersmbvalue" ]; then
+ if [ "$usersmbvalue" = "enable" ] || [ "$usersmbvalue" = "on" ]; then
+ uservalue="enable"
+ fi
+ fi
+
echo "$uservalue"
}
parse_args "$@"
-if [ "$(get_smb "$VOL")" = "disable" ]; then
+
+value=$(get_smb "$VOL")
+
+if [ -z "$value" ] || [ "$value" != "enable" ]; then
exit 0
fi
diff --git a/extras/hook-scripts/start/post/S31ganesha-start.sh b/extras/hook-scripts/start/post/S31ganesha-start.sh
new file mode 100755
index 00000000000..7ad6f23ad06
--- /dev/null
+++ b/extras/hook-scripts/start/post/S31ganesha-start.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+PROGNAME="Sganesha-start"
+OPTSPEC="volname:,gd-workdir:"
+VOL=
+declare -i EXPORT_ID
+ganesha_key="ganesha.enable"
+GANESHA_DIR="/run/gluster/shared_storage/nfs-ganesha"
+CONF1="$GANESHA_DIR/ganesha.conf"
+GLUSTERD_WORKDIR=
+
+function parse_args ()
+{
+ ARGS=$(getopt -l $OPTSPEC -o "o" -name $PROGNAME $@)
+ eval set -- "$ARGS"
+
+ while true; do
+ case $1 in
+ --volname)
+ shift
+ VOL=$1
+ ;;
+ --gd-workdir)
+ shift
+ GLUSTERD_WORKDIR=$1
+ ;;
+ *)
+ shift
+ break
+ ;;
+ esac
+ shift
+ done
+}
+
+
+
+#This function generates a new export entry as export.volume_name.conf
+function write_conf()
+{
+echo -e "# WARNING : Using Gluster CLI will overwrite manual
+# changes made to this file. To avoid it, edit the
+# file, copy it over to all the NFS-Ganesha nodes
+# and run ganesha-ha.sh --refresh-config."
+
+echo "EXPORT{"
+echo " Export_Id = 2;"
+echo " Path = \"/$VOL\";"
+echo " FSAL {"
+echo " name = \"GLUSTER\";"
+echo " hostname=\"localhost\";"
+echo " volume=\"$VOL\";"
+echo " }"
+echo " Access_type = RW;"
+echo " Disable_ACL = true;"
+echo " Squash=\"No_root_squash\";"
+echo " Pseudo=\"/$VOL\";"
+echo " Protocols = \"3\", \"4\" ;"
+echo " Transports = \"UDP\",\"TCP\";"
+echo " SecType = \"sys\";"
+echo "}"
+}
+
+#It adds the export dynamically by sending dbus signals
+function export_add()
+{
+ dbus-send --print-reply --system --dest=org.ganesha.nfsd \
+/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.AddExport \
+string:$GANESHA_DIR/exports/export.$VOL.conf string:"EXPORT(Export_Id=$EXPORT_ID)"
+
+}
+
+# based on src/scripts/ganeshactl/Ganesha/export_mgr.py
+function is_exported()
+{
+ local volume="${1}"
+
+ dbus-send --type=method_call --print-reply --system \
+ --dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \
+ org.ganesha.nfsd.exportmgr.ShowExports \
+ | grep -w -q "/${volume}"
+
+ return $?
+}
+
+# Check the info file (contains the volume options) to see if Ganesha is
+# enabled for this volume.
+function ganesha_enabled()
+{
+ local volume="${1}"
+ local info_file="${GLUSTERD_WORKDIR}/vols/${VOL}/info"
+ local enabled="off"
+
+ enabled=$(grep -w ${ganesha_key} ${info_file} | cut -d"=" -f2)
+
+ [ "${enabled}" == "on" ]
+
+ return $?
+}
+
+parse_args $@
+
+if ganesha_enabled ${VOL} && ! is_exported ${VOL}
+then
+ if [ ! -e ${GANESHA_DIR}/exports/export.${VOL}.conf ]
+ then
+ #Remove export entry from nfs-ganesha.conf
+ sed -i /$VOL.conf/d $CONF1
+ write_conf ${VOL} > ${GANESHA_DIR}/exports/export.${VOL}.conf
+ EXPORT_ID=`cat $GANESHA_DIR/.export_added`
+ EXPORT_ID=EXPORT_ID+1
+ echo $EXPORT_ID > $GANESHA_DIR/.export_added
+ sed -i s/Export_Id.*/"Export_Id=$EXPORT_ID;"/ \
+ $GANESHA_DIR/exports/export.$VOL.conf
+ echo "%include \"$GANESHA_DIR/exports/export.$VOL.conf\"" >> $CONF1
+ else
+ EXPORT_ID=$(grep ^[[:space:]]*Export_Id $GANESHA_DIR/exports/export.$VOL.conf |\
+ awk -F"[=,;]" '{print $2}' | tr -d '[[:space:]]')
+ fi
+ export_add $VOL
+fi
+
+exit 0
diff --git a/extras/identify-hangs.sh b/extras/identify-hangs.sh
new file mode 100755
index 00000000000..ebc6bf144aa
--- /dev/null
+++ b/extras/identify-hangs.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+function get_statedump_fnames_without_timestamps
+{
+ ls | grep -E "[.]dump[.][0-9][0-9]*" | cut -f1-3 -d'.' | sort -u
+}
+
+function get_non_uniq_fields
+{
+ local statedump_fname_prefix=$1
+ print_stack_lkowner_unique_in_one_line "$statedump_fname_prefix" | sort | uniq -c | grep -vE "^\s*1 " | awk '{$1="repeats="$1; print $0}'
+}
+
+function print_stack_lkowner_unique_in_one_line
+{
+ local statedump_fname_prefix=$1
+ sed -e '/./{H;$!d;}' -e 'x;/unique=/!d;/stack=/!d;/lk-owner=/!d;/pid=/!d;' "${statedump_fname_prefix}"* | grep -E "(stack|lk-owner|unique|pid)=" | paste -d " " - - - -
+}
+
+function get_stacks_that_appear_in_multiple_statedumps
+{
+ #If a stack with same 'unique/lk-owner/stack' appears in multiple statedumps
+ #print the stack
+ local statedump_fname_prefix=$1
+ while read -r non_uniq_stack;
+ do
+ if [ -z "$printed" ];
+ then
+ printed="1"
+ fi
+ echo "$statedump_fname_prefix" "$non_uniq_stack"
+ done < <(get_non_uniq_fields "$statedump_fname_prefix")
+}
+
+statedumpdir=${1}
+if [ -z "$statedumpdir" ];
+then
+ echo "Usage: $0 <statedump-dir>"
+ exit 1
+fi
+
+if [ ! -d "$statedumpdir" ];
+then
+ echo "$statedumpdir: Is not a directory"
+ echo "Usage: $0 <statedump-dir>"
+ exit 1
+fi
+
+cd "$statedumpdir" || exit 1
+for statedump_fname_prefix in $(get_statedump_fnames_without_timestamps);
+do
+ get_stacks_that_appear_in_multiple_statedumps "$statedump_fname_prefix"
+done | column -t
+echo "NOTE: stacks with lk-owner=\"\"/lk-owner=0000000000000000/unique=0 may not be hung frames and need further inspection" >&2
diff --git a/extras/mount-shared-storage.sh b/extras/mount-shared-storage.sh
index e99233f7e1e..cc40e13c3e3 100755
--- a/extras/mount-shared-storage.sh
+++ b/extras/mount-shared-storage.sh
@@ -21,7 +21,7 @@ do
continue
fi
- mount -t glusterfs "${arr[0]}" "${arr[1]}"
+ mount -t glusterfs -o "${arr[3]}" "${arr[0]}" "${arr[1]}"
#wait for few seconds
sleep 10
diff --git a/extras/ocf/volume.in b/extras/ocf/volume.in
index 46dd20b8ced..76cc649e55f 100755
--- a/extras/ocf/volume.in
+++ b/extras/ocf/volume.in
@@ -6,6 +6,7 @@
# HA resource
#
# Authors: Florian Haas (hastexo Professional Services GmbH)
+# Jiri Lunacek (Hosting90 Systems s.r.o.)
#
# License: GNU General Public License (GPL)
@@ -54,6 +55,14 @@ must have clone ordering enabled.
<shortdesc lang="en">gluster executable</shortdesc>
<content type="string" default="$OCF_RESKEY_binary_default"/>
</parameter>
+ <parameter name="peer_map">
+ <longdesc lang="en">
+ Mapping of hostname - peer name in the gluster cluster
+ in format hostname1:peername1,hostname2:peername2,...
+ </longdesc>
+ <shortdesc lang="en">gluster peer map</shortdesc>
+ <content type="string" default=""/>
+ </parameter>
</parameters>
<actions>
<action name="start" timeout="20" />
@@ -68,6 +77,10 @@ EOF
}
+if [ -n "${OCF_RESKEY_peer_map}" ]; then
+ SHORTHOSTNAME=`echo "${OCF_RESKEY_peer_map}" | egrep -o "$SHORTHOSTNAME\:[^,]+" | awk -F: '{print $2}'`
+fi
+
volume_getdir() {
local voldir
voldir="@GLUSTERD_WORKDIR@/vols/${OCF_RESKEY_volname}"
@@ -108,6 +121,10 @@ volume_getpids() {
volpid_dir=`volume_getpid_dir`
bricks=`volume_getbricks`
+
+ if [ -z "$bricks" ]; then
+ return 1
+ fi
for brick in ${bricks}; do
pidfile="${volpid_dir}/${SHORTHOSTNAME}${brick}.pid"
@@ -214,6 +231,11 @@ volume_validate_all() {
# Test for required binaries
check_binary $OCF_RESKEY_binary
+
+ if [ -z "$SHORTHOSTNAME" ]; then
+ ocf_log err 'Unable to get host in node map'
+ return $OCF_ERR_CONFIGURED
+ fi
return $OCF_SUCCESS
}
diff --git a/extras/python/Makefile.am b/extras/python/Makefile.am
new file mode 100644
index 00000000000..7d81fa0319b
--- /dev/null
+++ b/extras/python/Makefile.am
@@ -0,0 +1,7 @@
+if HAVE_PYTHON
+# Install __init__.py into the Python site-packages area
+pypkgdir = @BUILD_PYTHON_SITE_PACKAGES@/gluster
+pypkg_PYTHON = __init__.py
+endif
+
+EXTRA_DIST = __init__.py
diff --git a/xlators/features/glupy/src/__init__.py.in b/extras/python/__init__.py
index 3ad9513f40e..3ad9513f40e 100644
--- a/xlators/features/glupy/src/__init__.py.in
+++ b/extras/python/__init__.py
diff --git a/extras/quota/quota_fsck.py b/extras/quota/quota_fsck.py
index f03895de114..e62f7fc52a3 100755
--- a/extras/quota/quota_fsck.py
+++ b/extras/quota/quota_fsck.py
@@ -52,17 +52,17 @@ epilog_msg='''
def print_msg(log_type, path, xattr_dict = {}, stbuf = "", dir_size = None):
if log_type == QUOTA_VERBOSE:
- print('%-24s %-60s\nxattr_values: %s\n%s\n' % {"Verbose", path, xattr_dict, stbuf})
+ print('%-24s %-60s\nxattr_values: %s\n%s\n' % ("Verbose", path, xattr_dict, stbuf))
elif log_type == QUOTA_META_ABSENT:
- print('%-24s %-60s\n%s\n' % {"Quota-Meta Absent", path, xattr_dict})
+ print('%-24s %-60s\n%s\n' % ("Quota-Meta Absent", path, xattr_dict))
elif log_type == QUOTA_SIZE_MISMATCH:
print("mismatch")
if dir_size is not None:
- print('%24s %60s %12s %12s' % {"Size Mismatch", path, xattr_dict['contri_size'],
- dir_size})
+ print('%24s %60s %12s %12s' % ("Size Mismatch", path,
+ xattr_dict, dir_size))
else:
- print('%-24s %-60s %-12i %-12i' % {"Size Mismatch", path, xattr_dict['contri_size'],
- stbuf.st_size})
+ print('%-24s %-60s %-12s %-12s' % ("Size Mismatch", path, xattr_dict,
+ stbuf.st_size))
def size_differs_lot(s1, s2):
'''
@@ -156,12 +156,10 @@ def get_quota_xattr_brick(dpath):
xattr_dict = {}
xattr_dict['parents'] = {}
- for xattr in pairs:
+ for xattr in pairs[1:]:
+ xattr = xattr.decode("utf-8")
xattr_key = xattr.split("=")[0]
- if re.search("# file:", xattr_key):
- # skip the file comment
- continue
- elif xattr_key is "":
+ if xattr_key == "":
# skip any empty lines
continue
elif not re.search("quota", xattr_key):
diff --git a/extras/snap_scheduler/gcron.py b/extras/snap_scheduler/gcron.py
index 1127be0e976..0e4df77d481 100755
--- a/extras/snap_scheduler/gcron.py
+++ b/extras/snap_scheduler/gcron.py
@@ -19,10 +19,10 @@ import logging.handlers
import fcntl
-GCRON_TASKS = "/var/run/gluster/shared_storage/snaps/glusterfs_snap_cron_tasks"
+GCRON_TASKS = "/run/gluster/shared_storage/snaps/glusterfs_snap_cron_tasks"
GCRON_CROND_TASK = "/etc/cron.d/glusterfs_snap_cron_tasks"
GCRON_RELOAD_FLAG = "/var/run/gluster/crond_task_reload_flag"
-LOCK_FILE_DIR = "/var/run/gluster/shared_storage/snaps/lock_files/"
+LOCK_FILE_DIR = "/run/gluster/shared_storage/snaps/lock_files/"
log = logging.getLogger("gcron-logger")
start_time = 0.0
@@ -38,7 +38,8 @@ def initLogger(script_name):
sh.setFormatter(formatter)
process = subprocess.Popen(["gluster", "--print-logdir"],
- stdout=subprocess.PIPE)
+ stdout=subprocess.PIPE,
+ universal_newlines=True)
out, err = process.communicate()
if process.returncode == 0:
logfile = os.path.join(out.strip(), script_name[:-3]+".log")
diff --git a/extras/snap_scheduler/snap_scheduler.py b/extras/snap_scheduler/snap_scheduler.py
index a66c5e3d5ce..e8fcc449a9b 100755
--- a/extras/snap_scheduler/snap_scheduler.py
+++ b/extras/snap_scheduler/snap_scheduler.py
@@ -67,7 +67,7 @@ except ImportError:
SCRIPT_NAME = "snap_scheduler"
scheduler_enabled = False
log = logging.getLogger(SCRIPT_NAME)
-SHARED_STORAGE_DIR="/var/run/gluster/shared_storage"
+SHARED_STORAGE_DIR="/run/gluster/shared_storage"
GCRON_DISABLED = SHARED_STORAGE_DIR+"/snaps/gcron_disabled"
GCRON_ENABLED = SHARED_STORAGE_DIR+"/snaps/gcron_enabled"
GCRON_TASKS = SHARED_STORAGE_DIR+"/snaps/glusterfs_snap_cron_tasks"
@@ -149,7 +149,7 @@ def initLogger():
sh.setFormatter(formatter)
process = subprocess.Popen(["gluster", "--print-logdir"],
- stdout=subprocess.PIPE)
+ stdout=subprocess.PIPE, universal_newlines=True)
logfile = os.path.join(process.stdout.read()[:-1], SCRIPT_NAME + ".log")
fh = logging.FileHandler(logfile)
diff --git a/extras/statedumpparse.rb b/extras/statedumpparse.rb
new file mode 100755
index 00000000000..1aff43377db
--- /dev/null
+++ b/extras/statedumpparse.rb
@@ -0,0 +1,208 @@
+#!/usr/bin/env ruby
+
+require 'time'
+require 'optparse'
+
+unless Array.instance_methods.include? :to_h
+ class Array
+ def to_h
+ h = {}
+ each { |k,v| h[k]=v }
+ h
+ end
+ end
+end
+
+# statedump.c:gf_proc_dump_mempool_info uses a five-dash record separator,
+# client.c:client_fd_lk_ctx_dump uses a six-dash record separator.
+ARRSEP = /^(-{5,6}=-{5,6})?$/
+HEAD = /^\[(.*)\]$/
+INPUT_FORMATS = %w[statedump json]
+
+format = 'json'
+input_format = 'statedump'
+tz = '+0000'
+memstat_select,memstat_reject = //,/\Z./
+OptionParser.new do |op|
+ op.banner << " [<] <STATEDUMP>"
+ op.on("-f", "--format=F", "json/yaml/memstat(-[plain|human|json])") { |s| format = s }
+ op.on("--input-format=F", INPUT_FORMATS.join(?/)) { |s| input_format = s }
+ op.on("--timezone=T",
+ "time zone to apply to zoneless timestamps [default UTC]") { |s| tz = s }
+ op.on("--memstat-select=RX", "memstat: select memory types matching RX") { |s|
+ memstat_select = Regexp.new s
+ }
+ op.on("--memstat-reject=RX", "memstat: reject memory types matching RX") { |s|
+ memstat_reject = Regexp.new s
+ }
+end.parse!
+
+
+if format =~ /\Amemstat(?:-(.*))?/
+ memstat_type = $1 || 'plain'
+ unless %w[plain human json].include? memstat_type
+ raise "unknown memstat type #{memstat_type.dump}"
+ end
+ format = 'memstat'
+end
+
+repr, logsep = case format
+when 'yaml'
+ require 'yaml'
+
+ [proc { |e| e.to_yaml }, "\n"]
+when 'json', 'memstat'
+ require 'json'
+
+ [proc { |e| e.to_json }, " "]
+else
+ raise "unkonwn format '#{format}'"
+end
+formatter = proc { |e| puts repr.call(e) }
+
+INPUT_FORMATS.include? input_format or raise "unkwown input format '#{input_format}'"
+
+dumpinfo = {}
+
+# parse a statedump entry
+elem_cbk = proc { |s,&cbk|
+ arraylike = false
+ s.grep(/\S/).empty? and next
+ head = nil
+ while s.last =~ /^\s*$/
+ s.pop
+ end
+ body = catch { |misc2|
+ s[0] =~ HEAD ? (head = $1) : (throw misc2)
+ body = [[]]
+ s[1..-1].each { |l|
+ if l =~ ARRSEP
+ arraylike = true
+ body << []
+ next
+ end
+ body.last << l
+ }
+
+ body.reject(&:empty?).map { |e|
+ ea = e.map { |l|
+ k,v = l.split("=",2)
+ m = /\A(0|-?[1-9]\d*)(\.\d+)?\Z/.match v
+ [k, m ? (m[2] ? Float(v) : Integer(v)) : v]
+ }
+ begin
+ ea.to_h
+ rescue
+ throw misc2
+ end
+ }
+ }
+
+ if body
+ cbk.call [head, arraylike ? body : (body.empty? ? {} : body[0])]
+ else
+ STDERR.puts ["WARNING: failed to parse record:", repr.call(s)].join(logsep)
+ end
+}
+
+# aggregator routine
+aggr = case format
+when 'memstat'
+ meminfo = {}
+ # commit memory-related entries to meminfo
+ proc { |k,r|
+ case k
+ when /memusage/
+ (meminfo["GF_MALLOC"]||={})[k] ||= r["size"] if k =~ memstat_select and k !~ memstat_reject
+ when "mempool"
+ r.each {|e|
+ kk = "mempool:#{e['pool-name']}"
+ (meminfo["mempool"]||={})[kk] ||= e["size"] if kk =~ memstat_select and kk !~ memstat_reject
+ }
+ end
+ }
+else
+ # just format data, don't actually aggregate anything
+ proc { |pair| formatter.call pair }
+end
+
+# processing the data
+case input_format
+when 'statedump'
+ acc = []
+ $<.each { |l|
+ l = l.strip
+ if l =~ /^(DUMP-(?:START|END)-TIME):\s+(.*)/
+ dumpinfo["_meta"]||={}
+ (dumpinfo["_meta"]["date"]||={})[$1] = Time.parse([$2, tz].join " ")
+ next
+ end
+
+ if l =~ HEAD
+ elem_cbk.call(acc, &aggr)
+ acc = [l]
+ next
+ end
+
+ acc << l
+ }
+ elem_cbk.call(acc, &aggr)
+when 'json'
+ $<.each { |l|
+ r = JSON.load l
+ case r
+ when Array
+ aggr[r]
+ when Hash
+ dumpinfo.merge! r
+ end
+ }
+end
+
+# final actions: output aggregated data
+case format
+when 'memstat'
+ ma = meminfo.values.map(&:to_a).inject(:+)
+ totals = meminfo.map { |coll,h| [coll, h.values.inject(:+)] }.to_h
+ tt = ma.transpose[1].inject(:+)
+
+ summary_sep,showm = case memstat_type
+ when 'json'
+ ["", proc { |k,v| puts({type: k, value: v}.to_json) }]
+ when 'plain', 'human'
+ # human-friendly number representation
+ hr = proc { |n|
+ qa = %w[B kB MB GB]
+ q = ((1...qa.size).find {|i| n < (1 << i*10)} || qa.size) - 1
+ "%.2f%s" % [n.to_f / (1 << q*10), qa[q]]
+ }
+
+ templ = "%{val} %{key}"
+ tft = proc { |t| t }
+ nft = if memstat_type == 'human'
+ nw = [ma.transpose[1], totals.values, tt].flatten.map{|n| hr[n].size}.max
+ proc { |n|
+ hn = hr[n]
+ " " * (nw - hn.size) + hn
+ }
+ else
+ nw = tt.to_s.size
+ proc { |n| "%#{nw}d" % n }
+ end
+ ## Alternative template, key first:
+ # templ = "%{key} %{val}"
+ # tw = ma.transpose[0].map(&:size).max
+ # tft = proc { |t| t + " " * [tw - t.size, 0].max }
+ # nft = (memstat_type == 'human') ? hr : proc { |n| n }
+ ["\n", proc { |k,v| puts templ % {key: tft[k], val: nft[v]} }]
+ else
+ raise 'this should be impossible'
+ end
+
+ ma.sort_by { |k,v| v }.each(&showm)
+ print summary_sep
+ totals.each { |coll,t| showm.call "Total #{coll}", t }
+ showm.call "TOTAL", tt
+else
+ formatter.call dumpinfo
+end
diff --git a/extras/systemd/Makefile.am b/extras/systemd/Makefile.am
index d4a3d0bf878..61446a9b84a 100644
--- a/extras/systemd/Makefile.am
+++ b/extras/systemd/Makefile.am
@@ -1,10 +1,14 @@
-CLEANFILES = glusterd.service glustereventsd.service glusterfssharedstorage.service
-EXTRA_DIST = glusterd.service.in glustereventsd.service.in glusterfssharedstorage.service.in
+CLEANFILES = glusterd.service glustereventsd.service glusterfssharedstorage.service gluster-ta-volume.service
+EXTRA_DIST = glusterd.service.in glustereventsd.service.in glusterfssharedstorage.service.in gluster-ta-volume.service.in
+
+if USE_SYSTEMD
+systemd_DATA = gluster-ta-volume.service
+endif
if WITH_SERVER
if USE_SYSTEMD
# systemddir is already defined through configure.ac
-systemd_DATA = glusterd.service glusterfssharedstorage.service
+systemd_DATA += glusterd.service glusterfssharedstorage.service
if BUILD_EVENTS
systemd_DATA += glustereventsd.service
diff --git a/extras/systemd/gluster-ta-volume.service.in b/extras/systemd/gluster-ta-volume.service.in
new file mode 100644
index 00000000000..2802bca05bf
--- /dev/null
+++ b/extras/systemd/gluster-ta-volume.service.in
@@ -0,0 +1,13 @@
+[Unit]
+Description=GlusterFS, Thin-arbiter process to maintain quorum for replica volume
+After=network.target
+
+[Service]
+Environment="LOG_LEVEL=WARNING"
+ExecStart=@prefix@/sbin/glusterfsd -N --volfile-id ta -f @GLUSTERD_WORKDIR@/thin-arbiter/thin-arbiter.vol --brick-port 24007 --xlator-option ta-server.transport.socket.listen-port=24007 -LWARNING
+Restart=always
+KillMode=process
+SuccessExitStatus=15
+
+[Install]
+WantedBy=multi-user.target
diff --git a/extras/systemd/glusterd.service.in b/extras/systemd/glusterd.service.in
index 78f8e40b0e4..abb0d82911f 100644
--- a/extras/systemd/glusterd.service.in
+++ b/extras/systemd/glusterd.service.in
@@ -1,7 +1,10 @@
[Unit]
Description=GlusterFS, a clustered file-system server
-Requires=rpcbind.service
-After=network.target rpcbind.service
+Documentation=man:glusterd(8)
+StartLimitBurst=6
+StartLimitIntervalSec=3600
+Requires=@RPCBIND_SERVICE@
+After=network.target @RPCBIND_SERVICE@
Before=network-online.target
[Service]
@@ -9,10 +12,15 @@ Type=forking
PIDFile=@localstatedir@/run/glusterd.pid
LimitNOFILE=65536
Environment="LOG_LEVEL=INFO"
-EnvironmentFile=-@sysconfdir@/sysconfig/glusterd
+EnvironmentFile=-@SYSCONF_DIR@/sysconfig/glusterd
ExecStart=@prefix@/sbin/glusterd -p @localstatedir@/run/glusterd.pid --log-level $LOG_LEVEL $GLUSTERD_OPTIONS
KillMode=process
+TimeoutSec=300
SuccessExitStatus=15
+Restart=on-abnormal
+RestartSec=60
+StartLimitBurst=6
+StartLimitInterval=3600
[Install]
WantedBy=multi-user.target
diff --git a/extras/systemd/glustereventsd.service.in b/extras/systemd/glustereventsd.service.in
index 4bfcf42f386..f80b78199f6 100644
--- a/extras/systemd/glustereventsd.service.in
+++ b/extras/systemd/glustereventsd.service.in
@@ -1,6 +1,8 @@
[Unit]
Description=Gluster Events Notifier
-After=syslog.target network.target
+After=network.target
+Documentation=man:glustereventsd(8)
+
[Service]
Environment=PYTHONPATH=@BUILD_PYTHON_SITE_PACKAGES_EXPANDED@:$PYTHONPATH
diff --git a/extras/thin-arbiter/gluster-ta-volume.service b/extras/thin-arbiter/gluster-ta-volume.service
deleted file mode 100644
index 19be1757555..00000000000
--- a/extras/thin-arbiter/gluster-ta-volume.service
+++ /dev/null
@@ -1,13 +0,0 @@
-[Unit]
-Description = Thin-arbiter process to maintain quorum for replica volume
-After = network.target
-
-[Service]
-Environment = "LOG_LEVEL=WARNING"
-ExecStart = /usr/local/sbin/glusterfsd -N --volfile-id ta-vol -f /var/lib/glusterd/thin-arbiter/thin-arbiter.vol --brick-port 24007 --xlator-option ta-vol-server.transport.socket.listen-port=24007
-Restart = always
-KillMode=process
-SuccessExitStatus=15
-
-[Install]
-WantedBy = multi-user.target
diff --git a/extras/thin-arbiter/setup-thin-arbiter.sh b/extras/thin-arbiter/setup-thin-arbiter.sh
index 32fe7fc0cdd..0681b30ef3f 100755
--- a/extras/thin-arbiter/setup-thin-arbiter.sh
+++ b/extras/thin-arbiter/setup-thin-arbiter.sh
@@ -1,17 +1,62 @@
-#! /bin/bash
+#!/bin/bash
+# Copyright (c) 2018-2019 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+
+# This tool has been developed to setup thin-arbiter process on a node.
+# Seting up a thin arbiter process involves following files -
+# 1 - thin-arbiter.vol
+# Thin-arbiter (TA) process will use the graph in this file to load the
+# required translators.
+# 2 - gluster-ta-volume.service (generated by gluster-ta-volume.service.in)
+# TA process would be running as systemd service.
+#
+# TA process uses a location to save TA id files for every subvolume.
+# This location can be taken as input from user. Once provided and the
+# TA process is started on a node, it can not be changed using this
+# script or by any other mean. The same location should be used in
+# the gluster CLI when creating thin-arbiter volumes.
+
+MYPATH=`dirname $0`
volloc="/var/lib/glusterd/thin-arbiter"
mkdir -p $volloc
-cp -f extras/thin-arbiter/thin-arbiter.vol $volloc/thin-arbiter.vol
+if [ -f /etc/glusterfs/thin-arbiter.vol ]; then
+ volfile=/etc/glusterfs/thin-arbiter.vol
+else
+ volfile=$MYPATH/thin-arbiter.vol
+fi
+
tafile="$volloc/thin-arbiter.vol"
+
+help () {
+ echo " "
+ echo ' This tool helps to setup thin-arbiter (TA) process on a node.
+ TA process uses a location to save TA id files for every subvolume.
+ This location can be taken as input from user. Once provided and the
+ TA process is started on a node, it can not be changed using this script
+ or by any other mean. The same location should be used in gluster CLI
+ when creating thin-arbiter volumes.
+
+ usage: setup-thin-arbiter.sh [-s] [-h]
+ options:
+ -s - Setup thin-arbiter file path and start process
+ -h - Show this help message and exit
+'
+}
+
volfile_set_brick_path () {
while read -r line
do
dir=`echo "$line" | cut -d' ' -f 2`
- if [ "$dir" = "directory" ]
- then
+ if [ "$dir" = "directory" ]; then
bpath=`echo "$line" | cut -d' ' -f 3`
sed -i -- 's?'$bpath'?'$1'?g' $tafile
return
@@ -19,50 +64,121 @@ volfile_set_brick_path () {
done < $tafile
}
-tapath="/mnt/thin-arbiter"
-echo "Volume file to be used to start thin-arbiter process is :"
-echo "$tafile"
-echo " "
-echo "Default thin-arbiter path is : $tapath"
-echo -n "Do you want to change path for thin arbiter volumes. (y/N): "
-echo " "
-read moveon
-
-if [ "${moveon}" = 'N' ] || [ "${moveon}" = 'n' ]; then
- echo "Default brick path, $tapath, has been set"
- echo "for all thin arbiter volumes using this node"
+check_ta_proc () {
+ pro=`ps aux | grep thin-arbiter.vol | grep "volfile-id"`
+ if [ "${pro}" = '' ]; then
+ echo ""
+ else
+ curr_loc=`cat $volloc/thin-arbiter.vol | grep option | grep directory`
+ loc=`echo "${curr_loc##* }"`
+ echo "******************************************************"
+ echo "Error:"
+ echo "Thin-arbiter process is running with thin-arbiter path = $loc"
+ echo "Can not change TA path on this host now."
+ echo "$pro"
+ echo "******************************************************"
+ exit 1
+ fi
+}
+
+getpath () {
+ check_ta_proc
+ echo "******************************************************"
+ echo "User will be required to enter a path/folder for arbiter volume."
+ echo "Please note that this path will be used for ALL VOLUMES using this"
+ echo "node to host thin-arbiter. After setting, if a volume"
+ echo "has been created using this host and path then path for"
+ echo "thin-arbiter can not be changed "
+ echo "******************************************************"
echo " "
-else
- echo -n "Enter brick path for thin arbiter volumes: "
- read tapath
- echo "Entered brick path : $tapath "
- echo "Please note that this brick path will be used for ALL"
- echo "VOLUMES using this node to host thin-arbiter brick"
+ while true;
+ do
+ echo -n "Enter brick path for thin arbiter volumes: "
+ echo " "
+ read tapath
+ if [ "${tapath}" = '' ]; then
+ echo "Please enter valid path"
+ continue
+ else
+ echo "Entered brick path : $tapath "
+ echo "Please note that this brick path will be used for ALL"
+ echo "VOLUMES using this node to host thin-arbiter brick"
+ echo -n "Want to continue? (y/N): "
+ echo " "
+ read cont
+
+ if [ "${cont}" = 'N' ] || [ "${cont}" = 'n' ]; then
+ exit 0
+ else
+ break
+ fi
+ fi
+ done
+}
+
+setup () {
+ getpath
+ mkdir -p $tapath/.glusterfs/indices
+ if [ -d $tapath/.glusterfs/indices ]; then
+ echo " "
+ else
+ echo "Could not create $tapath/.glusterfs/indices directory, check provided ta path."
+ exit 1
+ fi
+
+ cp -f --backup --suffix=_old $volfile $volloc/thin-arbiter.vol
+ volfile_set_brick_path "$tapath"
+
+ echo "Directory path to be used for thin-arbiter volume is: $tapath"
echo " "
-fi
+ echo "========================================================"
-mkdir -p $tapath/.glusterfs/indices
-volfile_set_brick_path "$tapath"
+ if [ -f /usr/lib/systemd/system/gluster-ta-volume.service ]; then
+ echo "Starting thin-arbiter process"
+ else
+ cp $MYPATH/../systemd/gluster-ta-volume.service /etc/systemd/system/
+ echo "Starting thin-arbiter process"
+ chmod 0644 /etc/systemd/system/gluster-ta-volume.service
+ fi
-echo "Directory path to be used for thin-arbiter volume is: $tapath"
-echo " "
+ systemctl daemon-reload
+ systemctl enable gluster-ta-volume
+ systemctl stop gluster-ta-volume
+ systemctl start gluster-ta-volume
-echo "========================================================"
+ if [ $? == 0 ]; then
+ echo "thin-arbiter process has been setup and running"
+ else
+ echo "Failed to setup thin arbiter"
+ exit 1
+ fi
-echo "Installing and starting service for thin-arbiter process"
+}
-cp extras/thin-arbiter/gluster-ta-volume.service /etc/systemd/system/
+main()
+{
-chmod 0777 /etc/systemd/system/gluster-ta-volume.service
+ if [ "$#" -ne 1 ]; then
+ help
+ exit 0
+ fi
-systemctl daemon-reload
-systemctl enable gluster-ta-volume
-systemctl stop gluster-ta-volume
-systemctl start gluster-ta-volume
+ while getopts "sh" opt; do
+ case $opt in
+ h)
+ help
+ exit 0
+ ;;
+ s)
+ setup
+ exit 0
+ ;;
+ *)
+ help
+ exit 0
+ ;;
+ esac
+ done
+}
-if [ $? == 0 ]
-then
- echo "thin-arbiter process is setup and running"
-else
- echo "Failed to setup thin arbiter"
-fi
+main "$@"
diff --git a/extras/thin-arbiter/thin-arbiter.vol b/extras/thin-arbiter/thin-arbiter.vol
index 244a4caf485..c76babc7b3c 100644
--- a/extras/thin-arbiter/thin-arbiter.vol
+++ b/extras/thin-arbiter/thin-arbiter.vol
@@ -33,11 +33,10 @@ volume ta-index
subvolumes ta-io-threads
end-volume
-volume ta-io-stats
+volume /mnt/thin-arbiter
type debug/io-stats
option count-fop-hits off
option latency-measurement off
- option log-level WARNING
option unique-id /mnt/thin-arbiter
subvolumes ta-index
end-volume
@@ -54,5 +53,5 @@ volume ta-server
option auth-path /mnt/thin-arbiter
option transport.address-family inet
option transport-type tcp
- subvolumes ta-io-stats
+ subvolumes /mnt/thin-arbiter
end-volume
diff --git a/extras/who-wrote-glusterfs/gitdm.domain-map b/extras/who-wrote-glusterfs/gitdm.domain-map
index 315355b08b8..7cd2bbd605b 100644
--- a/extras/who-wrote-glusterfs/gitdm.domain-map
+++ b/extras/who-wrote-glusterfs/gitdm.domain-map
@@ -4,6 +4,7 @@
active.by ActiveCloud
appeartv.com Appear TV
cern.ch CERN
+cmss.chinamobile.com China Mobile(Suzhou) Software Technology
datalab.es DataLab S.L.
fb.com Facebook
fedoraproject.org Fedora Project
diff --git a/geo-replication/gsyncd.conf.in b/geo-replication/gsyncd.conf.in
index 6160c7c7091..9688c79fab7 100644
--- a/geo-replication/gsyncd.conf.in
+++ b/geo-replication/gsyncd.conf.in
@@ -23,6 +23,11 @@ configurable=false
type=int
value=1
+[master-distribution-count]
+configurable=false
+type=int
+value=1
+
[glusterd-workdir]
value = @GLUSTERD_WORKDIR@
@@ -109,7 +114,7 @@ type=int
help=Minimum time interval in seconds for passive worker to become Active
[changelog-archive-format]
-value=%%Y%%m
+value=%Y%m
help=Processed changelogs will be archived in working directory. Pattern for archive file
[use-meta-volume]
@@ -118,7 +123,7 @@ type=bool
help=Use this to set Active Passive mode to meta-volume.
[meta-volume-mnt]
-value=/var/run/gluster/shared_storage
+value=/run/gluster/shared_storage
help=Meta Volume or Shared Volume mount path
[allow-network]
@@ -128,10 +133,11 @@ value=
value=5
type=int
-[use-tarssh]
-value=false
-type=bool
-help=Use sync-mode as tarssh
+[sync-method]
+value=rsync
+help=Sync method for data sync. Available methods are tar over ssh and rsync. Default is rsync.
+validation=choice
+allowed_values=tarssh,rsync
[remote-gsyncd]
value =
@@ -260,7 +266,9 @@ allowed_values=ERROR,INFO,WARNING,DEBUG
[ssh-port]
value=22
-validation=int
+validation=minmax
+min=1
+max=65535
help=Set SSH port
type=int
diff --git a/geo-replication/setup.py b/geo-replication/setup.py
index 6d678baa2f7..0eae469d2d6 100644
--- a/geo-replication/setup.py
+++ b/geo-replication/setup.py
@@ -1,7 +1,7 @@
#
# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
# This file is part of GlusterFS.
-
+#
# This file is licensed to you under your choice of the GNU Lesser
# General Public License, version 3 or any later version (LGPLv3 or
# later), or the GNU General Public License, version 2 (GPLv2), in all
@@ -20,11 +20,11 @@ setup(
name=name,
version="",
description='GlusterFS Geo Replication',
- license='',
+ license='GPLV2 and LGPLV3+',
author='Red Hat, Inc.',
author_email='gluster-devel@gluster.org',
url='http://www.gluster.org',
- packages=['syncdaemon', ],
+ packages=[name, ],
test_suite='nose.collector',
install_requires=[],
scripts=[],
diff --git a/geo-replication/src/gsyncd.c b/geo-replication/src/gsyncd.c
index cf0e76f69c4..b5aeec5bf33 100644
--- a/geo-replication/src/gsyncd.c
+++ b/geo-replication/src/gsyncd.c
@@ -7,8 +7,8 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "compat.h"
-#include "syscall.h"
+#include <glusterfs/compat.h>
+#include <glusterfs/syscall.h>
#include <stdlib.h>
#include <stdio.h>
@@ -24,13 +24,13 @@
* We unconditionally pass then while building gsyncd binary.
*/
#ifdef USE_LIBGLUSTERFS
-#include "glusterfs.h"
-#include "globals.h"
-#include "defaults.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/globals.h>
+#include <glusterfs/defaults.h>
#endif
-#include "common-utils.h"
-#include "run.h"
+#include <glusterfs/common-utils.h>
+#include <glusterfs/run.h>
#include "procdiggy.h"
#define _GLUSTERD_CALLED_ "_GLUSTERD_CALLED_"
diff --git a/geo-replication/src/gverify.sh b/geo-replication/src/gverify.sh
index d048de0992b..f5f70d245e0 100755
--- a/geo-replication/src/gverify.sh
+++ b/geo-replication/src/gverify.sh
@@ -94,6 +94,7 @@ echo $cmd_line;
function master_stats()
{
MASTERVOL=$1;
+ local inet6=$2;
local d;
local i;
local disk_size;
@@ -102,7 +103,12 @@ function master_stats()
local m_status;
d=$(mktemp -d -t ${0##*/}.XXXXXX 2>/dev/null);
- glusterfs -s localhost --xlator-option="*dht.lookup-unhashed=off" --volfile-id $MASTERVOL -l $master_log_file $d;
+ if [ "$inet6" = "inet6" ]; then
+ glusterfs -s localhost --xlator-option="*dht.lookup-unhashed=off" --xlator-option="transport.address-family=inet6" --volfile-id $MASTERVOL -l $master_log_file $d;
+ else
+ glusterfs -s localhost --xlator-option="*dht.lookup-unhashed=off" --volfile-id $MASTERVOL -l $master_log_file $d;
+ fi
+
i=$(get_inode_num $d);
if [[ "$i" -ne "1" ]]; then
echo 0:0;
@@ -124,12 +130,18 @@ function slave_stats()
SLAVEUSER=$1;
SLAVEHOST=$2;
SLAVEVOL=$3;
+ local inet6=$4;
local cmd_line;
local ver;
local status;
d=$(mktemp -d -t ${0##*/}.XXXXXX 2>/dev/null);
- glusterfs --xlator-option="*dht.lookup-unhashed=off" --volfile-server $SLAVEHOST --volfile-id $SLAVEVOL -l $slave_log_file $d;
+ if [ "$inet6" = "inet6" ]; then
+ glusterfs --xlator-option="*dht.lookup-unhashed=off" --xlator-option="transport.address-family=inet6" --volfile-server $SLAVEHOST --volfile-id $SLAVEVOL -l $slave_log_file $d;
+ else
+ glusterfs --xlator-option="*dht.lookup-unhashed=off" --volfile-server $SLAVEHOST --volfile-id $SLAVEVOL -l $slave_log_file $d;
+ fi
+
i=$(get_inode_num $d);
if [[ "$i" -ne "1" ]]; then
echo 0:0;
@@ -167,6 +179,10 @@ function main()
log_file=$6
> $log_file
+ inet6=$7
+ local cmd_line
+ local ver
+
# Use FORCE_BLOCKER flag in the error message to differentiate
# between the errors which the force command should bypass
@@ -192,20 +208,21 @@ function main()
exit 1;
fi;
+ cmd_line=$(cmd_slave);
if [[ -z "${GR_SSH_IDENTITY_KEY}" ]]; then
- err=$((ssh -p ${SSH_PORT} -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $2@$3 "gluster --version") 2>&1)
+ ver=$(ssh -p ${SSH_PORT} -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $2@$3 bash -c "'$cmd_line'")
else
- err=$((ssh -p ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $2@$3 "gluster --version") 2>&1)
+ ver=$(ssh -p ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $2@$3 bash -c "'$cmd_line'")
fi
- if [ $? -ne 0 ]; then
- echo "FORCE_BLOCKER|gluster command on $2@$3 failed. Error: $err" > $log_file
+ if [ -z "$ver" ]; then
+ echo "FORCE_BLOCKER|gluster command not found on $3 for user $2." > $log_file
exit 1;
fi;
ERRORS=0;
- master_data=$(master_stats $1);
- slave_data=$(slave_stats $2 $3 $4);
+ master_data=$(master_stats $1 ${inet6});
+ slave_data=$(slave_stats $2 $3 $4 ${inet6});
master_disk_size=$(echo $master_data | cut -f1 -d':');
slave_disk_size=$(echo $slave_data | cut -f1 -d':');
master_used_size=$(echo $master_data | cut -f2 -d':');
diff --git a/geo-replication/src/peer_georep-sshkey.py.in b/geo-replication/src/peer_georep-sshkey.py.in
index 2196fd7491a..58696e9a616 100644
--- a/geo-replication/src/peer_georep-sshkey.py.in
+++ b/geo-replication/src/peer_georep-sshkey.py.in
@@ -30,8 +30,8 @@ from prettytable import PrettyTable
SECRET_PEM = "@GLUSTERD_WORKDIR@/geo-replication/secret.pem"
TAR_SSH_PEM = "@GLUSTERD_WORKDIR@/geo-replication/tar_ssh.pem"
-GSYNCD_CMD = 'command="@GLUSTERFS_LIBEXECDIR@/gsyncd" '
-TAR_CMD = 'command="tar ${SSH_ORIGINAL_COMMAND#* }" '
+GSYNCD_CMD = 'command="@GLUSTERFS_LIBEXECDIR@/gsyncd" '
+TAR_CMD = 'command="tar ${SSH_ORIGINAL_COMMAND#* }" '
COMMON_SECRET_FILE = "@GLUSTERD_WORKDIR@/geo-replication/common_secret.pem.pub"
diff --git a/geo-replication/src/peer_gsec_create.in b/geo-replication/src/peer_gsec_create.in
index 05c1638bdcd..6d4a4847013 100755
--- a/geo-replication/src/peer_gsec_create.in
+++ b/geo-replication/src/peer_gsec_create.in
@@ -18,7 +18,7 @@ if [ "Xcontainer" = "X$1" ]; then
output1=`cat "$GLUSTERD_WORKDIR"/geo-replication/secret.pem.pub`
output2=`cat "$GLUSTERD_WORKDIR"/geo-replication/tar_ssh.pem.pub`
else
- output1=`echo command=\"${libexecdir}/glusterfs/gsyncd\" " "``cat "$GLUSTERD_WORKDIR"/geo-replication/secret.pem.pub`
- output2=`echo command=\"tar \$\{SSH_ORIGINAL_COMMAND#* \}\" " "``cat "$GLUSTERD_WORKDIR"/geo-replication/tar_ssh.pem.pub`
+ output1=`echo command=\"${libexecdir}/glusterfs/gsyncd\" ""``cat "$GLUSTERD_WORKDIR"/geo-replication/secret.pem.pub`
+ output2=`echo command=\"tar \$\{SSH_ORIGINAL_COMMAND#* \}\" ""``cat "$GLUSTERD_WORKDIR"/geo-replication/tar_ssh.pem.pub`
fi
echo -e "$output1\n$output2"
diff --git a/geo-replication/src/peer_mountbroker.py.in b/geo-replication/src/peer_mountbroker.py.in
index 54f95c4b38d..40b90ffc560 100644
--- a/geo-replication/src/peer_mountbroker.py.in
+++ b/geo-replication/src/peer_mountbroker.py.in
@@ -47,7 +47,7 @@ class MountbrokerUserMgmt(object):
for line in f:
line = line.strip()
if line.startswith("option "):
- key, value = line.split(" ")[1:]
+ key, value = line.split()[1:]
self._options[key] = value
if line.startswith("#"):
self.commented_lines.append(line)
@@ -197,7 +197,7 @@ class NodeSetup(Cmd):
execute(["chgrp", "-R", args.group, GEOREP_DIR])
execute(["chgrp", "-R", args.group, LOG_DIR])
execute(["chgrp", args.group, CLI_LOG])
- execute(["chmod", "770", args.group, GEOREP_DIR])
+ execute(["chmod", "770", GEOREP_DIR])
execute(["find", LOG_DIR, "-type", "d", "-exec", "chmod", "770", "{}",
"+"])
execute(["find", LOG_DIR, "-type", "f", "-exec", "chmod", "660", "{}",
@@ -222,8 +222,10 @@ class CliSetup(Cmd):
name = "setup"
def args(self, parser):
- parser.add_argument("mount_root")
- parser.add_argument("group")
+ parser.add_argument("mount_root",
+ help="Path to the mountbroker-root directory.")
+ parser.add_argument("group",
+ help="Group to be used for setup.")
def run(self, args):
out = execute_in_peers("node-setup", [args.mount_root,
@@ -333,8 +335,10 @@ class CliAdd(Cmd):
name = "add"
def args(self, parser):
- parser.add_argument("volume")
- parser.add_argument("user")
+ parser.add_argument("volume",
+ help="Volume to be added.")
+ parser.add_argument("user",
+ help="User for which volume is to be added.")
def run(self, args):
out = execute_in_peers("node-add", [args.volume,
@@ -374,8 +378,9 @@ class CliRemove(Cmd):
name = "remove"
def args(self, parser):
- parser.add_argument("--volume", default=".")
- parser.add_argument("--user", default=".")
+ parser.add_argument("--volume", default=".", help="Volume to be removed.")
+ parser.add_argument("--user", default=".",
+ help="User for which volume has to be removed.")
def run(self, args):
out = execute_in_peers("node-remove", [args.volume,
diff --git a/geo-replication/src/procdiggy.c b/geo-replication/src/procdiggy.c
index 05c1e1edc68..8068ef79a42 100644
--- a/geo-replication/src/procdiggy.c
+++ b/geo-replication/src/procdiggy.c
@@ -15,8 +15,8 @@
#include <ctype.h>
#include <sys/param.h> /* for PATH_MAX */
-#include "common-utils.h"
-#include "syscall.h"
+#include <glusterfs/common-utils.h>
+#include <glusterfs/syscall.h>
#include "procdiggy.h"
pid_t
@@ -31,6 +31,10 @@ pidinfo(pid_t pid, char **name)
};
char *p = NULL;
int ret = 0;
+ pid_t lpid = -1;
+
+ if (name)
+ *name = NULL;
snprintf(path, sizeof path, PROC "/%d/status", pid);
@@ -38,14 +42,12 @@ pidinfo(pid_t pid, char **name)
if (!f)
return -1;
- if (name)
- *name = NULL;
for (;;) {
size_t len;
memset(buf, 0, sizeof(buf));
if (fgets(buf, sizeof(buf), f) == NULL || (len = strlen(buf)) == 0 ||
buf[len - 1] != '\n') {
- pid = -1;
+ lpid = -1;
goto out;
}
buf[len - 1] = '\0';
@@ -57,7 +59,7 @@ pidinfo(pid_t pid, char **name)
;
*name = gf_strdup(p);
if (!*name) {
- pid = -2;
+ lpid = -2;
goto out;
}
continue;
@@ -71,17 +73,17 @@ pidinfo(pid_t pid, char **name)
while (isspace(*++p))
;
- ret = gf_string2int(p, &pid);
+ ret = gf_string2int(p, &lpid);
if (ret == -1)
- pid = -1;
+ lpid = -1;
out:
fclose(f);
- if (pid == -1 && name && *name)
+ if (lpid == -1 && name && *name)
GF_FREE(*name);
- if (pid == -2)
+ if (lpid == -2)
fprintf(stderr, "out of memory\n");
- return pid;
+ return lpid;
}
int
diff --git a/geo-replication/src/set_geo_rep_pem_keys.sh b/geo-replication/src/set_geo_rep_pem_keys.sh
index ae23f4ff0c6..8a43fa39d1f 100755
--- a/geo-replication/src/set_geo_rep_pem_keys.sh
+++ b/geo-replication/src/set_geo_rep_pem_keys.sh
@@ -47,6 +47,7 @@ function main()
cp $home_dir/${COMMON_SECRET_PEM_PUB} ${GLUSTERD_WORKDIR}/geo-replication/
gluster system:: copy file /geo-replication/${COMMON_SECRET_PEM_PUB}
gluster system:: execute add_secret_pub $user geo-replication/${master_vol}_${slave_vol}_common_secret.pem.pub
+ gluster vol set ${slave_vol} features.read-only on
else
echo "$home_dir/common_secret.pem.pub not present. Please run geo-replication command on master with push-pem option to generate the file"
exit 1;
diff --git a/geo-replication/syncdaemon/Makefile.am b/geo-replication/syncdaemon/Makefile.am
index 62c5ce7fe30..d70e3368faf 100644
--- a/geo-replication/syncdaemon/Makefile.am
+++ b/geo-replication/syncdaemon/Makefile.am
@@ -2,7 +2,7 @@ syncdaemondir = $(GLUSTERFS_LIBEXECDIR)/python/syncdaemon
syncdaemon_PYTHON = rconf.py gsyncd.py __init__.py master.py README.md repce.py \
resource.py syncdutils.py monitor.py libcxattr.py gsyncdconfig.py \
- libgfchangelog.py changelogagent.py gsyncdstatus.py conf.py logutils.py \
+ libgfchangelog.py gsyncdstatus.py conf.py logutils.py \
subcmds.py argsupgrade.py py2py3.py
CLEANFILES =
diff --git a/geo-replication/syncdaemon/README.md b/geo-replication/syncdaemon/README.md
index 2a202e3f99e..5ab785ae669 100644
--- a/geo-replication/syncdaemon/README.md
+++ b/geo-replication/syncdaemon/README.md
@@ -19,7 +19,6 @@ INSTALLATION
As of now, the supported way of operation is running from the source directory or using the RPMs given.
-If you use Python 2.4.x, you need to install the [Ctypes module](http://python.net/crew/theller/ctypes/).
CONFIGURATION
-------------
diff --git a/geo-replication/syncdaemon/argsupgrade.py b/geo-replication/syncdaemon/argsupgrade.py
index 4018143b8b4..7af40633ef8 100644
--- a/geo-replication/syncdaemon/argsupgrade.py
+++ b/geo-replication/syncdaemon/argsupgrade.py
@@ -84,6 +84,10 @@ def upgrade():
# fail when it does stat to check the existence.
init_gsyncd_template_conf()
+ inet6 = False
+ if "--inet6" in sys.argv:
+ inet6 = True
+
if "--monitor" in sys.argv:
# python gsyncd.py --path=/bricks/b1
# --monitor -c gsyncd.conf
@@ -147,8 +151,11 @@ def upgrade():
user, hname = remote_addr.split("@")
+ if not inet6:
+ hname = gethostbyname(hname)
+
print(("ssh://%s@%s:gluster://127.0.0.1:%s" % (
- user, gethostbyname(hname), vol)))
+ user, hname, vol)))
sys.exit(0)
elif "--normalize-url" in sys.argv:
@@ -346,3 +353,7 @@ def upgrade():
if pargs.reset_sync_time:
sys.argv.append("--reset-sync-time")
+
+ if inet6:
+ # Add `--inet6` as first argument
+ sys.argv = [sys.argv[0], "--inet6"] + sys.argv[1:]
diff --git a/geo-replication/syncdaemon/changelogagent.py b/geo-replication/syncdaemon/changelogagent.py
deleted file mode 100644
index c5fdbc3a74f..00000000000
--- a/geo-replication/syncdaemon/changelogagent.py
+++ /dev/null
@@ -1,78 +0,0 @@
-#!/usr/bin/python3
-#
-# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
-# This file is part of GlusterFS.
-
-# This file is licensed to you under your choice of the GNU Lesser
-# General Public License, version 3 or any later version (LGPLv3 or
-# later), or the GNU General Public License, version 2 (GPLv2), in all
-# cases as published by the Free Software Foundation.
-#
-
-import logging
-import syncdutils
-from syncdutils import select, CHANGELOG_AGENT_SERVER_VERSION
-from repce import RepceServer
-
-
-class _MetaChangelog(object):
-
- def __getattr__(self, meth):
- from libgfchangelog import Changes as LChanges
- xmeth = [m for m in dir(LChanges) if m[0] != '_']
- if meth not in xmeth:
- return
- for m in xmeth:
- setattr(self, m, getattr(LChanges, m))
- return getattr(self, meth)
-
-Changes = _MetaChangelog()
-
-
-class Changelog(object):
- def version(self):
- return CHANGELOG_AGENT_SERVER_VERSION
-
- def init(self):
- return Changes.cl_init()
-
- def register(self, cl_brick, cl_dir, cl_log, cl_level, retries=0):
- return Changes.cl_register(cl_brick, cl_dir, cl_log, cl_level, retries)
-
- def scan(self):
- return Changes.cl_scan()
-
- def getchanges(self):
- return Changes.cl_getchanges()
-
- def done(self, clfile):
- return Changes.cl_done(clfile)
-
- def history(self, changelog_path, start, end, num_parallel):
- return Changes.cl_history_changelog(changelog_path, start, end,
- num_parallel)
-
- def history_scan(self):
- return Changes.cl_history_scan()
-
- def history_getchanges(self):
- return Changes.cl_history_getchanges()
-
- def history_done(self, clfile):
- return Changes.cl_history_done(clfile)
-
-
-class ChangelogAgent(object):
- def __init__(self, obj, fd_tup):
- (inf, ouf, rw, ww) = fd_tup.split(',')
- repce = RepceServer(obj, int(inf), int(ouf), 1)
- t = syncdutils.Thread(target=lambda: (repce.service_loop(),
- syncdutils.finalize()))
- t.start()
- logging.info('Agent listining...')
-
- select((), (), ())
-
-
-def agent(obj, fd_tup):
- return ChangelogAgent(obj, fd_tup)
diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py
index 037f351151b..257ed72c6ae 100644
--- a/geo-replication/syncdaemon/gsyncd.py
+++ b/geo-replication/syncdaemon/gsyncd.py
@@ -22,8 +22,8 @@ import gsyncdconfig as gconf
from rconf import rconf
import subcmds
from conf import GLUSTERD_WORKDIR, GLUSTERFS_CONFDIR, GCONF_VERSION
-from syncdutils import set_term_handler, finalize, lf
-from syncdutils import log_raise_exception, FreeObject, escape
+from syncdutils import (set_term_handler, finalize, lf,
+ log_raise_exception, FreeObject, escape)
import argsupgrade
@@ -47,6 +47,7 @@ def main():
sys.exit(0)
parser = ArgumentParser()
+ parser.add_argument("--inet6", action="store_true")
sp = parser.add_subparsers(dest="subcmd")
# Monitor Status File update
@@ -78,8 +79,6 @@ def main():
help="feedback fd between monitor and worker")
p.add_argument("--local-node", help="Local master node")
p.add_argument("--local-node-id", help="Local Node ID")
- p.add_argument("--rpc-fd",
- help="Read and Write fds for worker-agent communication")
p.add_argument("--subvol-num", type=int, help="Subvolume number")
p.add_argument("--is-hottier", action="store_true",
help="Is this brick part of hot tier")
@@ -91,19 +90,6 @@ def main():
p.add_argument("-c", "--config-file", help="Config File")
p.add_argument("--debug", action="store_true")
- # Agent
- p = sp.add_parser("agent")
- p.add_argument("master", help="Master Volume Name")
- p.add_argument("slave", help="Slave details user@host::vol format")
- p.add_argument("--local-path", help="Local brick path")
- p.add_argument("--local-node", help="Local master node")
- p.add_argument("--local-node-id", help="Local Node ID")
- p.add_argument("--slave-id", help="Slave Volume ID")
- p.add_argument("--rpc-fd",
- help="Read and Write fds for worker-agent communication")
- p.add_argument("-c", "--config-file", help="Config File")
- p.add_argument("--debug", action="store_true")
-
# Slave
p = sp.add_parser("slave")
p.add_argument("master", help="Master Volume Name")
@@ -133,6 +119,8 @@ def main():
help="Directory where Gluster binaries exist on slave")
p.add_argument("--slave-access-mount", action="store_true",
help="Do not lazy umount the slave volume")
+ p.add_argument("--master-dist-count", type=int,
+ help="Master Distribution count")
# Status
p = sp.add_parser("status")
@@ -228,7 +216,8 @@ def main():
# Set default path for config file in that case
# If an subcmd accepts config file then it also accepts
# master and Slave arguments.
- if config_file is None and hasattr(args, "config_file"):
+ if config_file is None and hasattr(args, "config_file") \
+ and args.subcmd != "slave":
config_file = "%s/geo-replication/%s_%s_%s/gsyncd.conf" % (
GLUSTERD_WORKDIR,
args.master,
@@ -252,6 +241,12 @@ def main():
if args.subcmd == "slave":
override_from_args = True
+ if config_file is not None and \
+ args.subcmd in ["monitor", "config-get", "config-set", "config-reset"]:
+ ret = gconf.is_config_file_old(config_file, args.master, extra_tmpl_args["slavevol"])
+ if ret is not None:
+ gconf.config_upgrade(config_file, ret)
+
# Load Config file
gconf.load(GLUSTERFS_CONFDIR + "/gsyncd.conf",
config_file,
@@ -261,8 +256,8 @@ def main():
# Default label to print in log file
label = args.subcmd
- if args.subcmd in ("worker", "agent"):
- # If Worker or agent, then add brick path also to label
+ if args.subcmd in ("worker"):
+ # If Worker, then add brick path also to label
label = "%s %s" % (args.subcmd, args.local_path)
elif args.subcmd == "slave":
# If Slave add Master node and Brick details
@@ -305,7 +300,7 @@ def main():
# Log message for loaded config file
if config_file is not None:
- logging.info(lf("Using session config file", path=config_file))
+ logging.debug(lf("Using session config file", path=config_file))
set_term_handler()
excont = FreeObject(exval=0)
diff --git a/geo-replication/syncdaemon/gsyncdconfig.py b/geo-replication/syncdaemon/gsyncdconfig.py
index 5d439a4c5ee..8848071997a 100644
--- a/geo-replication/syncdaemon/gsyncdconfig.py
+++ b/geo-replication/syncdaemon/gsyncdconfig.py
@@ -10,12 +10,14 @@
#
try:
- from configparser import ConfigParser, NoSectionError
+ from ConfigParser import RawConfigParser, NoSectionError
except ImportError:
- from ConfigParser import ConfigParser, NoSectionError
+ from configparser import RawConfigParser, NoSectionError
import os
+import shutil
from string import Template
from datetime import datetime
+from threading import Lock
# Global object which can be used in other modules
@@ -34,6 +36,7 @@ class GconfInvalidValue(Exception):
class Gconf(object):
def __init__(self, default_conf_file, custom_conf_file=None,
args={}, extra_tmpl_args={}, override_from_args=False):
+ self.lock = Lock()
self.default_conf_file = default_conf_file
self.custom_conf_file = custom_conf_file
self.tmp_conf_file = None
@@ -91,7 +94,7 @@ class Gconf(object):
if name != "all" and not self._is_configurable(name):
raise GconfNotConfigurable()
- cnf = ConfigParser()
+ cnf = RawConfigParser()
with open(self.custom_conf_file) as f:
cnf.readfp(f)
@@ -135,7 +138,7 @@ class Gconf(object):
if curr_val == value:
return True
- cnf = ConfigParser()
+ cnf = RawConfigParser()
with open(self.custom_conf_file) as f:
cnf.readfp(f)
@@ -162,6 +165,11 @@ class Gconf(object):
if value is not None and not self._is_valid_value(name, value):
raise GconfInvalidValue()
+
+ def _load_with_lock(self):
+ with self.lock:
+ self._load()
+
def _load(self):
self.gconf = {}
self.template_conf = []
@@ -170,7 +178,7 @@ class Gconf(object):
self.session_conf_items = []
self.default_values = {}
- conf = ConfigParser()
+ conf = RawConfigParser()
# Default Template config file
with open(self.default_conf_file) as f:
conf.readfp(f)
@@ -229,12 +237,19 @@ class Gconf(object):
self._tmpl_substitute()
self._do_typecast()
- def reload(self):
+ def reload(self, with_lock=True):
if self._is_config_changed():
- self._load()
+ if with_lock:
+ self._load_with_lock()
+ else:
+ self._load()
- def get(self, name, default_value=None):
- return self.gconf.get(name, default_value)
+ def get(self, name, default_value=None, with_lock=True):
+ if with_lock:
+ with self.lock:
+ return self.gconf.get(name, default_value)
+ else:
+ return self.gconf.get(name, default_value)
def getall(self, show_defaults=False, show_non_configurable=False):
cnf = {}
@@ -275,8 +290,9 @@ class Gconf(object):
return cnf
def getr(self, name, default_value=None):
- self.reload()
- return self.get(name, default_value)
+ with self.lock:
+ self.reload(with_lock=False)
+ return self.get(name, default_value, with_lock=False)
def get_help(self, name=None):
pass
@@ -313,6 +329,9 @@ class Gconf(object):
if item["validation"] == "unixtime":
return validate_unixtime(value)
+ if item["validation"] == "int":
+ return validate_int(value)
+
return False
def _is_config_changed(self):
@@ -325,6 +344,53 @@ class Gconf(object):
return False
+def is_config_file_old(config_file, mastervol, slavevol):
+ cnf = RawConfigParser()
+ cnf.read(config_file)
+ session_section = "peers %s %s" % (mastervol, slavevol)
+ try:
+ return dict(cnf.items(session_section))
+ except NoSectionError:
+ return None
+
+def config_upgrade(config_file, ret):
+ config_file_backup = os.path.join(os.path.dirname(config_file), "gsyncd.conf.bkp")
+
+ #copy old config file in a backup file
+ shutil.copyfile(config_file, config_file_backup)
+
+ #write a new config file
+ config = RawConfigParser()
+ config.add_section('vars')
+
+ for key, value in ret.items():
+ #handle option name changes
+ if key == "use_tarssh":
+ new_key = "sync-method"
+ if value == "true":
+ new_value = "tarssh"
+ else:
+ new_value = "rsync"
+ config.set('vars', new_key, new_value)
+ elif key == "timeout":
+ new_key = "slave-timeout"
+ config.set('vars', new_key, value)
+ #for changes like: ignore_deletes to ignore-deletes
+ else:
+ new_key = key.replace("_", "-")
+ config.set('vars', new_key, value)
+
+ with open(config_file, 'w') as configfile:
+ config.write(configfile)
+
+
+def validate_int(value):
+ try:
+ _ = int(value)
+ return True
+ except ValueError:
+ return False
+
def validate_unixtime(value):
try:
@@ -338,11 +404,13 @@ def validate_unixtime(value):
def validate_minmax(value, minval, maxval):
- value = int(value)
- minval = int(minval)
- maxval = int(maxval)
-
- return value >= minval and value <= maxval
+ try:
+ value = int(value)
+ minval = int(minval)
+ maxval = int(maxval)
+ return value >= minval and value <= maxval
+ except ValueError:
+ return False
def validate_choice(value, allowed_values):
diff --git a/geo-replication/syncdaemon/gsyncdstatus.py b/geo-replication/syncdaemon/gsyncdstatus.py
index 72bcb092f01..1a655ff8887 100644
--- a/geo-replication/syncdaemon/gsyncdstatus.py
+++ b/geo-replication/syncdaemon/gsyncdstatus.py
@@ -23,8 +23,8 @@ from datetime import datetime
from errno import EACCES, EAGAIN, ENOENT
import logging
-from syncdutils import EVENT_GEOREP_ACTIVE, EVENT_GEOREP_PASSIVE, gf_event
-from syncdutils import EVENT_GEOREP_CHECKPOINT_COMPLETED, lf
+from syncdutils import (EVENT_GEOREP_ACTIVE, EVENT_GEOREP_PASSIVE, gf_event,
+ EVENT_GEOREP_CHECKPOINT_COMPLETED, lf)
DEFAULT_STATUS = "N/A"
MONITOR_STATUS = ("Created", "Started", "Paused", "Stopped")
diff --git a/geo-replication/syncdaemon/libcxattr.py b/geo-replication/syncdaemon/libcxattr.py
index 7f3f6ce453a..e6406c36bd7 100644
--- a/geo-replication/syncdaemon/libcxattr.py
+++ b/geo-replication/syncdaemon/libcxattr.py
@@ -9,9 +9,9 @@
#
import os
-from ctypes import CDLL, create_string_buffer, get_errno
-import py2py3
-from py2py3 import bytearray_to_str
+from ctypes import CDLL, get_errno
+from py2py3 import (bytearray_to_str, gr_create_string_buffer,
+ gr_query_xattr, gr_lsetxattr, gr_lremovexattr)
class Xattr(object):
@@ -40,7 +40,7 @@ class Xattr(object):
@classmethod
def _query_xattr(cls, path, siz, syscall, *a):
if siz:
- buf = create_string_buffer(b'\0' * siz)
+ buf = gr_create_string_buffer(siz)
else:
buf = None
ret = getattr(cls.libc, syscall)(*((path,) + a + (buf, siz)))
@@ -56,7 +56,7 @@ class Xattr(object):
@classmethod
def lgetxattr(cls, path, attr, siz=0):
- return cls._query_xattr(path.encode(), siz, 'lgetxattr', attr.encode())
+ return gr_query_xattr(cls, path, siz, 'lgetxattr', attr)
@classmethod
def lgetxattr_buf(cls, path, attr):
@@ -70,7 +70,7 @@ class Xattr(object):
@classmethod
def llistxattr(cls, path, siz=0):
- ret = cls._query_xattr(path.encode(), siz, 'llistxattr')
+ ret = gr_query_xattr(cls, path, siz, 'llistxattr')
if isinstance(ret, str):
ret = ret.strip('\0')
ret = ret.split('\0') if ret else []
@@ -78,13 +78,13 @@ class Xattr(object):
@classmethod
def lsetxattr(cls, path, attr, val):
- ret = cls.libc.lsetxattr(path.encode(), attr.encode(), val, len(val), 0)
+ ret = gr_lsetxattr(cls, path, attr, val)
if ret == -1:
cls.raise_oserr()
@classmethod
def lremovexattr(cls, path, attr):
- ret = cls.libc.lremovexattr(path.encode(), attr.encode())
+ ret = gr_lremovexattr(cls, path, attr)
if ret == -1:
cls.raise_oserr()
diff --git a/geo-replication/syncdaemon/libgfchangelog.py b/geo-replication/syncdaemon/libgfchangelog.py
index cc40fd5475d..a3bda7282c0 100644
--- a/geo-replication/syncdaemon/libgfchangelog.py
+++ b/geo-replication/syncdaemon/libgfchangelog.py
@@ -9,130 +9,135 @@
#
import os
-from ctypes import CDLL, RTLD_GLOBAL, create_string_buffer, \
- get_errno, byref, c_ulong
+from ctypes import CDLL, RTLD_GLOBAL, get_errno, byref, c_ulong
+from ctypes.util import find_library
from syncdutils import ChangelogException, ChangelogHistoryNotAvailable
+from py2py3 import (gr_cl_history_changelog, gr_cl_done,
+ gr_create_string_buffer, gr_cl_register,
+ gr_cl_history_done, bytearray_to_str)
-class Changes(object):
- libgfc = CDLL("libgfchangelog.so", mode=RTLD_GLOBAL,
- use_errno=True)
-
- @classmethod
- def geterrno(cls):
- return get_errno()
-
- @classmethod
- def raise_changelog_err(cls):
- errn = cls.geterrno()
- raise ChangelogException(errn, os.strerror(errn))
-
- @classmethod
- def _get_api(cls, call):
- return getattr(cls.libgfc, call)
-
- @classmethod
- def cl_init(cls):
- ret = cls._get_api('gf_changelog_init')(None)
- if ret == -1:
- cls.raise_changelog_err()
-
- @classmethod
- def cl_register(cls, brick, path, log_file, log_level, retries=0):
- ret = cls._get_api('gf_changelog_register')(brick.encode(), path.encode(),
- log_file.encode(),
- log_level, retries)
- if ret == -1:
- cls.raise_changelog_err()
-
- @classmethod
- def cl_scan(cls):
- ret = cls._get_api('gf_changelog_scan')()
- if ret == -1:
- cls.raise_changelog_err()
-
- @classmethod
- def cl_startfresh(cls):
- ret = cls._get_api('gf_changelog_start_fresh')()
- if ret == -1:
- cls.raise_changelog_err()
-
- @classmethod
- def cl_getchanges(cls):
- """ remove hardcoding for path name length """
- def clsort(f):
- return f.split('.')[-1]
- changes = []
- buf = create_string_buffer(b'\0' * 4096)
- call = cls._get_api('gf_changelog_next_change')
-
- while True:
- ret = call(buf, 4096)
- if ret in (0, -1):
- break
- changes.append(buf.raw[:ret - 1].decode())
- if ret == -1:
- cls.raise_changelog_err()
- # cleanup tracker
- cls.cl_startfresh()
- return sorted(changes, key=clsort)
-
- @classmethod
- def cl_done(cls, clfile):
- ret = cls._get_api('gf_changelog_done')(clfile.encode())
- if ret == -1:
- cls.raise_changelog_err()
-
- @classmethod
- def cl_history_scan(cls):
- ret = cls._get_api('gf_history_changelog_scan')()
- if ret == -1:
- cls.raise_changelog_err()
-
- return ret
-
- @classmethod
- def cl_history_changelog(cls, changelog_path, start, end, num_parallel):
- actual_end = c_ulong()
- ret = cls._get_api('gf_history_changelog')(changelog_path.encode(), start, end,
- num_parallel,
- byref(actual_end))
- if ret == -1:
- cls.raise_changelog_err()
-
- if ret == -2:
- raise ChangelogHistoryNotAvailable()
-
- return (ret, actual_end.value)
-
- @classmethod
- def cl_history_startfresh(cls):
- ret = cls._get_api('gf_history_changelog_start_fresh')()
- if ret == -1:
- cls.raise_changelog_err()
-
- @classmethod
- def cl_history_getchanges(cls):
- """ remove hardcoding for path name length """
- def clsort(f):
- return f.split('.')[-1]
-
- changes = []
- buf = create_string_buffer(b'\0' * 4096)
- call = cls._get_api('gf_history_changelog_next_change')
-
- while True:
- ret = call(buf, 4096)
- if ret in (0, -1):
- break
- changes.append(buf.raw[:ret - 1].decode())
- if ret == -1:
- cls.raise_changelog_err()
-
- return sorted(changes, key=clsort)
-
- @classmethod
- def cl_history_done(cls, clfile):
- ret = cls._get_api('gf_history_changelog_done')(clfile.encode())
- if ret == -1:
- cls.raise_changelog_err()
+libgfc = CDLL(
+ find_library("gfchangelog"),
+ mode=RTLD_GLOBAL,
+ use_errno=True
+)
+
+
+def _raise_changelog_err():
+ errn = get_errno()
+ raise ChangelogException(errn, os.strerror(errn))
+
+
+def _init():
+ if libgfc.gf_changelog_init(None) == -1:
+ _raise_changelog_err()
+
+
+def register(brick, path, log_file, log_level, retries=0):
+ _init()
+
+ ret = gr_cl_register(libgfc, brick, path, log_file, log_level, retries)
+
+ if ret == -1:
+ _raise_changelog_err()
+
+
+def scan():
+ ret = libgfc.gf_changelog_scan()
+ if ret == -1:
+ _raise_changelog_err()
+
+
+def startfresh():
+ ret = libgfc.gf_changelog_start_fresh()
+ if ret == -1:
+ _raise_changelog_err()
+
+
+def getchanges():
+ def clsort(cfile):
+ return cfile.split('.')[-1]
+
+ changes = []
+ buf = gr_create_string_buffer(4096)
+ call = libgfc.gf_changelog_next_change
+
+ while True:
+ ret = call(buf, 4096)
+ if ret in (0, -1):
+ break
+
+ # py2 and py3 compatibility
+ result = bytearray_to_str(buf.raw[:ret - 1])
+ changes.append(result)
+
+ if ret == -1:
+ _raise_changelog_err()
+
+ # cleanup tracker
+ startfresh()
+
+ return sorted(changes, key=clsort)
+
+
+def done(clfile):
+ ret = gr_cl_done(libgfc, clfile)
+ if ret == -1:
+ _raise_changelog_err()
+
+
+def history_scan():
+ ret = libgfc.gf_history_changelog_scan()
+ if ret == -1:
+ _raise_changelog_err()
+
+ return ret
+
+
+def history_changelog(changelog_path, start, end, num_parallel):
+ actual_end = c_ulong()
+ ret = gr_cl_history_changelog(libgfc, changelog_path, start, end,
+ num_parallel, byref(actual_end))
+ if ret == -1:
+ _raise_changelog_err()
+
+ if ret == -2:
+ raise ChangelogHistoryNotAvailable()
+
+ return (ret, actual_end.value)
+
+
+def history_startfresh():
+ ret = libgfc.gf_history_changelog_start_fresh()
+ if ret == -1:
+ _raise_changelog_err()
+
+
+def history_getchanges():
+ def clsort(cfile):
+ return cfile.split('.')[-1]
+
+ changes = []
+ buf = gr_create_string_buffer(4096)
+ call = libgfc.gf_history_changelog_next_change
+
+ while True:
+ ret = call(buf, 4096)
+ if ret in (0, -1):
+ break
+
+ # py2 and py3 compatibility
+ result = bytearray_to_str(buf.raw[:ret - 1])
+ changes.append(result)
+
+ if ret == -1:
+ _raise_changelog_err()
+
+ return sorted(changes, key=clsort)
+
+
+def history_done(clfile):
+ ret = gr_cl_history_done(libgfc, clfile)
+ if ret == -1:
+ _raise_changelog_err()
diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py
index 3da7610eee0..9501aeae6b5 100644
--- a/geo-replication/syncdaemon/master.py
+++ b/geo-replication/syncdaemon/master.py
@@ -22,11 +22,13 @@ from threading import Condition, Lock
from datetime import datetime
import gsyncdconfig as gconf
+import libgfchangelog
from rconf import rconf
-from syncdutils import Thread, GsyncdError, escape_space_newline
-from syncdutils import unescape_space_newline, gauxpfx, escape
-from syncdutils import lstat, errno_wrap, FreeObject, lf, matching_disk_gfid
-from syncdutils import NoStimeAvailable, PartialHistoryAvailable
+from syncdutils import (Thread, GsyncdError, escape_space_newline,
+ unescape_space_newline, gauxpfx, escape,
+ lstat, errno_wrap, FreeObject, lf, matching_disk_gfid,
+ NoStimeAvailable, PartialHistoryAvailable,
+ host_brick_split)
URXTIME = (-1, 0)
@@ -65,6 +67,9 @@ def _volinfo_hook_relax_foreign(self):
def edct(op, **ed):
dct = {}
dct['op'] = op
+ # This is used in automatic gfid conflict resolution.
+ # When marked True, it's skipped during re-processing.
+ dct['skip_entry'] = False
for k in ed:
if k == 'stat':
st = ed[k]
@@ -514,7 +519,7 @@ class GMasterCommon(object):
# If crawlwrap is called when partial history available,
# then it sets register_time which is the time when geo-rep
# worker registered to changelog consumption. Since nsec is
- # not considered in register time, their are chances of skipping
+ # not considered in register time, there are chances of skipping
# changes detection in xsync crawl. This limit will be reset when
# crawlwrap is called again.
self.live_changelog_start_time = None
@@ -792,6 +797,7 @@ class GMasterChangelogMixin(GMasterCommon):
pfx = gauxpfx()
fix_entry_ops = []
failures1 = []
+ remove_gfids = set()
for failure in failures:
if failure[2]['name_mismatch']:
pbname = failure[2]['slave_entry']
@@ -807,7 +813,7 @@ class GMasterChangelogMixin(GMasterCommon):
st = lstat(os.path.join(pfx, slave_gfid))
# Takes care of scenarios with no hardlinks
if isinstance(st, int) and st == ENOENT:
- logging.info(lf('Entry not present on master. Fixing gfid '
+ logging.debug(lf('Entry not present on master. Fixing gfid '
'mismatch in slave. Deleting the entry',
retry_count=retry_count,
entry=repr(failure)))
@@ -822,16 +828,33 @@ class GMasterChangelogMixin(GMasterCommon):
edct('UNLINK',
gfid=failure[2]['slave_gfid'],
entry=pbname))
+ remove_gfids.add(slave_gfid)
+ if op in ['RENAME']:
+ # If renamed gfid doesn't exists on master, remove
+ # rename entry and unlink src on slave
+ st = lstat(os.path.join(pfx, failure[0]['gfid']))
+ if isinstance(st, int) and st == ENOENT:
+ logging.debug("Unlink source %s" % repr(failure))
+ remove_gfids.add(failure[0]['gfid'])
+ fix_entry_ops.append(
+ edct('UNLINK',
+ gfid=failure[0]['gfid'],
+ entry=failure[0]['entry']))
# Takes care of scenarios of hardlinks/renames on master
elif not isinstance(st, int):
if matching_disk_gfid(slave_gfid, pbname):
# Safe to ignore the failure as master contains same
# file with same gfid. Remove entry from entries list
- logging.info(lf('Fixing gfid mismatch in slave. '
+ logging.debug(lf('Fixing gfid mismatch in slave. '
' Safe to ignore, take out entry',
retry_count=retry_count,
entry=repr(failure)))
- entries.remove(failure[0])
+ remove_gfids.add(failure[0]['gfid'])
+ if op == 'RENAME':
+ fix_entry_ops.append(
+ edct('UNLINK',
+ gfid=failure[0]['gfid'],
+ entry=failure[0]['entry']))
# The file exists on master but with different name.
# Probably renamed and got missed during xsync crawl.
elif failure[2]['slave_isdir']:
@@ -844,25 +867,28 @@ class GMasterChangelogMixin(GMasterCommon):
dst_entry = os.path.join(pfx, realpath.split('/')[-2],
realpath.split('/')[-1])
src_entry = pbname
- logging.info(lf('Fixing dir name/gfid mismatch in '
+ logging.debug(lf('Fixing dir name/gfid mismatch in '
'slave', retry_count=retry_count,
entry=repr(failure)))
if src_entry == dst_entry:
# Safe to ignore the failure as master contains
# same directory as in slave with same gfid.
# Remove the failure entry from entries list
- logging.info(lf('Fixing dir name/gfid mismatch'
+ logging.debug(lf('Fixing dir name/gfid mismatch'
' in slave. Safe to ignore, '
'take out entry',
retry_count=retry_count,
entry=repr(failure)))
- entries.remove(failure[0])
+ try:
+ entries.remove(failure[0])
+ except ValueError:
+ pass
else:
rename_dict = edct('RENAME', gfid=slave_gfid,
entry=src_entry,
entry1=dst_entry, stat=st,
link=None)
- logging.info(lf('Fixing dir name/gfid mismatch'
+ logging.debug(lf('Fixing dir name/gfid mismatch'
' in slave. Renaming',
retry_count=retry_count,
entry=repr(rename_dict)))
@@ -872,7 +898,7 @@ class GMasterChangelogMixin(GMasterCommon):
# renamed file exists and we are sure from
# matching_disk_gfid check that the entry doesn't
# exist with same gfid so we can safely delete on slave
- logging.info(lf('Fixing file gfid mismatch in slave. '
+ logging.debug(lf('Fixing file gfid mismatch in slave. '
'Hardlink/Rename Case. Deleting entry',
retry_count=retry_count,
entry=repr(failure)))
@@ -891,14 +917,17 @@ class GMasterChangelogMixin(GMasterCommon):
# Safe to ignore the failure as master doesn't contain
# parent directory.
if isinstance(st, int):
- logging.info(lf('Fixing ENOENT error in slave. Parent '
+ logging.debug(lf('Fixing ENOENT error in slave. Parent '
'does not exist on master. Safe to '
'ignore, take out entry',
retry_count=retry_count,
entry=repr(failure)))
- entries.remove(failure[0])
+ try:
+ entries.remove(failure[0])
+ except ValueError:
+ pass
else:
- logging.info(lf('Fixing ENOENT error in slave. Create '
+ logging.debug(lf('Fixing ENOENT error in slave. Create '
'parent directory on slave.',
retry_count=retry_count,
entry=repr(failure)))
@@ -913,6 +942,14 @@ class GMasterChangelogMixin(GMasterCommon):
edct('MKDIR', gfid=pargfid, entry=dir_entry,
mode=st.st_mode, uid=st.st_uid, gid=st.st_gid))
+ logging.debug("remove_gfids: %s" % repr(remove_gfids))
+ if remove_gfids:
+ for e in entries:
+ if e['op'] in ['MKDIR', 'MKNOD', 'CREATE', 'RENAME'] \
+ and e['gfid'] in remove_gfids:
+ logging.debug("Removed entry op from retrial list: entry: %s" % repr(e))
+ e['skip_entry'] = True
+
if fix_entry_ops:
# Process deletions of entries whose gfids are mismatched
failures1 = self.slave.server.entry_ops(fix_entry_ops)
@@ -1170,7 +1207,6 @@ class GMasterChangelogMixin(GMasterCommon):
logging.debug('entries: %s' % repr(entries))
# Increment counters for Status
- self.status.inc_value("entry", len(entries))
self.files_in_batch += len(datas)
self.status.inc_value("data", len(datas))
@@ -1188,10 +1224,13 @@ class GMasterChangelogMixin(GMasterCommon):
if gconf.get("gfid-conflict-resolution"):
count = 0
+ if failures:
+ logging.info(lf('Entry ops failed with gfid mismatch',
+ count=len(failures)))
while failures and count < self.MAX_OE_RETRIES:
count += 1
self.handle_entry_failures(failures, entries)
- logging.info("Retry original entries. count = %s" % count)
+ logging.info(lf('Retry original entries', count=count))
failures = self.slave.server.entry_ops(entries)
if not failures:
logging.info("Successfully fixed all entry ops with "
@@ -1233,10 +1272,10 @@ class GMasterChangelogMixin(GMasterCommon):
continue
meta_entries.append(edct('META', go=go[0], stat=st))
if meta_entries:
- self.status.inc_value("meta", len(entries))
+ self.status.inc_value("meta", len(meta_entries))
failures = self.slave.server.meta_ops(meta_entries)
self.log_failures(failures, 'go', '', 'META')
- self.status.dec_value("meta", len(entries))
+ self.status.dec_value("meta", len(meta_entries))
self.batch_stats["META_SYNC_TIME"] += time.time() - meta_start_time
@@ -1428,7 +1467,7 @@ class GMasterChangelogMixin(GMasterCommon):
node = rconf.args.resource_remote
node_data = node.split("@")
node = node_data[-1]
- remote_node_ip = node.split(":")[0]
+ remote_node_ip, _ = host_brick_split(node)
self.status.set_slave_node(remote_node_ip)
def changelogs_batch_process(self, changes):
@@ -1461,9 +1500,9 @@ class GMasterChangelogMixin(GMasterCommon):
# that are _historical_ to that time.
data_stime = self.get_data_stime()
- self.changelog_agent.scan()
+ libgfchangelog.scan()
self.crawls += 1
- changes = self.changelog_agent.getchanges()
+ changes = libgfchangelog.getchanges()
if changes:
if data_stime:
logging.info(lf("slave's time",
@@ -1480,10 +1519,9 @@ class GMasterChangelogMixin(GMasterCommon):
self.changelogs_batch_process(changes)
- def register(self, register_time, changelog_agent, status):
- self.changelog_agent = changelog_agent
+ def register(self, register_time, status):
self.sleep_interval = gconf.get("change-interval")
- self.changelog_done_func = self.changelog_agent.done
+ self.changelog_done_func = libgfchangelog.done
self.tempdir = self.setup_working_dir()
self.processed_changelogs_dir = os.path.join(self.tempdir,
".processed")
@@ -1492,11 +1530,10 @@ class GMasterChangelogMixin(GMasterCommon):
class GMasterChangeloghistoryMixin(GMasterChangelogMixin):
- def register(self, register_time, changelog_agent, status):
- self.changelog_agent = changelog_agent
+ def register(self, register_time, status):
self.changelog_register_time = register_time
self.history_crawl_start_time = register_time
- self.changelog_done_func = self.changelog_agent.history_done
+ self.changelog_done_func = libgfchangelog.history_done
self.history_turns = 0
self.tempdir = self.setup_working_dir()
self.processed_changelogs_dir = os.path.join(self.tempdir,
@@ -1510,6 +1547,12 @@ class GMasterChangeloghistoryMixin(GMasterChangelogMixin):
data_stime = self.get_data_stime()
end_time = int(time.time())
+
+ #as start of historical crawl marks Geo-rep worker restart
+ if gconf.get("ignore-deletes"):
+ logging.info(lf('ignore-deletes config option is set',
+ stime=data_stime))
+
logging.info(lf('starting history crawl',
turns=self.history_turns,
stime=data_stime,
@@ -1524,7 +1567,7 @@ class GMasterChangeloghistoryMixin(GMasterChangelogMixin):
# location then consuming history will not work(Known issue as of now)
changelog_path = os.path.join(rconf.args.local_path,
".glusterfs/changelogs")
- ret, actual_end = self.changelog_agent.history(
+ ret, actual_end = libgfchangelog.history_changelog(
changelog_path,
data_stime[0],
end_time,
@@ -1536,10 +1579,10 @@ class GMasterChangeloghistoryMixin(GMasterChangelogMixin):
# to be processed. returns positive value as number of changelogs
# to be processed, which will be fetched using
# history_getchanges()
- while self.changelog_agent.history_scan() > 0:
+ while libgfchangelog.history_scan() > 0:
self.crawls += 1
- changes = self.changelog_agent.history_getchanges()
+ changes = libgfchangelog.history_getchanges()
if changes:
if data_stime:
logging.info(lf("slave's time",
@@ -1592,7 +1635,7 @@ class GMasterXsyncMixin(GMasterChangelogMixin):
XSYNC_MAX_ENTRIES = 1 << 13
- def register(self, register_time=None, changelog_agent=None, status=None):
+ def register(self, register_time=None, status=None):
self.status = status
self.counter = 0
self.comlist = []
diff --git a/geo-replication/syncdaemon/monitor.py b/geo-replication/syncdaemon/monitor.py
index c45ef24e59f..6aa7b9dfc99 100644
--- a/geo-replication/syncdaemon/monitor.py
+++ b/geo-replication/syncdaemon/monitor.py
@@ -20,13 +20,14 @@ import random
from resource import SSH
import gsyncdconfig as gconf
+import libgfchangelog
from rconf import rconf
-from syncdutils import select, waitpid, errno_wrap, lf, grabpidfile
-from syncdutils import set_term_handler, GsyncdError
-from syncdutils import Thread, finalize, Volinfo, VolinfoFromGconf
-from syncdutils import gf_event, EVENT_GEOREP_FAULTY, get_up_nodes
+from syncdutils import (select, waitpid, errno_wrap, lf, grabpidfile,
+ set_term_handler, GsyncdError,
+ Thread, finalize, Volinfo, VolinfoFromGconf,
+ gf_event, EVENT_GEOREP_FAULTY, get_up_nodes,
+ unshare_propagation_supported)
from gsyncdstatus import GeorepStatus, set_monitor_status
-from syncdutils import unshare_propagation_supported
import py2py3
from py2py3 import pipe
@@ -37,6 +38,8 @@ def get_subvol_num(brick_idx, vol, hot):
tier = vol.is_tier()
disperse_count = vol.disperse_count(tier, hot)
replica_count = vol.replica_count(tier, hot)
+ distribute_count = vol.distribution_count(tier, hot)
+ gconf.setconfig("master-distribution-count", distribute_count)
if (tier and not hot):
brick_idx = brick_idx - vol.get_hot_bricks_count(tier)
@@ -79,7 +82,7 @@ class Monitor(object):
# give a chance to graceful exit
errno_wrap(os.kill, [-os.getpid(), signal.SIGTERM], [ESRCH])
- def monitor(self, w, argv, cpids, agents, slave_vol, slave_host, master,
+ def monitor(self, w, argv, cpids, slave_vol, slave_host, master,
suuid, slavenodes):
"""the monitor loop
@@ -148,7 +151,7 @@ class Monitor(object):
remote_host = "%s@%s" % (remote_user, remote_new[0])
remote_id = remote_new[1]
- # Spawn the worker and agent in lock to avoid fd leak
+ # Spawn the worker in lock to avoid fd leak
self.lock.acquire()
self.status[w[0]['dir']].set_worker_status(self.ST_INIT)
@@ -156,44 +159,10 @@ class Monitor(object):
brick=w[0]['dir'],
slave_node=remote_host))
- # Couple of pipe pairs for RPC communication b/w
- # worker and changelog agent.
-
- # read/write end for agent
- (ra, ww) = pipe()
- # read/write end for worker
- (rw, wa) = pipe()
-
- # spawn the agent process
- apid = os.fork()
- if apid == 0:
- os.close(rw)
- os.close(ww)
- args_to_agent = argv + [
- 'agent',
- rconf.args.master,
- rconf.args.slave,
- '--local-path', w[0]['dir'],
- '--local-node', w[0]['host'],
- '--local-node-id', w[0]['uuid'],
- '--slave-id', suuid,
- '--rpc-fd', ','.join([str(ra), str(wa), str(rw), str(ww)])
- ]
-
- if rconf.args.config_file is not None:
- args_to_agent += ['-c', rconf.args.config_file]
-
- if rconf.args.debug:
- args_to_agent.append("--debug")
-
- os.execv(sys.executable, args_to_agent)
-
pr, pw = pipe()
cpid = os.fork()
if cpid == 0:
os.close(pr)
- os.close(ra)
- os.close(wa)
args_to_worker = argv + [
'worker',
@@ -204,8 +173,6 @@ class Monitor(object):
'--local-node', w[0]['host'],
'--local-node-id', w[0]['uuid'],
'--slave-id', suuid,
- '--rpc-fd',
- ','.join([str(rw), str(ww), str(ra), str(wa)]),
'--subvol-num', str(w[2]),
'--resource-remote', remote_host,
'--resource-remote-id', remote_id
@@ -236,14 +203,8 @@ class Monitor(object):
os.execv(sys.executable, args_to_worker)
cpids.add(cpid)
- agents.add(apid)
os.close(pw)
- # close all RPC pipes in monitor
- os.close(ra)
- os.close(wa)
- os.close(rw)
- os.close(ww)
self.lock.release()
t0 = time.time()
@@ -252,42 +213,19 @@ class Monitor(object):
if so:
ret = nwait(cpid, os.WNOHANG)
- ret_agent = nwait(apid, os.WNOHANG)
-
- if ret_agent is not None:
- # Agent is died Kill Worker
- logging.info(lf("Changelog Agent died, Aborting Worker",
- brick=w[0]['dir']))
- errno_wrap(os.kill, [cpid, signal.SIGKILL], [ESRCH])
- nwait(cpid)
- nwait(apid)
if ret is not None:
logging.info(lf("worker died before establishing "
"connection",
brick=w[0]['dir']))
- nwait(apid) # wait for agent
else:
logging.debug("worker(%s) connected" % w[0]['dir'])
while time.time() < t0 + conn_timeout:
ret = nwait(cpid, os.WNOHANG)
- ret_agent = nwait(apid, os.WNOHANG)
if ret is not None:
logging.info(lf("worker died in startup phase",
brick=w[0]['dir']))
- nwait(apid) # wait for agent
- break
-
- if ret_agent is not None:
- # Agent is died Kill Worker
- logging.info(lf("Changelog Agent died, Aborting "
- "Worker",
- brick=w[0]['dir']))
- errno_wrap(os.kill, [cpid, signal.SIGKILL],
- [ESRCH])
- nwait(cpid)
- nwait(apid)
break
time.sleep(1)
@@ -302,12 +240,8 @@ class Monitor(object):
brick=w[0]['dir'],
timeout=conn_timeout))
errno_wrap(os.kill, [cpid, signal.SIGKILL], [ESRCH])
- nwait(apid) # wait for agent
ret = nwait(cpid)
if ret is None:
- # If worker dies, agent terminates on EOF.
- # So lets wait for agent first.
- nwait(apid)
ret = nwait(cpid)
if exit_signalled(ret):
ret = 0
@@ -331,18 +265,15 @@ class Monitor(object):
argv = [os.path.basename(sys.executable), sys.argv[0]]
cpids = set()
- agents = set()
ta = []
for wx in wspx:
def wmon(w):
- cpid, _ = self.monitor(w, argv, cpids, agents, slave_vol,
+ cpid, _ = self.monitor(w, argv, cpids, slave_vol,
slave_host, master, suuid, slavenodes)
time.sleep(1)
self.lock.acquire()
for cpid in cpids:
errno_wrap(os.kill, [cpid, signal.SIGKILL], [ESRCH])
- for apid in agents:
- errno_wrap(os.kill, [apid, signal.SIGKILL], [ESRCH])
self.lock.release()
finalize(exval=1)
t = Thread(target=wmon, args=[wx])
@@ -352,8 +283,8 @@ class Monitor(object):
# monitor status was being updated in each monitor thread. It
# should not be done as it can cause deadlock for a worker start.
# set_monitor_status uses flock to synchronize multple instances
- # updating the file. Since each monitor thread forks worker and
- # agent, these processes can hold the reference to fd of status
+ # updating the file. Since each monitor thread forks worker,
+ # these processes can hold the reference to fd of status
# file causing deadlock to workers which starts later as flock
# will not be release until all references to same fd is closed.
# It will also cause fd leaks.
@@ -369,7 +300,7 @@ def distribute(master, slave):
if rconf.args.use_gconf_volinfo:
mvol = VolinfoFromGconf(master.volume, master=True)
else:
- mvol = Volinfo(master.volume, master.host)
+ mvol = Volinfo(master.volume, master.host, master=True)
logging.debug('master bricks: ' + repr(mvol.bricks))
prelude = []
slave_host = None
@@ -385,7 +316,7 @@ def distribute(master, slave):
if rconf.args.use_gconf_volinfo:
svol = VolinfoFromGconf(slave.volume, master=False)
else:
- svol = Volinfo(slave.volume, "localhost", prelude)
+ svol = Volinfo(slave.volume, "localhost", prelude, master=False)
sbricks = svol.bricks
suuid = svol.uuid
diff --git a/geo-replication/syncdaemon/py2py3.py b/geo-replication/syncdaemon/py2py3.py
index 4c0e1152aa8..f9c76e1b50a 100644
--- a/geo-replication/syncdaemon/py2py3.py
+++ b/geo-replication/syncdaemon/py2py3.py
@@ -12,7 +12,10 @@
import sys
import os
-
+import stat
+import struct
+from syncdutils import umask
+from ctypes import create_string_buffer
if sys.version_info >= (3,):
def pipe():
@@ -35,6 +38,77 @@ if sys.version_info >= (3,):
def str_to_bytearray(string):
return bytes([ord(c) for c in string])
+ def gr_create_string_buffer(size):
+ return create_string_buffer(b'\0', size)
+
+ def gr_query_xattr(cls, path, size, syscall, attr=None):
+ if attr:
+ return cls._query_xattr(path.encode(), size, syscall,
+ attr.encode())
+ else:
+ return cls._query_xattr(path.encode(), size, syscall)
+
+ def gr_lsetxattr(cls, path, attr, val):
+ return cls.libc.lsetxattr(path.encode(), attr.encode(), val,
+ len(val), 0)
+
+ def gr_lremovexattr(cls, path, attr):
+ return cls.libc.lremovexattr(path.encode(), attr.encode())
+
+ def gr_cl_register(libgfapi, brick, path, log_file, log_level, retries):
+ return libgfapi.gf_changelog_register(brick.encode(),
+ path.encode(),
+ log_file.encode(),
+ log_level, retries)
+
+ def gr_cl_done(libgfapi, clfile):
+ return libgfapi.gf_changelog_done(clfile.encode())
+
+ def gr_cl_history_changelog(libgfapi, changelog_path, start, end, num_parallel,
+ actual_end):
+ return libgfapi.gf_history_changelog(changelog_path.encode(),
+ start, end, num_parallel,
+ actual_end)
+
+ def gr_cl_history_done(libgfapi, clfile):
+ return libgfapi.gf_history_changelog_done(clfile.encode())
+
+ # regular file
+
+ def entry_pack_reg(cls, gf, bn, mo, uid, gid):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ return struct.pack(cls._fmt_mknod(blen),
+ uid, gid, gf.encode(), mo, bn_encoded,
+ stat.S_IMODE(mo), 0, umask())
+
+ def entry_pack_reg_stat(cls, gf, bn, st):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ mo = st['mode']
+ return struct.pack(cls._fmt_mknod(blen),
+ st['uid'], st['gid'],
+ gf.encode(), mo, bn_encoded,
+ stat.S_IMODE(mo), 0, umask())
+ # mkdir
+
+ def entry_pack_mkdir(cls, gf, bn, mo, uid, gid):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ return struct.pack(cls._fmt_mkdir(blen),
+ uid, gid, gf.encode(), mo, bn_encoded,
+ stat.S_IMODE(mo), umask())
+ # symlink
+
+ def entry_pack_symlink(cls, gf, bn, lnk, st):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ lnk_encoded = lnk.encode()
+ llen = len(lnk_encoded)
+ return struct.pack(cls._fmt_symlink(blen, llen),
+ st['uid'], st['gid'],
+ gf.encode(), st['mode'], bn_encoded,
+ lnk_encoded)
else:
def pipe():
(r, w) = os.pipe()
@@ -42,8 +116,69 @@ else:
# Raw conversion of bytearray to string
def bytearray_to_str(byte_arr):
- return ''.join([b for b in byte_arr])
+ return byte_arr
# Raw conversion of string to bytearray
def str_to_bytearray(string):
- return b"".join([c for c in string])
+ return string
+
+ def gr_create_string_buffer(size):
+ return create_string_buffer('\0', size)
+
+ def gr_query_xattr(cls, path, size, syscall, attr=None):
+ if attr:
+ return cls._query_xattr(path, size, syscall, attr)
+ else:
+ return cls._query_xattr(path, size, syscall)
+
+ def gr_lsetxattr(cls, path, attr, val):
+ return cls.libc.lsetxattr(path, attr, val, len(val), 0)
+
+ def gr_lremovexattr(cls, path, attr):
+ return cls.libc.lremovexattr(path, attr)
+
+ def gr_cl_register(libgfapi, brick, path, log_file, log_level, retries):
+ return libgfapi.gf_changelog_register(brick, path, log_file,
+ log_level, retries)
+
+ def gr_cl_done(libgfapi, clfile):
+ return libgfapi.gf_changelog_done(clfile)
+
+ def gr_cl_history_changelog(libgfapi, changelog_path, start, end, num_parallel,
+ actual_end):
+ return libgfapi.gf_history_changelog(changelog_path, start, end,
+ num_parallel, actual_end)
+
+ def gr_cl_history_done(libgfapi, clfile):
+ return libgfapi.gf_history_changelog_done(clfile)
+
+ # regular file
+
+ def entry_pack_reg(cls, gf, bn, mo, uid, gid):
+ blen = len(bn)
+ return struct.pack(cls._fmt_mknod(blen),
+ uid, gid, gf, mo, bn,
+ stat.S_IMODE(mo), 0, umask())
+
+ def entry_pack_reg_stat(cls, gf, bn, st):
+ blen = len(bn)
+ mo = st['mode']
+ return struct.pack(cls._fmt_mknod(blen),
+ st['uid'], st['gid'],
+ gf, mo, bn,
+ stat.S_IMODE(mo), 0, umask())
+ # mkdir
+
+ def entry_pack_mkdir(cls, gf, bn, mo, uid, gid):
+ blen = len(bn)
+ return struct.pack(cls._fmt_mkdir(blen),
+ uid, gid, gf, mo, bn,
+ stat.S_IMODE(mo), umask())
+ # symlink
+
+ def entry_pack_symlink(cls, gf, bn, lnk, st):
+ blen = len(bn)
+ llen = len(lnk)
+ return struct.pack(cls._fmt_symlink(blen, llen),
+ st['uid'], st['gid'],
+ gf, st['mode'], bn, lnk)
diff --git a/geo-replication/syncdaemon/repce.py b/geo-replication/syncdaemon/repce.py
index 6065b828c99..c622afa6373 100644
--- a/geo-replication/syncdaemon/repce.py
+++ b/geo-replication/syncdaemon/repce.py
@@ -8,7 +8,6 @@
# cases as published by the Free Software Foundation.
#
-import _io
import os
import sys
import time
@@ -58,9 +57,9 @@ def recv(inf):
"""load an object from input stream
python2 and python3 compatibility, inf is sys.stdin
and is opened as text stream by default. Hence using the
- buffer attribute
+ buffer attribute in python3
"""
- if isinstance(inf, _io.TextIOWrapper):
+ if hasattr(inf, "buffer"):
return pickle.load(inf.buffer)
else:
return pickle.load(inf)
diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py
index c8575f00e70..f12c7ceaa36 100644
--- a/geo-replication/syncdaemon/resource.py
+++ b/geo-replication/syncdaemon/resource.py
@@ -19,30 +19,31 @@ import struct
import logging
import tempfile
import subprocess
-from errno import EEXIST, ENOENT, ENODATA, ENOTDIR, ELOOP, EACCES
-from errno import EISDIR, ENOTEMPTY, ESTALE, EINVAL, EBUSY, EPERM
+from errno import (EEXIST, ENOENT, ENODATA, ENOTDIR, ELOOP, EACCES,
+ EISDIR, ENOTEMPTY, ESTALE, EINVAL, EBUSY, EPERM)
import errno
from rconf import rconf
import gsyncdconfig as gconf
+import libgfchangelog
import repce
from repce import RepceServer, RepceClient
from master import gmaster_builder
import syncdutils
-from syncdutils import GsyncdError, select, privileged, funcode
-from syncdutils import umask, entry2pb, gauxpfx, errno_wrap, lstat
-from syncdutils import NoStimeAvailable, PartialHistoryAvailable
-from syncdutils import ChangelogException, ChangelogHistoryNotAvailable
-from syncdutils import get_changelog_log_level, get_rsync_version
-from syncdutils import CHANGELOG_AGENT_CLIENT_VERSION
-from syncdutils import GX_GFID_CANONICAL_LEN
+from syncdutils import (GsyncdError, select, privileged, funcode,
+ entry2pb, gauxpfx, errno_wrap, lstat,
+ NoStimeAvailable, PartialHistoryAvailable,
+ ChangelogException, ChangelogHistoryNotAvailable,
+ get_changelog_log_level, get_rsync_version,
+ GX_GFID_CANONICAL_LEN,
+ gf_mount_ready, lf, Popen, sup,
+ Xattr, matching_disk_gfid, get_gfid_from_mnt,
+ unshare_propagation_supported, get_slv_dir_path)
from gsyncdstatus import GeorepStatus
-from syncdutils import lf, Popen, sup
-from syncdutils import Xattr, matching_disk_gfid, get_gfid_from_mnt
-from syncdutils import unshare_propagation_supported, get_slv_dir_path
-import py2py3
-from py2py3 import pipe, str_to_bytearray
+from py2py3 import (pipe, str_to_bytearray, entry_pack_reg,
+ entry_pack_reg_stat, entry_pack_mkdir,
+ entry_pack_symlink)
ENOTSUP = getattr(errno, 'ENOTSUP', 'EOPNOTSUPP')
@@ -377,37 +378,7 @@ class Server(object):
def entry_ops(cls, entries):
pfx = gauxpfx()
logging.debug('entries: %s' % repr(entries))
- # regular file
-
- def entry_pack_reg(gf, bn, mo, uid, gid):
- blen = len(bn)
- return struct.pack(cls._fmt_mknod(blen),
- uid, gid, gf.encode(), mo, bn.encode(),
- stat.S_IMODE(mo), 0, umask())
-
- def entry_pack_reg_stat(gf, bn, st):
- blen = len(bn)
- mo = st['mode']
- return struct.pack(cls._fmt_mknod(blen),
- st['uid'], st['gid'],
- gf.encode(), mo, bn.encode(),
- stat.S_IMODE(mo), 0, umask())
- # mkdir
-
- def entry_pack_mkdir(gf, bn, mo, uid, gid):
- blen = len(bn)
- return struct.pack(cls._fmt_mkdir(blen),
- uid, gid, gf.encode(), mo, bn.encode(),
- stat.S_IMODE(mo), umask())
- # symlink
-
- def entry_pack_symlink(gf, bn, lnk, st):
- blen = len(bn)
- llen = len(lnk)
- return struct.pack(cls._fmt_symlink(blen, llen),
- st['uid'], st['gid'],
- gf.encode(), st['mode'], bn.encode(),
- lnk.encode())
+ dist_count = rconf.args.master_dist_count
def entry_purge(op, entry, gfid, e, uid, gid):
# This is an extremely racy code and needs to be fixed ASAP.
@@ -457,7 +428,7 @@ class Server(object):
e['stat']['uid'] = uid
e['stat']['gid'] = gid
- if cmd_ret == EEXIST:
+ if cmd_ret in [EEXIST, ESTALE]:
if dst:
en = e['entry1']
else:
@@ -541,6 +512,12 @@ class Server(object):
entry = e['entry']
uid = 0
gid = 0
+
+ # Skip entry processing if it's marked true during gfid
+ # conflict resolution
+ if e['skip_entry']:
+ continue
+
if e.get("stat", {}):
# Copy UID/GID value and then reset to zero. Copied UID/GID
# will be used to run chown once entry is created.
@@ -581,8 +558,8 @@ class Server(object):
st = lstat(slink)
# don't create multiple entries with same gfid
if isinstance(st, int):
- blob = entry_pack_reg(
- gfid, bname, e['mode'], e['uid'], e['gid'])
+ blob = entry_pack_reg(cls, gfid, bname,
+ e['mode'], e['uid'], e['gid'])
# Self healed hardlinks are recorded as MKNOD.
# So if the gfid already exists, it should be
# processed as hard link not mknod.
@@ -597,8 +574,8 @@ class Server(object):
st = lstat(slink)
# don't create multiple entries with same gfid
if isinstance(st, int):
- blob = entry_pack_mkdir(
- gfid, bname, e['mode'], e['uid'], e['gid'])
+ blob = entry_pack_mkdir(cls, gfid, bname,
+ e['mode'], e['uid'], e['gid'])
elif (isinstance(lstat(en), int) or
not matching_disk_gfid(gfid, en)):
# If gfid of a directory exists on slave but path based
@@ -610,6 +587,8 @@ class Server(object):
logging.info(lf("Special case: rename on mkdir",
gfid=gfid, entry=repr(entry)))
src_entry = get_slv_dir_path(slv_host, slv_volume, gfid)
+ if src_entry is None:
+ collect_failure(e, ENOENT, uid, gid)
if src_entry is not None and src_entry != entry:
slv_entry_info = {}
slv_entry_info['gfid_mismatch'] = False
@@ -626,9 +605,9 @@ class Server(object):
if isinstance(st, int):
(pg, bname) = entry2pb(entry)
if stat.S_ISREG(e['stat']['mode']):
- blob = entry_pack_reg_stat(gfid, bname, e['stat'])
+ blob = entry_pack_reg_stat(cls, gfid, bname, e['stat'])
elif stat.S_ISLNK(e['stat']['mode']):
- blob = entry_pack_symlink(gfid, bname, e['link'],
+ blob = entry_pack_symlink(cls, gfid, bname, e['link'],
e['stat'])
else:
cmd_ret = errno_wrap(os.link,
@@ -639,7 +618,7 @@ class Server(object):
en = e['entry']
st = lstat(entry)
if isinstance(st, int):
- blob = entry_pack_symlink(gfid, bname, e['link'],
+ blob = entry_pack_symlink(cls, gfid, bname, e['link'],
e['stat'])
elif not matching_disk_gfid(gfid, en):
collect_failure(e, EEXIST, uid, gid)
@@ -654,22 +633,27 @@ class Server(object):
# exist with different gfid.
if not matching_disk_gfid(gfid, entry):
if e['stat'] and not stat.S_ISDIR(e['stat']['mode']):
- if stat.S_ISLNK(e['stat']['mode']) and \
- e['link'] is not None:
- st1 = lstat(en)
- if isinstance(st1, int):
- (pg, bname) = entry2pb(en)
- blob = entry_pack_symlink(gfid, bname,
- e['link'], e['stat'])
- elif not matching_disk_gfid(gfid, en):
- collect_failure(e, EEXIST, uid, gid, True)
+ if stat.S_ISLNK(e['stat']['mode']):
+ # src is not present, so don't sync symlink as
+ # we don't know target. It's ok to ignore. If
+ # it's unliked, it's fine. If it's renamed to
+ # something else, it will be synced then.
+ if e['link'] is not None:
+ st1 = lstat(en)
+ if isinstance(st1, int):
+ (pg, bname) = entry2pb(en)
+ blob = entry_pack_symlink(cls, gfid, bname,
+ e['link'],
+ e['stat'])
+ elif not matching_disk_gfid(gfid, en):
+ collect_failure(e, EEXIST, uid, gid, True)
else:
slink = os.path.join(pfx, gfid)
st = lstat(slink)
# don't create multiple entries with same gfid
if isinstance(st, int):
(pg, bname) = entry2pb(en)
- blob = entry_pack_reg_stat(gfid, bname,
+ blob = entry_pack_reg_stat(cls, gfid, bname,
e['stat'])
else:
cmd_ret = errno_wrap(os.link, [slink, en],
@@ -704,15 +688,21 @@ class Server(object):
raise
else:
raise
- elif not matching_disk_gfid(gfid, en):
+ elif not matching_disk_gfid(gfid, en) and dist_count > 1:
collect_failure(e, EEXIST, uid, gid, True)
else:
+ # We are here which means matching_disk_gfid for
+ # both source and destination has returned false
+ # and distribution count for master vol is greater
+ # then one. Which basically says both the source and
+ # destination exist and not hardlinks.
+ # So we are safe to go ahead with rename here.
rename_with_disk_gfid_confirmation(gfid, entry, en,
uid, gid)
if blob:
cmd_ret = errno_wrap(Xattr.lsetxattr,
[pg, 'glusterfs.gfid.newfile', blob],
- [EEXIST, ENOENT],
+ [EEXIST, ENOENT, ESTALE],
[ESTALE, EINVAL, EBUSY])
collect_failure(e, cmd_ret, uid, gid)
@@ -961,6 +951,16 @@ class Mounter(object):
logging.exception('mount cleanup failure:')
rv = 200
os._exit(rv)
+
+ #Polling the dht.subvol.status value.
+ RETRIES = 10
+ while not gf_mount_ready():
+ if RETRIES < 0:
+ logging.error('Subvols are not up')
+ break
+ RETRIES -= 1
+ time.sleep(0.2)
+
logging.debug('auxiliary glusterfs mount prepared')
@@ -1256,9 +1256,6 @@ class GLUSTER(object):
# register the crawlers and start crawling
# g1 ==> Xsync, g2 ==> config.change_detector(changelog by default)
# g3 ==> changelog History
- (inf, ouf, ra, wa) = rconf.args.rpc_fd.split(',')
- changelog_agent = RepceClient(int(inf), int(ouf))
-
status = GeorepStatus(gconf.get("state-file"),
rconf.args.local_node,
rconf.args.local_path,
@@ -1266,12 +1263,6 @@ class GLUSTER(object):
rconf.args.master,
rconf.args.slave)
status.reset_on_worker_start()
- rv = changelog_agent.version()
- if int(rv) != CHANGELOG_AGENT_CLIENT_VERSION:
- raise GsyncdError(
- "RePCe major version mismatch(changelog agent): "
- "local %s, remote %s" %
- (CHANGELOG_AGENT_CLIENT_VERSION, rv))
try:
workdir = g2.setup_working_dir()
@@ -1282,17 +1273,16 @@ class GLUSTER(object):
# register with the changelog library
# 9 == log level (DEBUG)
# 5 == connection retries
- changelog_agent.init()
- changelog_agent.register(rconf.args.local_path,
- workdir,
- gconf.get("changelog-log-file"),
- get_changelog_log_level(
- gconf.get("changelog-log-level")),
- g2.CHANGELOG_CONN_RETRIES)
+ libgfchangelog.register(rconf.args.local_path,
+ workdir,
+ gconf.get("changelog-log-file"),
+ get_changelog_log_level(
+ gconf.get("changelog-log-level")),
+ g2.CHANGELOG_CONN_RETRIES)
register_time = int(time.time())
- g2.register(register_time, changelog_agent, status)
- g3.register(register_time, changelog_agent, status)
+ g2.register(register_time, status)
+ g3.register(register_time, status)
except ChangelogException as e:
logging.error(lf("Changelog register failed", error=e))
sys.exit(1)
@@ -1427,7 +1417,9 @@ class SSH(object):
'--slave-gluster-log-level',
gconf.get("slave-gluster-log-level"),
'--slave-gluster-command-dir',
- gconf.get("slave-gluster-command-dir")]
+ gconf.get("slave-gluster-command-dir"),
+ '--master-dist-count',
+ str(gconf.get("master-distribution-count"))]
if gconf.get("slave-access-mount"):
args_to_slave.append('--slave-access-mount')
@@ -1492,7 +1484,7 @@ class SSH(object):
if log_rsync_performance:
# use stdout=PIPE only when log_rsync_performance enabled
- # Else rsync will write to stdout and nobody is their
+ # Else rsync will write to stdout and nobody is there
# to consume. If PIPE is full rsync hangs.
po = Popen(argv, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.PIPE, universal_newlines=True)
@@ -1530,7 +1522,7 @@ class SSH(object):
return po
- def tarssh(self, files, slaveurl, log_err=False):
+ def tarssh(self, files, log_err=False):
"""invoke tar+ssh
-z (compress) can be use if needed, but omitting it now
as it results in weird error (tar+ssh errors out (errcode: 2)
@@ -1538,10 +1530,11 @@ class SSH(object):
if not files:
raise GsyncdError("no files to sync")
logging.debug("files: " + ", ".join(files))
- (host, rdir) = slaveurl.split(':')
+ (host, rdir) = self.slaveurl.split(':')
+
tar_cmd = ["tar"] + \
["--sparse", "-cf", "-", "--files-from", "-"]
- ssh_cmd = gconf.get("ssh-command-tar").split() + \
+ ssh_cmd = gconf.get("ssh-command").split() + \
gconf.get("ssh-options-tar").split() + \
["-p", str(gconf.get("ssh-port"))] + \
[host, "tar"] + \
@@ -1557,15 +1550,29 @@ class SSH(object):
p0.stdin.close()
p0.stdout.close() # Allow p0 to receive a SIGPIPE if p1 exits.
- # wait for tar to terminate, collecting any errors, further
- # waiting for transfer to complete
- _, stderr1 = p1.communicate()
# stdin and stdout of p0 is already closed, Reset to None and
# wait for child process to complete
p0.stdin = None
p0.stdout = None
- p0.communicate()
+
+ def wait_for_tar(p0):
+ _, stderr = p0.communicate()
+ if log_err:
+ for errline in stderr.strip().split("\n")[:-1]:
+ if "No such file or directory" not in errline:
+ logging.error(lf("SYNC Error",
+ sync_engine="Tarssh",
+ error=errline))
+
+ t = syncdutils.Thread(target=wait_for_tar, args=(p0, ))
+ # wait for tar to terminate, collecting any errors, further
+ # waiting for transfer to complete
+ t.start()
+
+ # wait for ssh process
+ _, stderr1 = p1.communicate()
+ t.join()
if log_err:
for errline in stderr1.strip().split("\n")[:-1]:
diff --git a/geo-replication/syncdaemon/subcmds.py b/geo-replication/syncdaemon/subcmds.py
index 30050ec4743..b8508532e30 100644
--- a/geo-replication/syncdaemon/subcmds.py
+++ b/geo-replication/syncdaemon/subcmds.py
@@ -73,7 +73,11 @@ def subcmd_worker(args):
Popen.init_errhandler()
fcntl.fcntl(args.feedback_fd, fcntl.F_SETFD, fcntl.FD_CLOEXEC)
local = GLUSTER("localhost", args.master)
- slavehost, slavevol = args.slave.split("::")
+ slave_url, slavevol = args.slave.split("::")
+ if "@" not in slave_url:
+ slavehost = args.resource_remote
+ else:
+ slavehost = "%s@%s" % (slave_url.split("@")[0], args.resource_remote)
remote = SSH(slavehost, slavevol)
remote.connect_remote()
local.connect()
@@ -93,25 +97,19 @@ def subcmd_slave(args):
local.service_loop()
-def subcmd_agent(args):
- import os
- from changelogagent import agent, Changelog
- from syncdutils import lf
-
- os.setsid()
- logging.debug(lf("RPC FD",
- rpc_fd=repr(args.rpc_fd)))
- return agent(Changelog(), args.rpc_fd)
-
-
def subcmd_voluuidget(args):
from subprocess import Popen, PIPE
import xml.etree.ElementTree as XET
ParseError = XET.ParseError if hasattr(XET, 'ParseError') else SyntaxError
- po = Popen(['gluster', '--xml', '--remote-host=' + args.host,
- 'volume', 'info', args.volname], bufsize=0,
+ cmd = ['gluster', '--xml', '--remote-host=' + args.host,
+ 'volume', 'info', args.volname]
+
+ if args.inet6:
+ cmd.append("--inet6")
+
+ po = Popen(cmd, bufsize=0,
stdin=None, stdout=PIPE, stderr=PIPE,
universal_newlines=True)
diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py
index fd96ca70b2f..a3df103e76c 100644
--- a/geo-replication/syncdaemon/syncdutils.py
+++ b/geo-replication/syncdaemon/syncdutils.py
@@ -21,8 +21,8 @@ import subprocess
import socket
from subprocess import PIPE
from threading import Lock, Thread as baseThread
-from errno import EACCES, EAGAIN, EPIPE, ENOTCONN, ECONNABORTED
-from errno import EINTR, ENOENT, ESTALE, EBUSY, errorcode
+from errno import (EACCES, EAGAIN, EPIPE, ENOTCONN, ENOMEM, ECONNABORTED,
+ EINTR, ENOENT, ESTALE, EBUSY, ENODATA, errorcode, EIO)
from signal import signal, SIGTERM
import select as oselect
from os import waitpid as owaitpid
@@ -37,10 +37,10 @@ from conf import GLUSTERFS_LIBEXECDIR, UUID_FILE
sys.path.insert(1, GLUSTERFS_LIBEXECDIR)
EVENTS_ENABLED = True
try:
- from events.eventtypes import GEOREP_FAULTY as EVENT_GEOREP_FAULTY
- from events.eventtypes import GEOREP_ACTIVE as EVENT_GEOREP_ACTIVE
- from events.eventtypes import GEOREP_PASSIVE as EVENT_GEOREP_PASSIVE
- from events.eventtypes import GEOREP_CHECKPOINT_COMPLETED \
+ from gfevents.eventtypes import GEOREP_FAULTY as EVENT_GEOREP_FAULTY
+ from gfevents.eventtypes import GEOREP_ACTIVE as EVENT_GEOREP_ACTIVE
+ from gfevents.eventtypes import GEOREP_PASSIVE as EVENT_GEOREP_PASSIVE
+ from gfevents.eventtypes import GEOREP_CHECKPOINT_COMPLETED \
as EVENT_GEOREP_CHECKPOINT_COMPLETED
except ImportError:
# Events APIs not installed, dummy eventtypes with None
@@ -55,14 +55,15 @@ from rconf import rconf
from hashlib import sha256 as sha256
+ENOTSUP = getattr(errno, 'ENOTSUP', 'EOPNOTSUPP')
+
# auxiliary gfid based access prefix
_CL_AUX_GFID_PFX = ".gfid/"
+ROOT_GFID = "00000000-0000-0000-0000-000000000001"
GF_OP_RETRIES = 10
GX_GFID_CANONICAL_LEN = 37 # canonical gfid len + '\0'
-CHANGELOG_AGENT_SERVER_VERSION = 1.0
-CHANGELOG_AGENT_CLIENT_VERSION = 1.0
NodeID = None
rsync_version = None
unshare_mnt_propagation = None
@@ -99,6 +100,19 @@ def unescape_space_newline(s):
.replace(NEWLINE_ESCAPE_CHAR, "\n")\
.replace(PERCENTAGE_ESCAPE_CHAR, "%")
+# gf_mount_ready() returns 1 if all subvols are up, else 0
+def gf_mount_ready():
+ ret = errno_wrap(Xattr.lgetxattr,
+ ['.', 'dht.subvol.status', 16],
+ [ENOENT, ENOTSUP, ENODATA], [ENOMEM])
+
+ if isinstance(ret, int):
+ logging.error("failed to get the xattr value")
+ return 1
+ ret = ret.rstrip('\x00')
+ if ret == "1":
+ return 1
+ return 0
def norm(s):
if s:
@@ -330,13 +344,24 @@ def log_raise_exception(excont):
ECONNABORTED):
logging.error(lf('Gluster Mount process exited',
error=errorcode[exc.errno]))
+ elif isinstance(exc, OSError) and exc.errno == EIO:
+ logging.error("Getting \"Input/Output error\" "
+ "is most likely due to "
+ "a. Brick is down or "
+ "b. Split brain issue.")
+ logging.error("This is expected as per design to "
+ "keep the consistency of the file system. "
+ "Once the above issue is resolved "
+ "geo-replication would automatically "
+ "proceed further.")
+ logtag = "FAIL"
else:
logtag = "FAIL"
if not logtag and logging.getLogger().isEnabledFor(logging.DEBUG):
logtag = "FULL EXCEPTION TRACE"
if logtag:
logging.exception(logtag + ": ")
- sys.stderr.write("failed with %s.\n" % type(exc).__name__)
+ sys.stderr.write("failed with %s: %s.\n" % (type(exc).__name__, exc))
excont.exval = 1
sys.exit(excont.exval)
@@ -563,7 +588,6 @@ def errno_wrap(call, arg=[], errnos=[], retry_errnos=[]):
def lstat(e):
return errno_wrap(os.lstat, [e], [ENOENT], [ESTALE, EBUSY])
-
def get_gfid_from_mnt(gfidpath):
return errno_wrap(Xattr.lgetxattr,
[gfidpath, 'glusterfs.gfid.string',
@@ -599,7 +623,7 @@ class ChangelogException(OSError):
def gf_event(event_type, **kwargs):
if EVENTS_ENABLED:
- from events.gf_event import gf_event as gfevent
+ from gfevents.gf_event import gf_event as gfevent
gfevent(event_type, **kwargs)
@@ -670,9 +694,10 @@ def get_slv_dir_path(slv_host, slv_volume, gfid):
global slv_bricks
dir_path = ENOENT
+ pfx = gauxpfx()
if not slv_bricks:
- slv_info = Volinfo(slv_volume, slv_host)
+ slv_info = Volinfo(slv_volume, slv_host, master=False)
slv_bricks = slv_info.bricks
# Result of readlink would be of format as below.
# readlink = "../../pgfid[0:2]/pgfid[2:4]/pgfid/basename"
@@ -683,19 +708,32 @@ def get_slv_dir_path(slv_host, slv_volume, gfid):
gfid[2:4],
gfid], [ENOENT], [ESTALE])
if dir_path != ENOENT:
- break
-
- if not isinstance(dir_path, int):
- realpath = errno_wrap(os.readlink, [dir_path],
- [ENOENT], [ESTALE])
-
- if not isinstance(realpath, int):
- realpath_parts = realpath.split('/')
- pargfid = realpath_parts[-2]
- basename = realpath_parts[-1]
- pfx = gauxpfx()
- dir_entry = os.path.join(pfx, pargfid, basename)
- return dir_entry
+ try:
+ realpath = errno_wrap(os.readlink, [dir_path],
+ [ENOENT], [ESTALE])
+ if not isinstance(realpath, int):
+ realpath_parts = realpath.split('/')
+ pargfid = realpath_parts[-2]
+ basename = realpath_parts[-1]
+ dir_entry = os.path.join(pfx, pargfid, basename)
+ return dir_entry
+ except OSError:
+ # .gfid/GFID
+ gfidpath = unescape_space_newline(os.path.join(pfx, gfid))
+ realpath = errno_wrap(Xattr.lgetxattr_buf,
+ [gfidpath, 'glusterfs.gfid2path'], [ENOENT], [ESTALE])
+ if not isinstance(realpath, int):
+ basename = os.path.basename(realpath).rstrip('\x00')
+ dirpath = os.path.dirname(realpath)
+ if dirpath == "/":
+ pargfid = ROOT_GFID
+ else:
+ dirpath = dirpath.strip("/")
+ pargfid = get_gfid_from_mnt(dirpath)
+ if isinstance(pargfid, int):
+ return None
+ dir_entry = os.path.join(pfx, pargfid, basename)
+ return dir_entry
return None
@@ -705,12 +743,12 @@ def lf(event, **kwargs):
Log Format helper function, log messages can be
easily modified to structured log format.
lf("Config Change", sync_jobs=4, brick=/bricks/b1) will be
- converted as "Config Change<TAB>brick=/bricks/b1<TAB>sync_jobs=4"
+ converted as "Config Change [{brick=/bricks/b1}, {sync_jobs=4}]"
"""
- msg = event
+ msgparts = []
for k, v in kwargs.items():
- msg += "\t{0}={1}".format(k, v)
- return msg
+ msgparts.append("{%s=%s}" % (k, v))
+ return "%s [%s]" % (event, ", ".join(msgparts))
class Popen(subprocess.Popen):
@@ -856,10 +894,29 @@ class Popen(subprocess.Popen):
self.errfail()
+def host_brick_split(value):
+ """
+ IPv6 compatible way to split and get the host
+ and brick information. Example inputs:
+ node1.example.com:/exports/bricks/brick1/brick
+ fe80::af0f:df82:844f:ef66%utun0:/exports/bricks/brick1/brick
+ """
+ parts = value.split(":")
+ brick = parts[-1]
+ hostparts = parts[0:-1]
+ return (":".join(hostparts), brick)
+
+
class Volinfo(object):
- def __init__(self, vol, host='localhost', prelude=[]):
- po = Popen(prelude + ['gluster', '--xml', '--remote-host=' + host,
+ def __init__(self, vol, host='localhost', prelude=[], master=True):
+ if master:
+ gluster_cmd_dir = gconf.get("gluster-command-dir")
+ else:
+ gluster_cmd_dir = gconf.get("slave-gluster-command-dir")
+
+ gluster_cmd = os.path.join(gluster_cmd_dir, 'gluster')
+ po = Popen(prelude + [gluster_cmd, '--xml', '--remote-host=' + host,
'volume', 'info', vol],
stdout=PIPE, stderr=PIPE, universal_newlines=True)
vix = po.stdout.read()
@@ -892,7 +949,7 @@ class Volinfo(object):
@memoize
def bricks(self):
def bparse(b):
- host, dirp = b.find("name").text.split(':', 2)
+ host, dirp = host_brick_split(b.find("name").text)
return {'host': host, 'dir': dirp, 'uuid': b.find("hostUuid").text}
return [bparse(b) for b in self.get('brick')]
@@ -924,6 +981,14 @@ class Volinfo(object):
else:
return int(self.get('disperseCount')[0].text)
+ def distribution_count(self, tier, hot):
+ if (tier and hot):
+ return int(self.get('hotBricks/hotdistCount')[0].text)
+ elif (tier and not hot):
+ return int(self.get('coldBricks/colddistCount')[0].text)
+ else:
+ return int(self.get('distCount')[0].text)
+
@property
@memoize
def hot_bricks(self):
@@ -960,6 +1025,16 @@ class VolinfoFromGconf(object):
def is_hot(self, brickpath):
return False
+ def is_uuid(self, value):
+ try:
+ uuid.UUID(value)
+ return True
+ except ValueError:
+ return False
+
+ def possible_path(self, value):
+ return "/" in value
+
@property
@memoize
def bricks(self):
@@ -973,8 +1048,22 @@ class VolinfoFromGconf(object):
out = []
for b in bricks_data:
parts = b.split(":")
- bpath = parts[2] if len(parts) == 3 else ""
- out.append({"host": parts[1], "dir": bpath, "uuid": parts[0]})
+ b_uuid = None
+ if self.is_uuid(parts[0]):
+ b_uuid = parts[0]
+ # Set all parts except first
+ parts = parts[1:]
+
+ if self.possible_path(parts[-1]):
+ bpath = parts[-1]
+ # Set all parts except last
+ parts = parts[0:-1]
+
+ out.append({
+ "host": ":".join(parts), # if remaining parts are IPv6 name
+ "dir": bpath,
+ "uuid": b_uuid
+ })
return out
@@ -992,6 +1081,9 @@ class VolinfoFromGconf(object):
def disperse_count(self, tier, hot):
return gconf.get("master-disperse-count")
+ def distribution_count(self, tier, hot):
+ return gconf.get("master-distribution-count")
+
@property
@memoize
def hot_bricks(self):
diff --git a/geo-replication/tests/unit/test_gsyncdstatus.py b/geo-replication/tests/unit/test_gsyncdstatus.py
index 483023dbfe9..9c1aa2ad4ad 100755
--- a/geo-replication/tests/unit/test_gsyncdstatus.py
+++ b/geo-replication/tests/unit/test_gsyncdstatus.py
@@ -13,11 +13,11 @@ import unittest
import os
import urllib
-from syncdaemon.gstatus import GeorepStatus, set_monitor_status
-from syncdaemon.gstatus import get_default_values
-from syncdaemon.gstatus import MONITOR_STATUS, DEFAULT_STATUS
-from syncdaemon.gstatus import STATUS_VALUES, CRAWL_STATUS_VALUES
-from syncdaemon.gstatus import human_time, human_time_utc
+from syncdaemon.gstatus import (GeorepStatus, set_monitor_status,
+ get_default_values,
+ MONITOR_STATUS, DEFAULT_STATUS,
+ STATUS_VALUES, CRAWL_STATUS_VALUES,
+ human_time, human_time_utc)
class GeorepStatusTestCase(unittest.TestCase):
diff --git a/glusterfs-api.pc.in b/glusterfs-api.pc.in
index 6af4e108f7f..4a2edb7bf07 100644
--- a/glusterfs-api.pc.in
+++ b/glusterfs-api.pc.in
@@ -9,4 +9,4 @@ Description: GlusterFS API
Version: @GFAPI_VERSION@
Requires: @PKGCONFIG_UUID@
Libs: -L${libdir} @GFAPI_LIBS@ -lgfapi -lglusterfs -lgfrpc -lgfxdr
-Cflags: -I${includedir} -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 -DUSE_POSIX_ACLS=@USE_POSIX_ACLS@
+Cflags: -I${includedir} -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 -D__USE_LARGEFILE64 -DUSE_POSIX_ACLS=@USE_POSIX_ACLS@
diff --git a/glusterfs.spec.in b/glusterfs.spec.in
index 58d5c6bfd5b..b6d63146e14 100644
--- a/glusterfs.spec.in
+++ b/glusterfs.spec.in
@@ -18,15 +18,6 @@
%global _with_asan %{nil}
%endif
-# bd
-# if you wish to compile an rpm without the BD map support...
-# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without bd
-%{?_without_bd:%global _without_bd --disable-bd-xlator}
-
-%if ( 0%{?rhel} && 0%{?rhel} > 7 )
-%global _without_bd --without-bd
-%endif
-
# cmocka
# if you wish to compile an rpm with cmocka unit testing...
# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --with cmocka
@@ -79,16 +70,6 @@
# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without ocf
%{?_without_ocf:%global _without_ocf --without-ocf}
-# rdma
-# if you wish to compile an rpm without rdma support, compile like this...
-# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without rdma
-%{?_without_rdma:%global _without_rdma --disable-ibverbs}
-
-# No RDMA Support on s390(x)
-%ifarch s390 s390x armv7hl
-%global _without_rdma --disable-ibverbs
-%endif
-
# server
# if you wish to build rpms without server components, compile like this
# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without server
@@ -130,6 +111,12 @@
## All %%global definitions should be placed here and keep them sorted
##
+# selinux booleans whose defalut value needs modification
+# these booleans will be consumed by "%%selinux_set_booleans" macro.
+%if ( 0%{?rhel} && 0%{?rhel} >= 8 )
+%global selinuxbooleans rsync_full_access=1 rsync_client=1
+%endif
+
%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} > 6 )
%global _with_systemd true
%endif
@@ -177,6 +164,7 @@
# can't seem to make a generic macro that works
%global glusterd_svcfile %{_unitdir}/glusterd.service
%global glusterfsd_svcfile %{_unitdir}/glusterfsd.service
+%global glusterta_svcfile %{_unitdir}/gluster-ta-volume.service
%global glustereventsd_svcfile %{_unitdir}/glustereventsd.service
%global glusterfssharedstorage_svcfile %{_unitdir}/glusterfssharedstorage.service
%else
@@ -249,7 +237,9 @@ Requires(pre): shadow-utils
BuildRequires: systemd
%endif
-Requires: %{name}-libs%{?_isa} = %{version}-%{release}
+Requires: libglusterfs0%{?_isa} = %{version}-%{release}
+Requires: libgfrpc0%{?_isa} = %{version}-%{release}
+Requires: libgfxdr0%{?_isa} = %{version}-%{release}
%if ( 0%{?_with_systemd:1} )
%{?systemd_requires}
%endif
@@ -290,76 +280,42 @@ BuildRequires: libattr-devel
BuildRequires: firewalld
%endif
-Obsoletes: hekafs
Obsoletes: %{name}-common < %{version}-%{release}
Obsoletes: %{name}-core < %{version}-%{release}
-Obsoletes: %{name}-ufo
Obsoletes: %{name}-ganesha
+Obsoletes: %{name}-rdma < %{version}-%{release}
+%if ( 0%{!?_with_gnfs:1} )
+Obsoletes: %{name}-gnfs < %{version}-%{release}
+%endif
Provides: %{name}-common = %{version}-%{release}
Provides: %{name}-core = %{version}-%{release}
%description
GlusterFS is a distributed file-system capable of scaling to several
-petabytes. It aggregates various storage bricks over Infiniband RDMA
-or TCP/IP interconnect into one large parallel network file
-system. GlusterFS is one of the most sophisticated file systems in
-terms of features and extensibility. It borrows a powerful concept
-called Translators from GNU Hurd kernel. Much of the code in GlusterFS
-is in user space and easily manageable.
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
This package includes the glusterfs binary, the glusterfsd daemon and the
libglusterfs and glusterfs translator modules common to both GlusterFS server
and client framework.
-%package api
-Summary: GlusterFS api library
-Requires: %{name}%{?_isa} = %{version}-%{release}
-Requires: %{name}-client-xlators%{?_isa} = %{version}-%{release}
-
-%description api
-GlusterFS is a distributed file-system capable of scaling to several
-petabytes. It aggregates various storage bricks over Infiniband RDMA
-or TCP/IP interconnect into one large parallel network file
-system. GlusterFS is one of the most sophisticated file systems in
-terms of features and extensibility. It borrows a powerful concept
-called Translators from GNU Hurd kernel. Much of the code in GlusterFS
-is in user space and easily manageable.
-
-This package provides the glusterfs libgfapi library.
-
-%package api-devel
-Summary: Development Libraries
-Requires: %{name}%{?_isa} = %{version}-%{release}
-Requires: %{name}-devel%{?_isa} = %{version}-%{release}
-Requires: libacl-devel
-
-%description api-devel
-GlusterFS is a distributed file-system capable of scaling to several
-petabytes. It aggregates various storage bricks over Infiniband RDMA
-or TCP/IP interconnect into one large parallel network file
-system. GlusterFS is one of the most sophisticated file systems in
-terms of features and extensibility. It borrows a powerful concept
-called Translators from GNU Hurd kernel. Much of the code in GlusterFS
-is in user space and easily manageable.
-
-This package provides the api include files.
-
-%if ( 0%{!?_without_server:1} )
%package cli
Summary: GlusterFS CLI
-Requires: %{name}-libs%{?_isa} = %{version}-%{release}
+Requires: libglusterfs0%{?_isa} = %{version}-%{release}
+Requires: libglusterd0%{?_isa} = %{version}-%{release}
%description cli
GlusterFS is a distributed file-system capable of scaling to several
-petabytes. It aggregates various storage bricks over Infiniband RDMA
-or TCP/IP interconnect into one large parallel network file
-system. GlusterFS is one of the most sophisticated file systems in
-terms of features and extensibility. It borrows a powerful concept
-called Translators from GNU Hurd kernel. Much of the code in GlusterFS
-is in user space and easily manageable.
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
This package provides the GlusterFS CLI application and its man page
-%endif
%package cloudsync-plugins
Summary: Cloudsync Plugins
@@ -367,32 +323,14 @@ BuildRequires: libcurl-devel
%description cloudsync-plugins
GlusterFS is a distributed file-system capable of scaling to several
-petabytes. It aggregates various storage bricks over Infiniband RDMA
-or TCP/IP interconnect into one large parallel network file
-system. GlusterFS is one of the most sophisticated file systems in
-terms of features and extensibility. It borrows a powerful concept
-called Translators from GNU Hurd kernel. Much of the code in GlusterFS
-is in user space and easily manageable.
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
This package provides cloudsync plugins for archival feature.
-%package devel
-Summary: Development Libraries
-Requires: %{name}%{?_isa} = %{version}-%{release}
-# Needed for the Glupy examples to work
-Requires: %{name}-extra-xlators%{?_isa} = %{version}-%{release}
-
-%description devel
-GlusterFS is a distributed file-system capable of scaling to several
-petabytes. It aggregates various storage bricks over Infiniband RDMA
-or TCP/IP interconnect into one large parallel network file
-system. GlusterFS is one of the most sophisticated file systems in
-terms of features and extensibility. It borrows a powerful concept
-called Translators from GNU Hurd kernel. Much of the code in GlusterFS
-is in user space and easily manageable.
-
-This package provides the development libraries and include files.
-
%package extra-xlators
Summary: Extra Gluster filesystem Translators
# We need python-gluster rpm for gluster module's __init__.py in Python
@@ -402,12 +340,11 @@ Requires: python%{_pythonver}
%description extra-xlators
GlusterFS is a distributed file-system capable of scaling to several
-petabytes. It aggregates various storage bricks over Infiniband RDMA
-or TCP/IP interconnect into one large parallel network file
-system. GlusterFS is one of the most sophisticated file systems in
-terms of features and extensibility. It borrows a powerful concept
-called Translators from GNU Hurd kernel. Much of the code in GlusterFS
-is in user space and easily manageable.
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
This package provides extra filesystem Translators, such as Glupy,
for GlusterFS.
@@ -426,6 +363,49 @@ Provides: %{name}-client = %{version}-%{release}
%description fuse
GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides support to FUSE based clients and inlcudes the
+glusterfs(d) binary.
+
+%if ( 0%{!?_without_server:1} )
+%package ganesha
+Summary: NFS-Ganesha configuration
+Group: Applications/File
+
+Requires: %{name}-server%{?_isa} = %{version}-%{release}
+Requires: nfs-ganesha-selinux >= 2.7.6
+Requires: nfs-ganesha-gluster >= 2.7.6
+Requires: pcs >= 0.10.0
+Requires: resource-agents >= 4.2.0
+Requires: dbus
+
+%if ( 0%{?rhel} && 0%{?rhel} == 6 )
+Requires: cman, pacemaker, corosync
+%endif
+
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} > 5 )
+# we need portblock resource-agent in 3.9.5 and later.
+Requires: net-tools
+%endif
+
+%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+%if ( 0%{?rhel} && 0%{?rhel} < 8 )
+Requires: selinux-policy >= 3.13.1-160
+Requires(post): policycoreutils-python
+Requires(postun): policycoreutils-python
+%else
+Requires(post): policycoreutils-python-utils
+Requires(postun): policycoreutils-python-utils
+%endif
+%endif
+
+%description ganesha
+GlusterFS is a distributed file-system capable of scaling to several
petabytes. It aggregates various storage bricks over Infiniband RDMA
or TCP/IP interconnect into one large parallel network file
system. GlusterFS is one of the most sophisticated file systems in
@@ -433,8 +413,9 @@ terms of features and extensibility. It borrows a powerful concept
called Translators from GNU Hurd kernel. Much of the code in GlusterFS
is in user space and easily manageable.
-This package provides support to FUSE based clients and inlcudes the
-glusterfs(d) binary.
+This package provides the configuration and related files for using
+NFS-Ganesha as the NFS server using GlusterFS
+%endif
%if ( 0%{!?_without_georeplication:1} )
%package geo-replication
@@ -447,15 +428,22 @@ Requires: python%{_pythonver}-gluster = %{version}-%{release}
Requires: rsync
Requires: util-linux
+# required for setting selinux bools
+%if ( 0%{?rhel} && 0%{?rhel} >= 8 )
+Requires(post): policycoreutils-python-utils
+Requires(postun): policycoreutils-python-utils
+Requires: selinux-policy-targeted
+Requires(post): selinux-policy-targeted
+BuildRequires: selinux-policy-devel
+%endif
%description geo-replication
GlusterFS is a distributed file-system capable of scaling to several
-peta-bytes. It aggregates various storage bricks over Infiniband RDMA
-or TCP/IP interconnect into one large parallel network file
-system. GlusterFS is one of the most sophisticated file system in
-terms of features and extensibility. It borrows a powerful concept
-called Translators from GNU Hurd kernel. Much of the code in GlusterFS
-is in userspace and easily manageable.
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
This package provides support to geo-replication.
%endif
@@ -469,29 +457,187 @@ Requires: nfs-utils
%description gnfs
GlusterFS is a distributed file-system capable of scaling to several
-petabytes. It aggregates various storage bricks over Infiniband RDMA
-or TCP/IP interconnect into one large parallel network file
-system. GlusterFS is one of the most sophisticated file systems in
-terms of features and extensibility. It borrows a powerful concept
-called Translators from GNU Hurd kernel. Much of the code in GlusterFS
-is in user space and easily manageable.
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
This package provides the glusterfs legacy gNFS server xlator
%endif
-%package libs
-Summary: GlusterFS common libraries
+%package -n libglusterfs0
+Summary: GlusterFS libglusterfs library
+Requires: libgfrpc0%{?_isa} = %{version}-%{release}
+Requires: libgfxdr0%{?_isa} = %{version}-%{release}
+Obsoletes: %{name}-libs <= %{version}-%{release}
+Provides: %{name}-libs = %{version}-%{release}
-%description libs
+%description -n libglusterfs0
GlusterFS is a distributed file-system capable of scaling to several
-petabytes. It aggregates various storage bricks over Infiniband RDMA
-or TCP/IP interconnect into one large parallel network file
-system. GlusterFS is one of the most sophisticated file systems in
-terms of features and extensibility. It borrows a powerful concept
-called Translators from GNU Hurd kernel. Much of the code in GlusterFS
-is in user space and easily manageable.
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides the base libglusterfs library
+
+%package -n libglusterfs-devel
+Summary: GlusterFS libglusterfs library
+Requires: libgfrpc-devel%{?_isa} = %{version}-%{release}
+Requires: libgfxdr-devel%{?_isa} = %{version}-%{release}
+Obsoletes: %{name}-devel <= %{version}-%{release}
+Provides: %{name}-devel = %{version}-%{release}
+
+%description -n libglusterfs-devel
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides libglusterfs.so and the gluster C header files.
+
+%package -n libgfapi0
+Summary: GlusterFS api library
+Requires: libglusterfs0%{?_isa} = %{version}-%{release}
+Requires: %{name}-client-xlators%{?_isa} = %{version}-%{release}
+Obsoletes: %{name}-api <= %{version}-%{release}
+Provides: %{name}-api = %{version}-%{release}
+
+%description -n libgfapi0
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides the glusterfs libgfapi library.
+
+%package -n libgfapi-devel
+Summary: Development Libraries
+Requires: libglusterfs-devel%{?_isa} = %{version}-%{release}
+Requires: libacl-devel
+Obsoletes: %{name}-api-devel <= %{version}-%{release}
+Provides: %{name}-api-devel = %{version}-%{release}
+
+%description -n libgfapi-devel
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides libgfapi.so and the api C header files.
+
+%package -n libgfchangelog0
+Summary: GlusterFS libchangelog library
+Requires: libglusterfs0%{?_isa} = %{version}-%{release}
+Obsoletes: %{name}-libs <= %{version}-%{release}
+
+%description -n libgfchangelog0
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides the libgfchangelog library
+
+%package -n libgfchangelog-devel
+Summary: GlusterFS libchangelog library
+Requires: libglusterfs-devel%{?_isa} = %{version}-%{release}
+Obsoletes: %{name}-devel <= %{version}-%{release}
+
+%description -n libgfchangelog-devel
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides libgfchangelog.so and changelog C header files.
+
+%package -n libgfrpc0
+Summary: GlusterFS libgfrpc0 library
+Requires: libglusterfs0%{?_isa} = %{version}-%{release}
+Obsoletes: %{name}-libs <= %{version}-%{release}
+
+%description -n libgfrpc0
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
-This package provides the base GlusterFS libraries
+This package provides the libgfrpc library
+
+%package -n libgfrpc-devel
+Summary: GlusterFS libgfrpc library
+Requires: libglusterfs0%{?_isa} = %{version}-%{release}
+Obsoletes: %{name}-devel <= %{version}-%{release}
+
+%description -n libgfrpc-devel
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides libgfrpc.so and rpc C header files.
+
+%package -n libgfxdr0
+Summary: GlusterFS libgfxdr0 library
+Requires: libglusterfs0%{?_isa} = %{version}-%{release}
+Obsoletes: %{name}-libs <= %{version}-%{release}
+
+%description -n libgfxdr0
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides the libgfxdr library
+
+%package -n libgfxdr-devel
+Summary: GlusterFS libgfxdr library
+Requires: libglusterfs0%{?_isa} = %{version}-%{release}
+Obsoletes: %{name}-devel <= %{version}-%{release}
+
+%description -n libgfxdr-devel
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides libgfxdr.so.
+
+%package -n libglusterd0
+Summary: GlusterFS libglusterd library
+Requires: libglusterfs0%{?_isa} = %{version}-%{release}
+Obsoletes: %{name}-libs <= %{version}-%{release}
+
+%description -n libglusterd0
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides the libglusterd library
%package -n python%{_pythonver}-gluster
Summary: GlusterFS python library
@@ -504,39 +650,15 @@ Obsoletes: python-gluster < 3.10
%description -n python%{_pythonver}-gluster
GlusterFS is a distributed file-system capable of scaling to several
-petabytes. It aggregates various storage bricks over Infiniband RDMA
-or TCP/IP interconnect into one large parallel network file
-system. GlusterFS is one of the most sophisticated file systems in
-terms of features and extensibility. It borrows a powerful concept
-called Translators from GNU Hurd kernel. Much of the code in GlusterFS
-is in user space and easily manageable.
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
This package contains the python modules of GlusterFS and own gluster
namespace.
-%if ( 0%{!?_without_rdma:1} )
-%package rdma
-Summary: GlusterFS rdma support for ib-verbs
-%if ( 0%{?fedora} && 0%{?fedora} > 26 )
-BuildRequires: rdma-core-devel
-%else
-BuildRequires: libibverbs-devel
-BuildRequires: librdmacm-devel >= 1.0.15
-%endif
-Requires: %{name}%{?_isa} = %{version}-%{release}
-
-%description rdma
-GlusterFS is a distributed file-system capable of scaling to several
-petabytes. It aggregates various storage bricks over Infiniband RDMA
-or TCP/IP interconnect into one large parallel network file
-system. GlusterFS is one of the most sophisticated file systems in
-terms of features and extensibility. It borrows a powerful concept
-called Translators from GNU Hurd kernel. Much of the code in GlusterFS
-is in user space and easily manageable.
-
-This package provides support to ib-verbs library.
-%endif
-
%package regression-tests
Summary: Development Tools
Requires: %{name}%{?_isa} = %{version}-%{release}
@@ -566,12 +688,11 @@ Requires: %{_prefix}/lib/ocf/resource.d
%description resource-agents
GlusterFS is a distributed file-system capable of scaling to several
-petabytes. It aggregates various storage bricks over Infiniband RDMA
-or TCP/IP interconnect into one large parallel network file
-system. GlusterFS is one of the most sophisticated file systems in
-terms of features and extensibility. It borrows a powerful concept
-called Translators from GNU Hurd kernel. Much of the code in GlusterFS
-is in user space and easily manageable.
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
This package provides the resource agents which plug glusterd into
Open Cluster Framework (OCF) compliant cluster resource managers,
@@ -583,11 +704,15 @@ like Pacemaker.
Summary: Clustered file-system server
Requires: %{name}%{?_isa} = %{version}-%{release}
Requires: %{name}-cli%{?_isa} = %{version}-%{release}
-Requires: %{name}-libs%{?_isa} = %{version}-%{release}
+Requires: libglusterfs0%{?_isa} = %{version}-%{release}
+Requires: libgfchangelog0%{?_isa} = %{version}-%{release}
+%if ( 0%{?fedora} && 0%{?fedora} >= 30 || ( 0%{?rhel} && 0%{?rhel} >= 8 ) )
+Requires: glusterfs-selinux >= 0.1.0-2
+%endif
# some daemons (like quota) use a fuse-mount, glusterfsd is part of -fuse
Requires: %{name}-fuse%{?_isa} = %{version}-%{release}
# self-heal daemon, rebalance, nfs-server etc. are actually clients
-Requires: %{name}-api%{?_isa} = %{version}-%{release}
+Requires: libgfapi0%{?_isa} = %{version}-%{release}
Requires: %{name}-client-xlators%{?_isa} = %{version}-%{release}
# lvm2 for snapshot, and nfs-utils and rpcbind/portmap for gnfs server
Requires: lvm2
@@ -626,27 +751,37 @@ Requires: valgrind
%description server
GlusterFS is a distributed file-system capable of scaling to several
-petabytes. It aggregates various storage bricks over Infiniband RDMA
-or TCP/IP interconnect into one large parallel network file
-system. GlusterFS is one of the most sophisticated file systems in
-terms of features and extensibility. It borrows a powerful concept
-called Translators from GNU Hurd kernel. Much of the code in GlusterFS
-is in user space and easily manageable.
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
This package provides the glusterfs server daemon.
%endif
+%package thin-arbiter
+Summary: GlusterFS thin-arbiter module
+Requires: %{name}%{?_isa} = %{version}-%{release}
+Requires: %{name}-server%{?_isa} = %{version}-%{release}
+
+%description thin-arbiter
+This package provides a tie-breaker functionality to GlusterFS
+replicate volume. It includes translators required to provide the
+functionality, and also few other scripts required for getting the setup done.
+
+This package provides the glusterfs thin-arbiter translator.
+
%package client-xlators
Summary: GlusterFS client-side translators
%description client-xlators
GlusterFS is a distributed file-system capable of scaling to several
-petabytes. It aggregates various storage bricks over Infiniband RDMA
-or TCP/IP interconnect into one large parallel network file
-system. GlusterFS is one of the most sophisticated file systems in
-terms of features and extensibility. It borrows a powerful concept
-called Translators from GNU Hurd kernel. Much of the code in GlusterFS
-is in user space and easily manageable.
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
This package provides the translators needed on any GlusterFS client.
@@ -703,7 +838,6 @@ done
%{?_without_fusermount} \
%{?_without_georeplication} \
%{?_without_ocf} \
- %{?_without_rdma} \
%{?_without_server} \
%{?_without_syslog} \
%{?_with_ipv6default} \
@@ -724,13 +858,13 @@ make check
%install
rm -rf %{buildroot}
make install DESTDIR=%{buildroot}
+%if ( 0%{!?_without_server:1} )
%if ( 0%{_for_fedora_koji_builds} )
install -D -p -m 0644 %{SOURCE1} \
%{buildroot}%{_sysconfdir}/sysconfig/glusterd
install -D -p -m 0644 %{SOURCE2} \
%{buildroot}%{_sysconfdir}/sysconfig/glusterfsd
%else
-%if ( 0%{!?_without_server:1} )
install -D -p -m 0644 extras/glusterd-sysconfig \
%{buildroot}%{_sysconfdir}/sysconfig/glusterd
%endif
@@ -778,13 +912,24 @@ sed -i 's|option working-directory /etc/glusterd|option working-directory %{_sha
%endif
# Install glusterfsd .service or init.d file
+%if ( 0%{!?_without_server:1} )
%if ( 0%{_for_fedora_koji_builds} )
%service_install glusterfsd %{glusterfsd_svcfile}
%endif
+%endif
install -D -p -m 0644 extras/glusterfs-logrotate \
%{buildroot}%{_sysconfdir}/logrotate.d/glusterfs
+# ganesha ghosts
+%if ( 0%{!?_without_server:1} )
+mkdir -p %{buildroot}%{_sysconfdir}/ganesha
+touch %{buildroot}%{_sysconfdir}/ganesha/ganesha-ha.conf
+mkdir -p %{buildroot}%{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/
+touch %{buildroot}%{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ganesha.conf
+touch %{buildroot}%{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ganesha-ha.conf
+%endif
+
%if ( 0%{!?_without_georeplication:1} )
# geo-rep ghosts
mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/geo-replication
@@ -817,10 +962,8 @@ touch %{buildroot}%{_sharedstatedir}/glusterd/nfs/run/nfs.pid
find ./tests ./run-tests.sh -type f | cpio -pd %{buildroot}%{_prefix}/share/glusterfs
## Install bash completion for cli
-%if ( 0%{!?_without_server:1} )
install -p -m 0744 -D extras/command-completion/gluster.bash \
%{buildroot}%{_sysconfdir}/bash_completion.d/gluster
-%endif
%clean
rm -rf %{buildroot}
@@ -837,23 +980,46 @@ rm -rf %{buildroot}
%endif
exit 0
-%post api
-/sbin/ldconfig
-
%if ( 0%{!?_without_events:1} )
%post events
%systemd_post glustereventsd
%endif
+%if ( 0%{!?_without_server:1} )
+%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+%post ganesha
+semanage boolean -m ganesha_use_fusefs --on
+exit 0
+%endif
+%endif
+
%if ( 0%{!?_without_georeplication:1} )
%post geo-replication
+%if ( 0%{?rhel} && 0%{?rhel} >= 8 )
+%selinux_set_booleans %{selinuxbooleans}
+%endif
if [ $1 -ge 1 ]; then
%systemd_postun_with_restart glusterd
fi
exit 0
%endif
-%post libs
+%post -n libglusterfs0
+/sbin/ldconfig
+
+%post -n libgfapi0
+/sbin/ldconfig
+
+%post -n libgfchangelog0
+/sbin/ldconfig
+
+%post -n libgfrpc0
+/sbin/ldconfig
+
+%post -n libgfxdr0
+/sbin/ldconfig
+
+%post -n libglusterd0
/sbin/ldconfig
%if ( 0%{!?_without_server:1} )
@@ -973,32 +1139,62 @@ fi
exit 0
%endif
+%preun thin-arbiter
+if [ $1 -eq 0 ]; then
+ if [ -f %glusterta_svcfile ]; then
+ %service_stop gluster-ta-volume
+ %systemd_preun gluster-ta-volume
+ fi
+fi
+
##-----------------------------------------------------------------------------
## All %%postun should be placed here and keep them sorted
##
%postun
-/sbin/ldconfig
%if ( 0%{!?_without_syslog:1} )
%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 6 )
%systemd_postun_with_restart rsyslog
%endif
%endif
-%postun api
-/sbin/ldconfig
-
-%postun libs
-/sbin/ldconfig
-
%if ( 0%{!?_without_server:1} )
%postun server
-/sbin/ldconfig
%if (0%{?_with_firewalld:1})
%firewalld_reload
%endif
exit 0
%endif
+%if ( 0%{!?_without_server:1} )
+%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+%postun ganesha
+semanage boolean -m ganesha_use_fusefs --off
+exit 0
+%endif
+%endif
+
+##-----------------------------------------------------------------------------
+## All %%trigger should be placed here and keep them sorted
+##
+%if ( 0%{!?_without_server:1} )
+%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+%trigger ganesha -- selinux-policy-targeted
+semanage boolean -m ganesha_use_fusefs --on
+exit 0
+%endif
+%endif
+
+##-----------------------------------------------------------------------------
+## All %%triggerun should be placed here and keep them sorted
+##
+%if ( 0%{!?_without_server:1} )
+%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+%triggerun ganesha -- selinux-policy-targeted
+semanage boolean -m ganesha_use_fusefs --off
+exit 0
+%endif
+%endif
+
##-----------------------------------------------------------------------------
## All %%files should be placed here and keep them grouped
##
@@ -1009,9 +1205,6 @@ exit 0
%exclude %{_mandir}/man8/gluster.8*
%endif
%dir %{_localstatedir}/log/glusterfs
-%if ( 0%{!?_without_rdma:1} )
-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/rpc-transport/rdma*
-%endif
%if 0%{?!_without_server:1}
%dir %{_datadir}/glusterfs
%dir %{_datadir}/glusterfs/scripts
@@ -1033,8 +1226,6 @@ exit 0
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/debug/io-stats.so
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/debug/sink.so
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/debug/trace.so
-%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/encryption
- %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/encryption/crypt.so
%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/access-control.so
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/barrier.so
@@ -1067,40 +1258,50 @@ exit 0
%{_tmpfilesdir}/gluster.conf
%endif
-%files api
-%exclude %{_libdir}/*.so
-# libgfapi files
-%{_libdir}/libgfapi.*
-%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator
-%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mount
- %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mount/api.so
-
-%files api-devel
-%{_libdir}/pkgconfig/glusterfs-api.pc
-%{_libdir}/libgfapi.so
-%dir %{_includedir}/glusterfs
-%dir %{_includedir}/glusterfs/api
- %{_includedir}/glusterfs/api/*
+%if ( 0%{?_without_server:1} )
+#exclude ganesha related files
+%exclude %{_sysconfdir}/ganesha/ganesha-ha.conf.sample
+%exclude %{_libexecdir}/ganesha/*
+%exclude %{_prefix}/lib/ocf/resource.d/heartbeat/*
+%endif
-%if ( 0%{!?_without_server:1} )
%files cli
%{_sbindir}/gluster
%{_mandir}/man8/gluster.8*
%{_sysconfdir}/bash_completion.d/gluster
-%endif
%files cloudsync-plugins
%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/cloudsync-plugins
%{_libdir}/glusterfs/%{version}%{?prereltag}/cloudsync-plugins/cloudsyncs3.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/cloudsync-plugins/cloudsynccvlt.so
-%files devel
+%files -n libglusterfs-devel
%dir %{_includedir}/glusterfs
- %{_includedir}/glusterfs/*
-%exclude %{_includedir}/glusterfs/api
-%exclude %{_libdir}/libgfapi.so
-%{_libdir}/*.so
+ %{_includedir}/glusterfs/*.h
+ %{_includedir}/glusterfs/server/*.h
+%{_libdir}/libglusterfs.so
+
+%files -n libgfapi-devel
+%dir %{_includedir}/glusterfs/api
+ %{_includedir}/glusterfs/api/*.h
+%{_libdir}/libgfapi.so
+%{_libdir}/pkgconfig/glusterfs-api.pc
+
+
+%files -n libgfchangelog-devel
+%dir %{_includedir}/glusterfs/gfchangelog
+ %{_includedir}/glusterfs/gfchangelog/*.h
+%{_libdir}/libgfchangelog.so
%{_libdir}/pkgconfig/libgfchangelog.pc
+%files -n libgfrpc-devel
+%dir %{_includedir}/glusterfs/rpc
+ %{_includedir}/glusterfs/rpc/*.h
+%{_libdir}/libgfrpc.so
+
+%files -n libgfxdr-devel
+%{_libdir}/libgfxdr.so
+
%files client-xlators
%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator
%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/cluster
@@ -1110,15 +1311,10 @@ exit 0
%files extra-xlators
%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator
-%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/encryption
- %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/encryption/rot-13.so
%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/quiesce.so
%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/playground
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/playground/template.so
-%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/testing
-%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/testing/performance
- %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/testing/performance/symlink-cache.so
%files fuse
# glusterfs is a symlink to glusterfsd, -server depends on -fuse.
@@ -1144,6 +1340,18 @@ exit 0
%ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/nfs/run/nfs.pid
%endif
+%files thin-arbiter
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/thin-arbiter.so
+%dir %{_datadir}/glusterfs/scripts
+ %{_datadir}/glusterfs/scripts/setup-thin-arbiter.sh
+%config %{_sysconfdir}/glusterfs/thin-arbiter.vol
+
+%if ( 0%{?_with_systemd:1} )
+%{_unitdir}/gluster-ta-volume.service
+%endif
+
%if ( 0%{!?_without_georeplication:1} )
%files geo-replication
%config(noreplace) %{_sysconfdir}/logrotate.d/glusterfs-georep
@@ -1155,6 +1363,13 @@ exit 0
%dir %{_libexecdir}/glusterfs/python/syncdaemon
%{_libexecdir}/glusterfs/gsyncd
%{_libexecdir}/glusterfs/python/syncdaemon/*
+%dir %{_libexecdir}/glusterfs/scripts
+ %{_libexecdir}/glusterfs/scripts/get-gfid.sh
+ %{_libexecdir}/glusterfs/scripts/slave-upgrade.sh
+ %{_libexecdir}/glusterfs/scripts/gsync-upgrade.sh
+ %{_libexecdir}/glusterfs/scripts/generate-gfid-file.sh
+ %{_libexecdir}/glusterfs/scripts/gsync-sync-gfid
+ %{_libexecdir}/glusterfs/scripts/schedule_georep.py*
%{_libexecdir}/glusterfs/gverify.sh
%{_libexecdir}/glusterfs/set_geo_rep_pem_keys.sh
%{_libexecdir}/glusterfs/peer_gsec_create
@@ -1171,43 +1386,62 @@ exit 0
%attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/gsync-create/post/S56glusterd-geo-rep-create-post.sh
%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/gsync-create/pre
-%dir %{_datadir}/glusterfs
-%dir %{_datadir}/glusterfs/scripts
- %{_datadir}/glusterfs/scripts/get-gfid.sh
- %{_datadir}/glusterfs/scripts/slave-upgrade.sh
- %{_datadir}/glusterfs/scripts/gsync-upgrade.sh
- %{_datadir}/glusterfs/scripts/generate-gfid-file.sh
- %{_datadir}/glusterfs/scripts/gsync-sync-gfid
- %{_datadir}/glusterfs/scripts/schedule_georep.py*
%endif
-%files libs
-%{_libdir}/*.so.*
-%exclude %{_libdir}/libgfapi.*
+%files -n libglusterfs0
+%{_libdir}/libglusterfs.so.*
+
+%files -n libgfapi0
+%{_libdir}/libgfapi.so.*
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mount
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mount/api.so
+
+%files -n libgfchangelog0
+%{_libdir}/libgfchangelog.so.*
+
+%files -n libgfrpc0
+%{_libdir}/libgfrpc.so.*
+
+%files -n libgfxdr0
+%{_libdir}/libgfxdr.so.*
+
+%files -n libglusterd0
+%{_libdir}/libglusterd.so.*
+%exclude %{_libdir}/libglusterd.so
%files -n python%{_pythonver}-gluster
# introducing glusterfs module in site packages.
# so that all other gluster submodules can reside in the same namespace.
%if ( %{_usepython3} )
%dir %{python3_sitelib}/gluster
+ %{python3_sitelib}/gluster/__init__.*
+ %{python3_sitelib}/gluster/__pycache__
%{python3_sitelib}/gluster/cliutils
%else
%dir %{python2_sitelib}/gluster
+ %{python2_sitelib}/gluster/__init__.*
%{python2_sitelib}/gluster/cliutils
%endif
-%if ( 0%{!?_without_rdma:1} )
-%files rdma
-%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/rpc-transport
- %{_libdir}/glusterfs/%{version}%{?prereltag}/rpc-transport/rdma*
-%endif
-
%files regression-tests
%dir %{_datadir}/glusterfs
%{_datadir}/glusterfs/run-tests.sh
%{_datadir}/glusterfs/tests
%exclude %{_datadir}/glusterfs/tests/vagrant
+%if ( 0%{!?_without_server:1} )
+%files ganesha
+%dir %{_libexecdir}/ganesha
+%{_sysconfdir}/ganesha/ganesha-ha.conf.sample
+%{_libexecdir}/ganesha/*
+%{_prefix}/lib/ocf/resource.d/heartbeat/*
+%{_sharedstatedir}/glusterd/hooks/1/start/post/S31ganesha-start.sh
+%ghost %attr(0644,-,-) %config(noreplace) %{_sysconfdir}/ganesha/ganesha-ha.conf
+%ghost %dir %attr(0755,-,-) %{_localstatedir}/run/gluster/shared_storage/nfs-ganesha
+%ghost %attr(0644,-,-) %config(noreplace) %{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ganesha.conf
+%ghost %attr(0644,-,-) %config(noreplace) %{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ganesha-ha.conf
+%endif
+
%if ( 0%{!?_without_ocf:1} )
%files resource-agents
# /usr/lib is the standard for OCF, also on x86_64
@@ -1219,6 +1453,7 @@ exit 0
%doc extras/clear_xattrs.sh
# sysconf
%config(noreplace) %{_sysconfdir}/glusterfs
+%exclude %{_sysconfdir}/glusterfs/thin-arbiter.vol
%exclude %{_sysconfdir}/glusterfs/eventsconfig.json
%exclude %{_sharedstatedir}/glusterd/nfs/nfs-server.vol
%exclude %{_sharedstatedir}/glusterd/nfs/run/nfs.pid
@@ -1241,7 +1476,7 @@ exit 0
# binaries
%{_sbindir}/glusterd
-%{_sbindir}/glfsheal
+%{_libexecdir}/glusterfs/glfsheal
%{_sbindir}/gf_attach
%{_sbindir}/gluster-setgfid2path
# {_sbindir}/glusterfsd is the actual binary, but glusterfs (client) is a
@@ -1255,7 +1490,6 @@ exit 0
%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator
%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/arbiter.so
- %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/thin-arbiter.so
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bit-rot.so
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bitrot-stub.so
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/sdfs.so
@@ -1275,8 +1509,6 @@ exit 0
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol/server.so
%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/storage
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/storage/posix.so
-%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance
- %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/decompounder.so
# snap_scheduler
%{_sbindir}/snap_scheduler.py
@@ -1293,6 +1525,8 @@ exit 0
%attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/gluster-block
%attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/nl-cache
%attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/db-workload
+ %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/distributed-virt
+ %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/samba
%dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/glusterfind
%dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/glusterfind/.keys
%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/glustershd
@@ -1301,6 +1535,7 @@ exit 0
%dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick
%dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post
%attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/disabled-quota-root-xattr-heal.sh
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/S10selinux-label-brick.sh
%attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/S13create-subdir-mounts.sh
%dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre
%attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre/S28Quota-enable-root-xattr-heal.sh
@@ -1373,7 +1608,7 @@ exit 0
%dir %{_sharedstatedir}/glusterd
%dir %{_sharedstatedir}/glusterd/events
%dir %{_libexecdir}/glusterfs
- %{_libexecdir}/glusterfs/events
+ %{_libexecdir}/glusterfs/gfevents
%{_libexecdir}/glusterfs/peer_eventsapi.py*
%{_sbindir}/glustereventsd
%{_sbindir}/gluster-eventsapi
@@ -1386,6 +1621,48 @@ exit 0
%endif
%changelog
+* Thu May 14 2020 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- refactor, common practice, Issue #1126
+
+* Mon May 11 2020 Sunny Kumar <sunkumar@redhat.com>
+- added requires policycoreutils-python-utils on rhel8 for geo-replication
+
+* Wed Oct 9 2019 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- remove leftover bd xlator cruft
+
+* Fri Aug 23 2019 Shwetha K Acharya <sacharya@redhat.com>
+- removed {name}-ufs from Obsoletes
+- added "< version" for obsoletes {name}-gnfs and {name}-rdma
+
+* Mon Jul 15 2019 Jiffin Tony Thottan <jthottan@redhat.com>
+- Adding ganesha ha bits back in gluster repository
+
+* Fri Jul 12 2019 Amar Tumballi <amarts@redhat.com>
+- Remove rdma package, and mark older rdma package as 'Obsoletes'
+
+* Fri Jun 14 2019 Niels de Vos <ndevos@redhat.com>
+- always build glusterfs-cli to allow monitoring/managing from clients
+
+* Wed Mar 6 2019 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- remove unneeded ldconfig in scriptlets
+- reported by Igor Gnatenko in Fedora
+- https://src.fedoraproject.org/rpms/glusterfs/pull-request/5
+
+* Mon Mar 4 2019 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- s390x has RDMA, since around Fedora 27 and in RHEL7 since June 2016.
+
+* Tue Feb 26 2019 Ashish Pandey <aspandey@redhat.com>
+- Add thin-arbiter package
+
+* Sun Feb 24 2019 Aravinda VK <avishwan@redhat.com>
+- Renamed events package to gfevents
+
+* Thu Feb 21 2019 Jiffin Tony Thottan <jthottan@redhat.com>
+- Obsoleting gluster-gnfs package
+
+* Wed Nov 28 2018 Krutika Dhananjay <kdhananj@redhat.com>
+- Install /var/lib/glusterd/groups/distributed-virt by default
+
* Tue Nov 13 2018 Niels de Vos <ndevos@redhat.com>
- Add an option to build with ThreadSanitizer (TSAN)
@@ -1453,9 +1730,6 @@ exit 0
* Thu Feb 16 2017 Niels de Vos <ndevos@redhat.com>
- Obsolete and Provide python-gluster for upgrading from glusterfs < 3.10
-* Tue Feb 7 2017 Kaleb S. KEITHLEY <kkeithle@redhat.com>
-- remove ganesha (#1418417)
-
* Wed Feb 1 2017 Poornima G <pgurusid@redhat.com>
- Install /var/lib/glusterd/groups/metadata-cache by default
diff --git a/glusterfsd/src/Makefile.am b/glusterfsd/src/Makefile.am
index 7b8d1dbf1fb..a0a778158d8 100644
--- a/glusterfsd/src/Makefile.am
+++ b/glusterfsd/src/Makefile.am
@@ -6,14 +6,15 @@ endif
glusterfsd_SOURCES = glusterfsd.c glusterfsd-mgmt.c
glusterfsd_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
$(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
- $(top_builddir)/rpc/xdr/src/libgfxdr.la ${GF_LDADD}
-glusterfsd_LDFLAGS = $(GF_LDFLAGS) $(LIB_DL)
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la $(GF_LDADD) $(LIB_DL)
+glusterfsd_LDFLAGS = $(GF_LDFLAGS)
gf_attach_SOURCES = gf_attach.c
gf_attach_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
$(top_builddir)/api/src/libgfapi.la \
$(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
$(top_builddir)/rpc/xdr/src/libgfxdr.la
+gf_attach_LDFLAGS = $(GF_LDFLAGS)
noinst_HEADERS = glusterfsd.h glusterfsd-mem-types.h glusterfsd-messages.h
diff --git a/glusterfsd/src/gf_attach.c b/glusterfsd/src/gf_attach.c
index 07ec0ed0686..c553b0b1f61 100644
--- a/glusterfsd/src/gf_attach.c
+++ b/glusterfsd/src/gf_attach.c
@@ -12,18 +12,23 @@
#include <stdlib.h>
#include <unistd.h>
-//#include "config.h"
-#include "glusterfs.h"
-#include "globals.h"
+#include <glusterfs/glusterfs.h>
#include "glfs-internal.h"
#include "rpc-clnt.h"
#include "protocol-common.h"
#include "xdr-generic.h"
#include "glusterd1-xdr.h"
+/* In seconds */
+#define CONNECT_TIMEOUT 60
+#define REPLY_TIMEOUT 120
+
int done = 0;
int rpc_status;
+pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+
struct rpc_clnt_procedure gf_attach_actors[GLUSTERD_BRICK_MAXVALUE] = {
[GLUSTERD_BRICK_NULL] = {"NULL", NULL},
[GLUSTERD_BRICK_OP] = {"BRICK_OP", NULL},
@@ -40,8 +45,12 @@ struct rpc_clnt_program gf_attach_prog = {
int32_t
my_callback(struct rpc_req *req, struct iovec *iov, int count, void *frame)
{
+ pthread_mutex_lock(&mutex);
rpc_status = req->rpc_status;
done = 1;
+ /* Signal main thread which is the only waiter */
+ pthread_cond_signal(&cond);
+ pthread_mutex_unlock(&mutex);
return 0;
}
@@ -50,6 +59,7 @@ int
send_brick_req(xlator_t *this, struct rpc_clnt *rpc, char *path, int op)
{
int ret = -1;
+ struct timespec ts;
struct iobuf *iobuf = NULL;
struct iobref *iobref = NULL;
struct iovec iov = {
@@ -59,12 +69,13 @@ send_brick_req(xlator_t *this, struct rpc_clnt *rpc, char *path, int op)
call_frame_t *frame = NULL;
gd1_mgmt_brick_op_req brick_req;
void *req = &brick_req;
- int i;
brick_req.op = op;
brick_req.name = path;
brick_req.input.input_val = NULL;
brick_req.input.input_len = 0;
+ brick_req.dict.dict_val = NULL;
+ brick_req.dict.dict_len = 0;
req_size = xdr_sizeof((xdrproc_t)xdr_gd1_mgmt_brick_op_req, req);
iobuf = iobuf_get2(rpc->ctx->iobuf_pool, req_size);
@@ -75,10 +86,6 @@ send_brick_req(xlator_t *this, struct rpc_clnt *rpc, char *path, int op)
if (!iobref)
goto out;
- frame = create_frame(this, this->ctx->pool);
- if (!frame)
- goto out;
-
iobref_add(iobref, iobuf);
iov.iov_base = iobuf->ptr;
@@ -91,20 +98,44 @@ send_brick_req(xlator_t *this, struct rpc_clnt *rpc, char *path, int op)
iov.iov_len = ret;
- for (i = 0; i < 60; ++i) {
- if (rpc->conn.connected) {
- break;
- }
- sleep(1);
+ /* Wait for connection */
+ timespec_now_realtime(&ts);
+ ts.tv_sec += CONNECT_TIMEOUT;
+ pthread_mutex_lock(&rpc->conn.lock);
+ {
+ while (!rpc->conn.connected)
+ if (pthread_cond_timedwait(&rpc->conn.cond, &rpc->conn.lock, &ts) ==
+ ETIMEDOUT) {
+ fprintf(stderr, "timeout waiting for RPC connection\n");
+ pthread_mutex_unlock(&rpc->conn.lock);
+ return EXIT_FAILURE;
+ }
+ }
+ pthread_mutex_unlock(&rpc->conn.lock);
+
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
}
/* Send the msg */
ret = rpc_clnt_submit(rpc, &gf_attach_prog, op, my_callback, &iov, 1, NULL,
0, iobref, frame, NULL, 0, NULL, 0, NULL);
if (!ret) {
- for (i = 0; !done && (i < 120); ++i) {
- sleep(1);
+ /* OK, wait for callback */
+ timespec_now_realtime(&ts);
+ ts.tv_sec += REPLY_TIMEOUT;
+ pthread_mutex_lock(&mutex);
+ {
+ while (!done)
+ if (pthread_cond_timedwait(&cond, &mutex, &ts) == ETIMEDOUT) {
+ fprintf(stderr, "timeout waiting for RPC reply\n");
+ pthread_mutex_unlock(&mutex);
+ return EXIT_FAILURE;
+ }
}
+ pthread_mutex_unlock(&mutex);
}
out:
diff --git a/glusterfsd/src/glusterfsd-mem-types.h b/glusterfsd/src/glusterfsd-mem-types.h
index 8df01c475cb..e59b558deb0 100644
--- a/glusterfsd/src/glusterfsd-mem-types.h
+++ b/glusterfsd/src/glusterfsd-mem-types.h
@@ -10,7 +10,7 @@
#ifndef __GLUSTERFSD_MEM_TYPES_H__
#define __GLUSTERFSD_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
#define GF_MEM_TYPE_START (gf_common_mt_end + 1)
diff --git a/glusterfsd/src/glusterfsd-messages.h b/glusterfsd/src/glusterfsd-messages.h
index 4adc9fd030a..0cdbffa71ea 100644
--- a/glusterfsd/src/glusterfsd-messages.h
+++ b/glusterfsd/src/glusterfsd-messages.h
@@ -11,7 +11,7 @@
#ifndef _GLUSTERFSD_MESSAGES_H_
#define _GLUSTERFSD_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
@@ -23,18 +23,71 @@
* glfs-message-id.h.
*/
-GLFS_MSGID(GLUSTERFSD, glusterfsd_msg_1, glusterfsd_msg_2, glusterfsd_msg_3,
- glusterfsd_msg_4, glusterfsd_msg_5, glusterfsd_msg_6,
- glusterfsd_msg_7, glusterfsd_msg_8, glusterfsd_msg_9,
- glusterfsd_msg_10, glusterfsd_msg_11, glusterfsd_msg_12,
- glusterfsd_msg_13, glusterfsd_msg_14, glusterfsd_msg_15,
- glusterfsd_msg_16, glusterfsd_msg_17, glusterfsd_msg_18,
- glusterfsd_msg_19, glusterfsd_msg_20, glusterfsd_msg_21,
- glusterfsd_msg_22, glusterfsd_msg_23, glusterfsd_msg_24,
- glusterfsd_msg_25, glusterfsd_msg_26, glusterfsd_msg_27,
- glusterfsd_msg_28, glusterfsd_msg_29, glusterfsd_msg_30,
- glusterfsd_msg_31, glusterfsd_msg_32, glusterfsd_msg_33,
- glusterfsd_msg_34, glusterfsd_msg_35, glusterfsd_msg_36,
- glusterfsd_msg_37, glusterfsd_msg_38, glusterfsd_msg_39);
+GLFS_MSGID(
+ GLUSTERFSD, glusterfsd_msg_1, glusterfsd_msg_2, glusterfsd_msg_3,
+ glusterfsd_msg_4, glusterfsd_msg_5, glusterfsd_msg_6, glusterfsd_msg_7,
+ glusterfsd_msg_8, glusterfsd_msg_9, glusterfsd_msg_10, glusterfsd_msg_11,
+ glusterfsd_msg_12, glusterfsd_msg_13, glusterfsd_msg_14, glusterfsd_msg_15,
+ glusterfsd_msg_16, glusterfsd_msg_17, glusterfsd_msg_18, glusterfsd_msg_19,
+ glusterfsd_msg_20, glusterfsd_msg_21, glusterfsd_msg_22, glusterfsd_msg_23,
+ glusterfsd_msg_24, glusterfsd_msg_25, glusterfsd_msg_26, glusterfsd_msg_27,
+ glusterfsd_msg_28, glusterfsd_msg_29, glusterfsd_msg_30, glusterfsd_msg_31,
+ glusterfsd_msg_32, glusterfsd_msg_33, glusterfsd_msg_34, glusterfsd_msg_35,
+ glusterfsd_msg_36, glusterfsd_msg_37, glusterfsd_msg_38, glusterfsd_msg_39,
+ glusterfsd_msg_40, glusterfsd_msg_41, glusterfsd_msg_42, glusterfsd_msg_43,
+ glusterfsd_msg_029, glusterfsd_msg_041, glusterfsd_msg_042);
+
+#define glusterfsd_msg_1_STR "Could not create absolute mountpoint path"
+#define glusterfsd_msg_2_STR "Could not get current working directory"
+#define glusterfsd_msg_4_STR "failed to set mount-point to options dictionary"
+#define glusterfsd_msg_3_STR "failed to set dict value for key"
+#define glusterfsd_msg_5_STR "failed to set disable for key"
+#define glusterfsd_msg_6_STR "failed to set enable for key"
+#define glusterfsd_msg_7_STR \
+ "Not a client process, not performing mount operation"
+#define glusterfsd_msg_8_STR "MOUNT_POINT initialization failed"
+#define glusterfsd_msg_9_STR "loading volume file failed"
+#define glusterfsd_msg_10_STR "xlator option is invalid"
+#define glusterfsd_msg_11_STR "Fetching the volume file from server..."
+#define glusterfsd_msg_12_STR "volume initialization failed"
+#define glusterfsd_msg_34_STR "memory init failed"
+#define glusterfsd_msg_13_STR "ERROR: glusterfs uuid generation failed"
+#define glusterfsd_msg_14_STR "ERROR: glusterfs pool creation failed"
+#define glusterfsd_msg_15_STR \
+ "ERROR: '--volfile-id' is mandatory if '-s' OR '--volfile-server' option " \
+ "is given"
+#define glusterfsd_msg_16_STR "ERROR: parsing the volfile failed"
+#define glusterfsd_msg_33_STR \
+ "obsolete option '--volfile-max-fecth-attempts or fetch-attempts' was " \
+ "provided"
+#define glusterfsd_msg_17_STR "pidfile open failed"
+#define glusterfsd_msg_18_STR "pidfile lock failed"
+#define glusterfsd_msg_20_STR "pidfile truncation failed"
+#define glusterfsd_msg_21_STR "pidfile write failed"
+#define glusterfsd_msg_22_STR "failed to exeute pthread_sigmask"
+#define glusterfsd_msg_23_STR "failed to create pthread"
+#define glusterfsd_msg_24_STR "daemonization failed"
+#define glusterfsd_msg_25_STR "mount failed"
+#define glusterfsd_msg_26_STR "failed to construct the graph"
+#define glusterfsd_msg_27_STR "fuse xlator cannot be specified in volume file"
+#define glusterfsd_msg_28_STR "Cannot reach volume specification file"
+#define glusterfsd_msg_29_STR "ERROR: glusterfsd context not initialized"
+#define glusterfsd_msg_43_STR \
+ "command line argument --brick-mux is valid only for brick process"
+#define glusterfsd_msg_029_STR "failed to create command line string"
+#define glusterfsd_msg_30_STR "Started running version"
+#define glusterfsd_msg_31_STR "Could not create new sync-environment"
+#define glusterfsd_msg_40_STR "No change in volfile, countinuing"
+#define glusterfsd_msg_39_STR "Unable to create/delete temporary file"
+#define glusterfsd_msg_38_STR \
+ "Not processing brick-op since volume graph is not yet active"
+#define glusterfsd_msg_35_STR "rpc req buffer unserialization failed"
+#define glusterfsd_msg_36_STR "problem in xlator loading"
+#define glusterfsd_msg_37_STR "failed to get dict value"
+#define glusterfsd_msg_41_STR "received attach request for volfile"
+#define glusterfsd_msg_42_STR "failed to unserialize xdata to dictionary"
+#define glusterfsd_msg_041_STR "can't detach. flie not found"
+#define glusterfsd_msg_042_STR \
+ "couldnot detach old graph. Aborting the reconfiguration operation"
#endif /* !_GLUSTERFSD_MESSAGES_H_ */
diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
index 2ce56f2bb18..eaf6796e4c3 100644
--- a/glusterfsd/src/glusterfsd-mgmt.c
+++ b/glusterfsd/src/glusterfsd-mgmt.c
@@ -13,10 +13,10 @@
#include <stdlib.h>
#include <signal.h>
-#include "glusterfs.h"
-#include "dict.h"
-#include "gf-event.h"
-#include "defaults.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/gf-event.h>
+#include <glusterfs/defaults.h>
#include "rpc-clnt.h"
#include "protocol-common.h"
@@ -28,11 +28,11 @@
#include "glusterfsd.h"
#include "rpcsvc.h"
#include "cli1-xdr.h"
-#include "statedump.h"
-#include "syncop.h"
-#include "xlator.h"
-#include "syscall.h"
-#include "monitoring.h"
+#include <glusterfs/statedump.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/monitoring.h>
#include "server.h"
static gf_boolean_t is_mgmt_rpc_reconnect = _gf_false;
@@ -45,9 +45,28 @@ glusterfs_volfile_fetch(glusterfs_ctx_t *ctx);
int
glusterfs_process_volfp(glusterfs_ctx_t *ctx, FILE *fp);
int
-glusterfs_graph_unknown_options(glusterfs_graph_t *graph);
-int
emancipate(glusterfs_ctx_t *ctx, int ret);
+int
+glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp,
+ char *volfile_id, char *checksum,
+ dict_t *dict);
+int
+glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx,
+ gf_volfile_t *volfile_obj, char *checksum,
+ dict_t *dict);
+int
+glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp,
+ char *volfile_id, char *checksum,
+ dict_t *dict);
+int
+glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj);
+
+gf_boolean_t
+mgmt_is_multiplexed_daemon(char *name);
+
+static int
+glusterfs_volume_top_perf(const char *brick_path, dict_t *dict,
+ gf_boolean_t write_test);
int
mgmt_cbk_spec(struct rpc_clnt *rpc, void *mydata, void *data)
@@ -62,6 +81,97 @@ mgmt_cbk_spec(struct rpc_clnt *rpc, void *mydata, void *data)
}
int
+mgmt_process_volfile(const char *volfile, ssize_t size, char *volfile_id,
+ dict_t *dict)
+{
+ glusterfs_ctx_t *ctx = NULL;
+ int ret = 0;
+ FILE *tmpfp = NULL;
+ gf_volfile_t *volfile_obj = NULL;
+ gf_volfile_t *volfile_tmp = NULL;
+ char sha256_hash[SHA256_DIGEST_LENGTH] = {
+ 0,
+ };
+ int tmp_fd = -1;
+ char template[] = "/tmp/glfs.volfile.XXXXXX";
+
+ glusterfs_compute_sha256((const unsigned char *)volfile, size, sha256_hash);
+ ctx = THIS->ctx;
+ LOCK(&ctx->volfile_lock);
+ {
+ list_for_each_entry(volfile_obj, &ctx->volfile_list, volfile_list)
+ {
+ if (!strcmp(volfile_id, volfile_obj->vol_id)) {
+ if (!memcmp(sha256_hash, volfile_obj->volfile_checksum,
+ sizeof(volfile_obj->volfile_checksum))) {
+ UNLOCK(&ctx->volfile_lock);
+ gf_smsg(THIS->name, GF_LOG_INFO, 0, glusterfsd_msg_40,
+ NULL);
+ goto out;
+ }
+ volfile_tmp = volfile_obj;
+ break;
+ }
+ }
+
+ /* coverity[secure_temp] mkstemp uses 0600 as the mode */
+ tmp_fd = mkstemp(template);
+ if (-1 == tmp_fd) {
+ UNLOCK(&ctx->volfile_lock);
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0, glusterfsd_msg_39,
+ "create template=%s", template, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ /* Calling unlink so that when the file is closed or program
+ * terminates the temporary file is deleted.
+ */
+ ret = sys_unlink(template);
+ if (ret < 0) {
+ gf_smsg(THIS->name, GF_LOG_INFO, 0, glusterfsd_msg_39,
+ "delete template=%s", template, NULL);
+ ret = 0;
+ }
+
+ tmpfp = fdopen(tmp_fd, "w+b");
+ if (!tmpfp) {
+ ret = -1;
+ goto unlock;
+ }
+
+ fwrite(volfile, size, 1, tmpfp);
+ fflush(tmpfp);
+ if (ferror(tmpfp)) {
+ ret = -1;
+ goto unlock;
+ }
+
+ if (!volfile_tmp) {
+ /* There is no checksum in the list, which means simple attach
+ * the volfile
+ */
+ ret = glusterfs_process_svc_attach_volfp(ctx, tmpfp, volfile_id,
+ sha256_hash, dict);
+ goto unlock;
+ }
+ ret = glusterfs_mux_volfile_reconfigure(tmpfp, ctx, volfile_obj,
+ sha256_hash, dict);
+ if (ret < 0) {
+ gf_msg_debug("glusterfsd-mgmt", EINVAL, "Reconfigure failed !!");
+ }
+ }
+unlock:
+ UNLOCK(&ctx->volfile_lock);
+out:
+ if (tmpfp)
+ fclose(tmpfp);
+ else if (tmp_fd != -1)
+ sys_close(tmp_fd);
+ return ret;
+}
+
+int
mgmt_cbk_event(struct rpc_clnt *rpc, void *mydata, void *data)
{
return 0;
@@ -95,6 +205,7 @@ glusterfs_serialize_reply(rpcsvc_request_t *req, void *arg,
retlen = xdr_serialize_generic(*outmsg, arg, xdrproc);
if (retlen == -1) {
gf_log(THIS->name, GF_LOG_ERROR, "Failed to encode message");
+ GF_FREE(iob);
goto ret;
}
@@ -282,6 +393,10 @@ glusterfs_handle_terminate(rpcsvc_request_t *req)
err:
if (!lockflag)
UNLOCK(&ctx->volfile_lock);
+ if (xlator_req.input.input_val)
+ free(xlator_req.input.input_val);
+ if (xlator_req.dict.dict_val)
+ free(xlator_req.dict.dict_val);
free(xlator_req.name);
xlator_req.name = NULL;
return 0;
@@ -364,10 +479,6 @@ glusterfs_handle_translator_info_get(rpcsvc_request_t *req)
dict_t *dict = NULL;
xlator_t *this = NULL;
gf1_cli_top_op top_op = 0;
- uint32_t blk_size = 0;
- uint32_t blk_count = 0;
- double time = 0;
- double throughput = 0;
xlator_t *any = NULL;
xlator_t *xlator = NULL;
glusterfs_graph_t *active = NULL;
@@ -400,35 +511,23 @@ glusterfs_handle_translator_info_get(rpcsvc_request_t *req)
}
ret = dict_get_int32(dict, "top-op", (int32_t *)&top_op);
- if ((!ret) &&
- (GF_CLI_TOP_READ_PERF == top_op || GF_CLI_TOP_WRITE_PERF == top_op)) {
- ret = dict_get_uint32(dict, "blk-size", &blk_size);
- if (ret)
- goto cont;
- ret = dict_get_uint32(dict, "blk-cnt", &blk_count);
- if (ret)
- goto cont;
-
- if (GF_CLI_TOP_READ_PERF == top_op) {
- ret = glusterfs_volume_top_read_perf(
- blk_size, blk_count, xlator_req.name, &throughput, &time);
- } else if (GF_CLI_TOP_WRITE_PERF == top_op) {
- ret = glusterfs_volume_top_write_perf(
- blk_size, blk_count, xlator_req.name, &throughput, &time);
- }
- if (ret)
- goto cont;
- ret = dict_set_double(dict, "time", time);
- if (ret)
- goto cont;
- ret = dict_set_double(dict, "throughput", throughput);
- if (ret)
- goto cont;
+ if (ret)
+ goto cont;
+ if (GF_CLI_TOP_READ_PERF == top_op) {
+ ret = glusterfs_volume_top_perf(xlator_req.name, dict, _gf_false);
+ } else if (GF_CLI_TOP_WRITE_PERF == top_op) {
+ ret = glusterfs_volume_top_perf(xlator_req.name, dict, _gf_true);
}
+
cont:
ctx = glusterfsd_ctx;
GF_ASSERT(ctx);
active = ctx->active;
+ if (active == NULL) {
+ gf_log(THIS->name, GF_LOG_ERROR, "ctx->active returned NULL");
+ ret = -1;
+ goto out;
+ }
any = active->first;
xlator = get_xlator_by_name(any, xlator_req.name);
@@ -456,6 +555,8 @@ out:
free(xlator_req.name);
free(xlator_req.input.input_val);
+ if (xlator_req.dict.dict_val)
+ free(xlator_req.dict.dict_val);
if (output)
dict_unref(output);
if (dict)
@@ -463,13 +564,12 @@ out:
return ret;
}
-int
-glusterfs_volume_top_write_perf(uint32_t blk_size, uint32_t blk_count,
- char *brick_path, double *throughput,
- double *time)
+static int
+glusterfs_volume_top_perf(const char *brick_path, dict_t *dict,
+ gf_boolean_t write_test)
{
int32_t fd = -1;
- int32_t input_fd = -1;
+ int32_t output_fd = -1;
char export_path[PATH_MAX] = {
0,
};
@@ -477,46 +577,44 @@ glusterfs_volume_top_write_perf(uint32_t blk_size, uint32_t blk_count,
int32_t iter = 0;
int32_t ret = -1;
uint64_t total_blks = 0;
+ uint32_t blk_size;
+ uint32_t blk_count;
+ double throughput = 0;
+ double time = 0;
struct timeval begin, end = {
0,
};
GF_ASSERT(brick_path);
- GF_ASSERT(throughput);
- GF_ASSERT(time);
- if (!(blk_size > 0) || !(blk_count > 0))
- goto out;
- snprintf(export_path, sizeof(export_path), "%s/%s", brick_path,
- ".gf-tmp-stats-perf");
+ ret = dict_get_uint32(dict, "blk-size", &blk_size);
+ if (ret)
+ goto out;
+ ret = dict_get_uint32(dict, "blk-cnt", &blk_count);
+ if (ret)
+ goto out;
- fd = open(export_path, O_CREAT | O_RDWR, S_IRWXU);
- if (-1 == fd) {
- ret = -1;
- gf_log("glusterd", GF_LOG_ERROR, "Could not open tmp file");
+ if (!(blk_size > 0) || !(blk_count > 0))
goto out;
- }
- buf = GF_MALLOC(blk_size * sizeof(*buf), gf_common_mt_char);
+ buf = GF_CALLOC(1, blk_size * sizeof(*buf), gf_common_mt_char);
if (!buf) {
ret = -1;
+ gf_log("glusterd", GF_LOG_ERROR, "Could not allocate memory");
goto out;
}
- input_fd = open("/dev/zero", O_RDONLY);
- if (-1 == input_fd) {
+ snprintf(export_path, sizeof(export_path), "%s/%s", brick_path,
+ ".gf-tmp-stats-perf");
+ fd = open(export_path, O_CREAT | O_RDWR, S_IRWXU);
+ if (-1 == fd) {
ret = -1;
- gf_log("glusterd", GF_LOG_ERROR, "Unable to open input file");
+ gf_log("glusterd", GF_LOG_ERROR, "Could not open tmp file");
goto out;
}
gettimeofday(&begin, NULL);
for (iter = 0; iter < blk_count; iter++) {
- ret = sys_read(input_fd, buf, blk_size);
- if (ret != blk_size) {
- ret = -1;
- goto out;
- }
ret = sys_write(fd, buf, blk_size);
if (ret != blk_size) {
ret = -1;
@@ -524,77 +622,36 @@ glusterfs_volume_top_write_perf(uint32_t blk_size, uint32_t blk_count,
}
total_blks += ret;
}
- ret = 0;
+ gettimeofday(&end, NULL);
if (total_blks != ((uint64_t)blk_size * blk_count)) {
gf_log("glusterd", GF_LOG_WARNING, "Error in write");
ret = -1;
goto out;
}
- gettimeofday(&end, NULL);
- *time = (end.tv_sec - begin.tv_sec) * 1e6 + (end.tv_usec - begin.tv_usec);
- *throughput = total_blks / *time;
+ time = gf_tvdiff(&begin, &end);
+ throughput = total_blks / time;
gf_log("glusterd", GF_LOG_INFO,
"Throughput %.2f Mbps time %.2f secs "
"bytes written %" PRId64,
- *throughput, *time, total_blks);
-
-out:
- if (fd >= 0)
- sys_close(fd);
- if (input_fd >= 0)
- sys_close(input_fd);
- GF_FREE(buf);
- sys_unlink(export_path);
-
- return ret;
-}
-
-int
-glusterfs_volume_top_read_perf(uint32_t blk_size, uint32_t blk_count,
- char *brick_path, double *throughput,
- double *time)
-{
- int32_t fd = -1;
- int32_t input_fd = -1;
- int32_t output_fd = -1;
- char export_path[PATH_MAX] = {
- 0,
- };
- char *buf = NULL;
- int32_t iter = 0;
- int32_t ret = -1;
- uint64_t total_blks = 0;
- struct timeval begin, end = {
- 0,
- };
-
- GF_ASSERT(brick_path);
- GF_ASSERT(throughput);
- GF_ASSERT(time);
- if (!(blk_size > 0) || !(blk_count > 0))
- goto out;
+ throughput, time, total_blks);
- snprintf(export_path, sizeof(export_path), "%s/%s", brick_path,
- ".gf-tmp-stats-perf");
- fd = open(export_path, O_CREAT | O_RDWR, S_IRWXU);
- if (-1 == fd) {
- ret = -1;
- gf_log("glusterd", GF_LOG_ERROR, "Could not open tmp file");
+ /* if it's a write test, we are done. Otherwise, we continue to the read
+ * part */
+ if (write_test == _gf_true) {
+ ret = 0;
goto out;
}
- buf = GF_MALLOC(blk_size * sizeof(*buf), gf_common_mt_char);
- if (!buf) {
- ret = -1;
- gf_log("glusterd", GF_LOG_ERROR, "Could not allocate memory");
+ ret = sys_fsync(fd);
+ if (ret) {
+ gf_log("glusterd", GF_LOG_ERROR, "could not flush cache");
goto out;
}
-
- input_fd = open("/dev/zero", O_RDONLY);
- if (-1 == input_fd) {
+ ret = sys_lseek(fd, 0L, 0);
+ if (ret != 0) {
+ gf_log("glusterd", GF_LOG_ERROR, "could not seek back to start");
ret = -1;
- gf_log("glusterd", GF_LOG_ERROR, "Could not open input file");
goto out;
}
@@ -605,30 +662,8 @@ glusterfs_volume_top_read_perf(uint32_t blk_size, uint32_t blk_count,
goto out;
}
- for (iter = 0; iter < blk_count; iter++) {
- ret = sys_read(input_fd, buf, blk_size);
- if (ret != blk_size) {
- ret = -1;
- goto out;
- }
- ret = sys_write(fd, buf, blk_size);
- if (ret != blk_size) {
- ret = -1;
- goto out;
- }
- }
+ total_blks = 0;
- ret = sys_fsync(fd);
- if (ret) {
- gf_log("glusterd", GF_LOG_ERROR, "could not flush cache");
- goto out;
- }
- ret = sys_lseek(fd, 0L, 0);
- if (ret != 0) {
- gf_log("glusterd", GF_LOG_ERROR, "could not seek back to start");
- ret = -1;
- goto out;
- }
gettimeofday(&begin, NULL);
for (iter = 0; iter < blk_count; iter++) {
ret = sys_read(fd, buf, blk_size);
@@ -643,31 +678,36 @@ glusterfs_volume_top_read_perf(uint32_t blk_size, uint32_t blk_count,
}
total_blks += ret;
}
- ret = 0;
+ gettimeofday(&end, NULL);
if (total_blks != ((uint64_t)blk_size * blk_count)) {
ret = -1;
gf_log("glusterd", GF_LOG_WARNING, "Error in read");
goto out;
}
- gettimeofday(&end, NULL);
- *time = (end.tv_sec - begin.tv_sec) * 1e6 + (end.tv_usec - begin.tv_usec);
- *throughput = total_blks / *time;
+ time = gf_tvdiff(&begin, &end);
+ throughput = total_blks / time;
gf_log("glusterd", GF_LOG_INFO,
"Throughput %.2f Mbps time %.2f secs "
"bytes read %" PRId64,
- *throughput, *time, total_blks);
-
+ throughput, time, total_blks);
+ ret = 0;
out:
if (fd >= 0)
sys_close(fd);
- if (input_fd >= 0)
- sys_close(input_fd);
if (output_fd >= 0)
sys_close(output_fd);
GF_FREE(buf);
sys_unlink(export_path);
-
+ if (ret == 0) {
+ ret = dict_set_double(dict, "time", time);
+ if (ret)
+ goto end;
+ ret = dict_set_double(dict, "throughput", throughput);
+ if (ret)
+ goto end;
+ }
+end:
return ret;
}
@@ -683,7 +723,8 @@ glusterfs_handle_translator_op(rpcsvc_request_t *req)
xlator_t *xlator = NULL;
xlator_t *any = NULL;
dict_t *output = NULL;
- char key[2048] = {0};
+ char key[32] = {0};
+ int len;
char *xname = NULL;
glusterfs_ctx_t *ctx = NULL;
glusterfs_graph_t *active = NULL;
@@ -707,10 +748,8 @@ glusterfs_handle_translator_op(rpcsvc_request_t *req)
active = ctx->active;
if (!active) {
ret = -1;
- gf_msg(this->name, GF_LOG_ERROR, EAGAIN, glusterfsd_msg_38,
- "Not processing brick-op no. %d since volume graph is "
- "not yet active.",
- xlator_req.op);
+ gf_smsg(this->name, GF_LOG_ERROR, EAGAIN, glusterfsd_msg_38,
+ "brick-op_no.=%d", xlator_req.op, NULL);
goto out;
}
any = active->first;
@@ -735,8 +774,8 @@ glusterfs_handle_translator_op(rpcsvc_request_t *req)
}
for (i = 0; i < count; i++) {
- snprintf(key, sizeof(key), "xl-%d", i);
- ret = dict_get_str(input, key, &xname);
+ len = snprintf(key, sizeof(key), "xl-%d", i);
+ ret = dict_get_strn(input, key, len, &xname);
if (ret) {
gf_log(this->name, GF_LOG_ERROR,
"Couldn't get "
@@ -754,8 +793,8 @@ glusterfs_handle_translator_op(rpcsvc_request_t *req)
}
}
for (i = 0; i < count; i++) {
- snprintf(key, sizeof(key), "xl-%d", i);
- ret = dict_get_str(input, key, &xname);
+ len = snprintf(key, sizeof(key), "xl-%d", i);
+ ret = dict_get_strn(input, key, len, &xname);
xlator = xlator_search_by_name(any, xname);
XLATOR_NOTIFY(ret, xlator, GF_EVENT_TRANSLATOR_OP, input, output);
/* If notify fails for an xlator we need to capture it but
@@ -829,8 +868,7 @@ glusterfs_handle_bitrot(rpcsvc_request_t *req)
xlator_req.input.input_len, &input);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, glusterfsd_msg_35,
- "rpc req buffer unserialization failed.");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, glusterfsd_msg_35, NULL);
goto out;
}
@@ -839,8 +877,7 @@ glusterfs_handle_bitrot(rpcsvc_request_t *req)
xlator = xlator_search_by_name(any, xname);
if (!xlator) {
snprintf(msg, sizeof(msg), "xlator %s is not loaded", xname);
- gf_msg(this->name, GF_LOG_ERROR, 0, glusterfsd_msg_36,
- "problem in xlator loading.");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, glusterfsd_msg_36, NULL);
goto out;
}
@@ -853,8 +890,7 @@ glusterfs_handle_bitrot(rpcsvc_request_t *req)
ret = dict_get_str(input, "scrub-value", &scrub_opt);
if (ret) {
snprintf(msg, sizeof(msg), "Failed to get scrub value");
- gf_msg(this->name, GF_LOG_ERROR, 0, glusterfsd_msg_37,
- "failed to get dict value");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, glusterfsd_msg_37, NULL);
ret = -1;
goto out;
}
@@ -877,6 +913,8 @@ out:
if (input)
dict_unref(input);
free(xlator_req.input.input_val); /*malloced by xdr*/
+ if (xlator_req.dict.dict_val)
+ free(xlator_req.dict.dict_val);
if (output)
dict_unref(output);
free(xlator_req.name);
@@ -919,44 +957,51 @@ glusterfs_handle_attach(rpcsvc_request_t *req)
}
ret = 0;
+ if (!this->ctx->active) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "got attach for %s but no active graph", xlator_req.name);
+ goto post_unlock;
+ }
+
+ gf_log(this->name, GF_LOG_INFO, "got attach for %s", xlator_req.name);
+
LOCK(&ctx->volfile_lock);
{
- if (this->ctx->active) {
- gf_log(this->name, GF_LOG_INFO, "got attach for %s",
- xlator_req.name);
- ret = glusterfs_graph_attach(this->ctx->active, xlator_req.name,
- &newgraph);
- if (!ret && (newgraph && newgraph->first)) {
- nextchild = newgraph->first;
- ret = xlator_notify(nextchild, GF_EVENT_PARENT_UP, nextchild);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- LG_MSG_EVENT_NOTIFY_FAILED,
- "Parent up notification "
- "failed for %s ",
- nextchild->name);
- goto out;
- }
- /* we need a protocol/server xlator as
- * nextchild
- */
- srv_xl = this->ctx->active->first;
- srv_conf = (server_conf_t *)srv_xl->private;
- rpcsvc_autoscale_threads(this->ctx, srv_conf->rpc, 1);
+ ret = glusterfs_graph_attach(this->ctx->active, xlator_req.name,
+ &newgraph);
+ if (!ret && (newgraph && newgraph->first)) {
+ nextchild = newgraph->first;
+ ret = xlator_notify(nextchild, GF_EVENT_PARENT_UP, nextchild);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, LG_MSG_EVENT_NOTIFY_FAILED,
+ "event=ParentUp", "name=%s", nextchild->name, NULL);
+ goto unlock;
}
- } else {
- gf_log(this->name, GF_LOG_WARNING,
- "got attach for %s but no active graph", xlator_req.name);
+ /* we need a protocol/server xlator as
+ * nextchild
+ */
+ srv_xl = this->ctx->active->first;
+ srv_conf = (server_conf_t *)srv_xl->private;
+ rpcsvc_autoscale_threads(this->ctx, srv_conf->rpc, 1);
}
if (ret) {
ret = -1;
}
-
- glusterfs_translator_info_response_send(req, ret, NULL, NULL);
-
- out:
+ ret = glusterfs_translator_info_response_send(req, ret, NULL, NULL);
+ if (ret) {
+ /* Response sent back to glusterd, req is already destroyed. So
+ * resetting the ret to 0. Otherwise another response will be
+ * send from rpcsvc_check_and_reply_error. Which will lead to
+ * double resource leak.
+ */
+ ret = 0;
+ }
+ unlock:
UNLOCK(&ctx->volfile_lock);
}
+post_unlock:
+ if (xlator_req.dict.dict_val)
+ free(xlator_req.dict.dict_val);
free(xlator_req.input.input_val);
free(xlator_req.name);
@@ -964,6 +1009,122 @@ glusterfs_handle_attach(rpcsvc_request_t *req)
}
int
+glusterfs_handle_svc_attach(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gd1_mgmt_brick_op_req xlator_req = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ dict_t *dict = NULL;
+
+ GF_ASSERT(req);
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = xdr_to_generic(req->msg[0], &xlator_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+
+ if (ret < 0) {
+ /*failed to decode msg;*/
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ gf_smsg(THIS->name, GF_LOG_INFO, 0, glusterfsd_msg_41, "volfile-id=%s",
+ xlator_req.name, NULL);
+
+ dict = dict_new();
+ if (!dict) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = dict_unserialize(xlator_req.dict.dict_val, xlator_req.dict.dict_len,
+ &dict);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_WARNING, EINVAL, glusterfsd_msg_42, NULL);
+ goto out;
+ }
+ dict->extra_stdfree = xlator_req.dict.dict_val;
+
+ ret = 0;
+
+ ret = mgmt_process_volfile(xlator_req.input.input_val,
+ xlator_req.input.input_len, xlator_req.name,
+ dict);
+out:
+ if (dict)
+ dict_unref(dict);
+ if (xlator_req.input.input_val)
+ free(xlator_req.input.input_val);
+ if (xlator_req.name)
+ free(xlator_req.name);
+ glusterfs_translator_info_response_send(req, ret, NULL, NULL);
+ return 0;
+}
+
+int
+glusterfs_handle_svc_detach(rpcsvc_request_t *req)
+{
+ gd1_mgmt_brick_op_req xlator_req = {
+ 0,
+ };
+ ssize_t ret;
+ gf_volfile_t *volfile_obj = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ gf_volfile_t *volfile_tmp = NULL;
+
+ ret = xdr_to_generic(req->msg[0], &xlator_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ return -1;
+ }
+ ctx = glusterfsd_ctx;
+
+ LOCK(&ctx->volfile_lock);
+ {
+ list_for_each_entry(volfile_obj, &ctx->volfile_list, volfile_list)
+ {
+ if (!strcmp(xlator_req.name, volfile_obj->vol_id)) {
+ volfile_tmp = volfile_obj;
+ break;
+ }
+ }
+
+ if (!volfile_tmp) {
+ UNLOCK(&ctx->volfile_lock);
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0, glusterfsd_msg_041, "name=%s",
+ xlator_req.name, NULL);
+ /*
+ * Used to be -ENOENT. However, the caller asked us to
+ * make sure it's down and if it's already down that's
+ * good enough.
+ */
+ ret = 0;
+ goto out;
+ }
+ /* coverity[ORDER_REVERSAL] */
+ ret = glusterfs_process_svc_detach(ctx, volfile_tmp);
+ if (ret) {
+ UNLOCK(&ctx->volfile_lock);
+ gf_smsg("glusterfsd-mgmt", GF_LOG_ERROR, EINVAL, glusterfsd_msg_042,
+ NULL);
+ goto out;
+ }
+ }
+ UNLOCK(&ctx->volfile_lock);
+out:
+ glusterfs_terminate_response_send(req, ret);
+ free(xlator_req.name);
+ xlator_req.name = NULL;
+
+ return 0;
+}
+
+int
glusterfs_handle_dump_metrics(rpcsvc_request_t *req)
{
int32_t ret = -1;
@@ -1007,10 +1168,8 @@ glusterfs_handle_dump_metrics(rpcsvc_request_t *req)
goto out;
if (statbuf.st_size > GF_UNIT_MB) {
- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, LG_MSG_NO_MEMORY,
- "Allocated size exceeds expectation: "
- "reconsider logic (%" PRId64 ")",
- statbuf.st_size);
+ gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, LG_MSG_NO_MEMORY,
+ "reconsider logic (%" PRId64 ")", statbuf.st_size, NULL);
}
msg = GF_CALLOC(1, (statbuf.st_size + 1), gf_common_mt_char);
if (!msg)
@@ -1030,6 +1189,10 @@ out:
GF_FREE(msg);
GF_FREE(filepath);
+ if (xlator_req.input.input_val)
+ free(xlator_req.input.input_val);
+ if (xlator_req.dict.dict_val)
+ free(xlator_req.dict.dict_val);
return ret;
}
@@ -1102,6 +1265,8 @@ out:
if (dict)
dict_unref(dict);
free(xlator_req.input.input_val); // malloced by xdr
+ if (xlator_req.dict.dict_val)
+ free(xlator_req.dict.dict_val);
if (output)
dict_unref(output);
free(xlator_req.name); // malloced by xdr
@@ -1125,7 +1290,6 @@ glusterfs_handle_brick_status(rpcsvc_request_t *req)
xlator_t *brick_xl = NULL;
dict_t *dict = NULL;
dict_t *output = NULL;
- char *xname = NULL;
uint32_t cmd = 0;
char *msg = NULL;
char *brickname = NULL;
@@ -1188,7 +1352,7 @@ glusterfs_handle_brick_status(rpcsvc_request_t *req)
brick_xl = get_xlator_by_name(server_xl, brickname);
if (!brick_xl) {
- gf_log(this->name, GF_LOG_ERROR, "xlator %s is not loaded", xname);
+ gf_log(this->name, GF_LOG_ERROR, "xlator is not loaded");
ret = -1;
goto out;
}
@@ -1250,7 +1414,9 @@ out:
if (output)
dict_unref(output);
free(brick_req.input.input_val);
- GF_FREE(xname);
+ if (brick_req.dict.dict_val)
+ free(brick_req.dict.dict_val);
+ free(brick_req.name);
GF_FREE(msg);
GF_FREE(rsp.output.output_val);
@@ -1321,10 +1487,12 @@ glusterfs_handle_node_status(rpcsvc_request_t *req)
}
any = active->first;
- if ((cmd & GF_CLI_STATUS_NFS) != 0)
- ret = gf_asprintf(&node_name, "%s", "nfs-server");
- else if ((cmd & GF_CLI_STATUS_SHD) != 0)
+ if ((cmd & GF_CLI_STATUS_SHD) != 0)
ret = gf_asprintf(&node_name, "%s", "glustershd");
+#ifdef BUILD_GNFS
+ else if ((cmd & GF_CLI_STATUS_NFS) != 0)
+ ret = gf_asprintf(&node_name, "%s", "nfs-server");
+#endif
else if ((cmd & GF_CLI_STATUS_QUOTAD) != 0)
ret = gf_asprintf(&node_name, "%s", "quotad");
else if ((cmd & GF_CLI_STATUS_BITD) != 0)
@@ -1384,7 +1552,7 @@ glusterfs_handle_node_status(rpcsvc_request_t *req)
break;
case GF_CLI_STATUS_CLIENTS:
- // clients not availbale for SHD
+ // clients not available for SHD
if ((cmd & GF_CLI_STATUS_SHD) != 0)
break;
@@ -1442,6 +1610,8 @@ out:
if (dict)
dict_unref(dict);
free(node_req.input.input_val);
+ if (node_req.dict.dict_val)
+ free(node_req.dict.dict_val);
GF_FREE(msg);
GF_FREE(rsp.output.output_val);
GF_FREE(node_name);
@@ -1499,6 +1669,11 @@ glusterfs_handle_nfs_profile(rpcsvc_request_t *req)
GF_ASSERT(ctx);
active = ctx->active;
+ if (active == NULL) {
+ gf_log(THIS->name, GF_LOG_ERROR, "ctx->active returned NULL");
+ ret = -1;
+ goto out;
+ }
any = active->first;
// is this needed?
@@ -1540,6 +1715,8 @@ glusterfs_handle_nfs_profile(rpcsvc_request_t *req)
out:
free(nfs_req.input.input_val);
+ if (nfs_req.dict.dict_val)
+ free(nfs_req.dict.dict_val);
if (dict)
dict_unref(dict);
if (output)
@@ -1618,6 +1795,8 @@ out:
if (dict)
dict_unref(dict);
free(xlator_req.input.input_val); // malloced by xdr
+ if (xlator_req.dict.dict_val)
+ free(xlator_req.dict.dict_val);
if (output)
dict_unref(output);
free(xlator_req.name); // malloced by xdr
@@ -1642,7 +1821,6 @@ glusterfs_handle_barrier(rpcsvc_request_t *req)
xlator_t *old_THIS = NULL;
dict_t *dict = NULL;
gf_boolean_t barrier = _gf_true;
- gf_boolean_t barrier_err = _gf_false;
xlator_list_t *trav;
GF_ASSERT(req);
@@ -1657,6 +1835,11 @@ glusterfs_handle_barrier(rpcsvc_request_t *req)
ctx = glusterfsd_ctx;
GF_ASSERT(ctx);
active = ctx->active;
+ if (active == NULL) {
+ gf_log(THIS->name, GF_LOG_ERROR, "ctx->active returned NULL");
+ ret = -1;
+ goto out;
+ }
top = active->first;
for (trav = top->children; trav; trav = trav->next) {
@@ -1713,8 +1896,6 @@ glusterfs_handle_barrier(rpcsvc_request_t *req)
barrier = dict_get_str_boolean(dict, "barrier", _gf_true);
if (barrier)
goto submit_reply;
- else
- barrier_err = _gf_true;
}
/* Reset THIS so that we have it correct in case of an error below
@@ -1739,9 +1920,6 @@ glusterfs_handle_barrier(rpcsvc_request_t *req)
goto submit_reply;
}
- if (barrier_err)
- ret = -1;
-
submit_reply:
THIS = old_THIS;
@@ -1752,7 +1930,8 @@ out:
if (dict)
dict_unref(dict);
free(brick_req.input.input_val);
-
+ if (brick_req.dict.dict_val)
+ free(brick_req.dict.dict_val);
gf_log(THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
return ret;
}
@@ -1765,14 +1944,14 @@ glusterfs_handle_rpc_msg(rpcsvc_request_t *req)
return ret;
}
-rpcclnt_cb_actor_t mgmt_cbk_actors[GF_CBK_MAXVALUE] = {
- [GF_CBK_FETCHSPEC] = {"FETCHSPEC", GF_CBK_FETCHSPEC, mgmt_cbk_spec},
- [GF_CBK_EVENT_NOTIFY] = {"EVENTNOTIFY", GF_CBK_EVENT_NOTIFY,
- mgmt_cbk_event},
- [GF_CBK_STATEDUMP] = {"STATEDUMP", GF_CBK_STATEDUMP, mgmt_cbk_event},
+static rpcclnt_cb_actor_t mgmt_cbk_actors[GF_CBK_MAXVALUE] = {
+ [GF_CBK_FETCHSPEC] = {"FETCHSPEC", mgmt_cbk_spec, GF_CBK_FETCHSPEC},
+ [GF_CBK_EVENT_NOTIFY] = {"EVENTNOTIFY", mgmt_cbk_event,
+ GF_CBK_EVENT_NOTIFY},
+ [GF_CBK_STATEDUMP] = {"STATEDUMP", mgmt_cbk_event, GF_CBK_STATEDUMP},
};
-struct rpcclnt_cb_program mgmt_cbk_prog = {
+static struct rpcclnt_cb_program mgmt_cbk_prog = {
.progname = "GlusterFS Callback",
.prognum = GLUSTER_CBK_PROGRAM,
.progver = GLUSTER_CBK_VERSION,
@@ -1780,7 +1959,7 @@ struct rpcclnt_cb_program mgmt_cbk_prog = {
.numactors = GF_CBK_MAXVALUE,
};
-char *clnt_pmap_procs[GF_PMAP_MAXVALUE] = {
+static char *clnt_pmap_procs[GF_PMAP_MAXVALUE] = {
[GF_PMAP_NULL] = "NULL",
[GF_PMAP_PORTBYBRICK] = "PORTBYBRICK",
[GF_PMAP_BRICKBYPORT] = "BRICKBYPORT",
@@ -1789,14 +1968,14 @@ char *clnt_pmap_procs[GF_PMAP_MAXVALUE] = {
[GF_PMAP_SIGNUP] = "SIGNUP", /* DEPRECATED - DON'T USE! */
};
-rpc_clnt_prog_t clnt_pmap_prog = {
+static rpc_clnt_prog_t clnt_pmap_prog = {
.progname = "Gluster Portmap",
.prognum = GLUSTER_PMAP_PROGRAM,
.progver = GLUSTER_PMAP_VERSION,
.procnames = clnt_pmap_procs,
};
-char *clnt_handshake_procs[GF_HNDSK_MAXVALUE] = {
+static char *clnt_handshake_procs[GF_HNDSK_MAXVALUE] = {
[GF_HNDSK_NULL] = "NULL",
[GF_HNDSK_SETVOLUME] = "SETVOLUME",
[GF_HNDSK_GETSPEC] = "GETSPEC",
@@ -1804,50 +1983,55 @@ char *clnt_handshake_procs[GF_HNDSK_MAXVALUE] = {
[GF_HNDSK_EVENT_NOTIFY] = "EVENTNOTIFY",
};
-rpc_clnt_prog_t clnt_handshake_prog = {
+static rpc_clnt_prog_t clnt_handshake_prog = {
.progname = "GlusterFS Handshake",
.prognum = GLUSTER_HNDSK_PROGRAM,
.progver = GLUSTER_HNDSK_VERSION,
.procnames = clnt_handshake_procs,
};
-rpcsvc_actor_t glusterfs_actors[GLUSTERD_BRICK_MAXVALUE] = {
- [GLUSTERD_BRICK_NULL] = {"NULL", GLUSTERD_BRICK_NULL,
- glusterfs_handle_rpc_msg, NULL, 0, DRC_NA},
- [GLUSTERD_BRICK_TERMINATE] = {"TERMINATE", GLUSTERD_BRICK_TERMINATE,
- glusterfs_handle_terminate, NULL, 0, DRC_NA},
+static rpcsvc_actor_t glusterfs_actors[GLUSTERD_BRICK_MAXVALUE] = {
+ [GLUSTERD_BRICK_NULL] = {"NULL", glusterfs_handle_rpc_msg, NULL,
+ GLUSTERD_BRICK_NULL, DRC_NA, 0},
+ [GLUSTERD_BRICK_TERMINATE] = {"TERMINATE", glusterfs_handle_terminate, NULL,
+ GLUSTERD_BRICK_TERMINATE, DRC_NA, 0},
[GLUSTERD_BRICK_XLATOR_INFO] = {"TRANSLATOR INFO",
- GLUSTERD_BRICK_XLATOR_INFO,
glusterfs_handle_translator_info_get, NULL,
- 0, DRC_NA},
- [GLUSTERD_BRICK_XLATOR_OP] = {"TRANSLATOR OP", GLUSTERD_BRICK_XLATOR_OP,
- glusterfs_handle_translator_op, NULL, 0,
- DRC_NA},
- [GLUSTERD_BRICK_STATUS] = {"STATUS", GLUSTERD_BRICK_STATUS,
- glusterfs_handle_brick_status, NULL, 0, DRC_NA},
+ GLUSTERD_BRICK_XLATOR_INFO, DRC_NA, 0},
+ [GLUSTERD_BRICK_XLATOR_OP] = {"TRANSLATOR OP",
+ glusterfs_handle_translator_op, NULL,
+ GLUSTERD_BRICK_XLATOR_OP, DRC_NA, 0},
+ [GLUSTERD_BRICK_STATUS] = {"STATUS", glusterfs_handle_brick_status, NULL,
+ GLUSTERD_BRICK_STATUS, DRC_NA, 0},
[GLUSTERD_BRICK_XLATOR_DEFRAG] = {"TRANSLATOR DEFRAG",
- GLUSTERD_BRICK_XLATOR_DEFRAG,
- glusterfs_handle_defrag, NULL, 0, DRC_NA},
- [GLUSTERD_NODE_PROFILE] = {"NFS PROFILE", GLUSTERD_NODE_PROFILE,
- glusterfs_handle_nfs_profile, NULL, 0, DRC_NA},
- [GLUSTERD_NODE_STATUS] = {"NFS STATUS", GLUSTERD_NODE_STATUS,
- glusterfs_handle_node_status, NULL, 0, DRC_NA},
+ glusterfs_handle_defrag, NULL,
+ GLUSTERD_BRICK_XLATOR_DEFRAG, DRC_NA, 0},
+ [GLUSTERD_NODE_PROFILE] = {"NFS PROFILE", glusterfs_handle_nfs_profile,
+ NULL, GLUSTERD_NODE_PROFILE, DRC_NA, 0},
+ [GLUSTERD_NODE_STATUS] = {"NFS STATUS", glusterfs_handle_node_status, NULL,
+ GLUSTERD_NODE_STATUS, DRC_NA, 0},
[GLUSTERD_VOLUME_BARRIER_OP] = {"VOLUME BARRIER OP",
- GLUSTERD_VOLUME_BARRIER_OP,
- glusterfs_handle_volume_barrier_op, NULL, 0,
- DRC_NA},
- [GLUSTERD_BRICK_BARRIER] = {"BARRIER", GLUSTERD_BRICK_BARRIER,
- glusterfs_handle_barrier, NULL, 0, DRC_NA},
- [GLUSTERD_NODE_BITROT] = {"BITROT", GLUSTERD_NODE_BITROT,
- glusterfs_handle_bitrot, NULL, 0, DRC_NA},
- [GLUSTERD_BRICK_ATTACH] = {"ATTACH", GLUSTERD_BRICK_ATTACH,
- glusterfs_handle_attach, NULL, 0, DRC_NA},
-
- [GLUSTERD_DUMP_METRICS] = {"DUMP METRICS", GLUSTERD_DUMP_METRICS,
- glusterfs_handle_dump_metrics, NULL, 0, DRC_NA},
+ glusterfs_handle_volume_barrier_op, NULL,
+ GLUSTERD_VOLUME_BARRIER_OP, DRC_NA, 0},
+ [GLUSTERD_BRICK_BARRIER] = {"BARRIER", glusterfs_handle_barrier, NULL,
+ GLUSTERD_BRICK_BARRIER, DRC_NA, 0},
+ [GLUSTERD_NODE_BITROT] = {"BITROT", glusterfs_handle_bitrot, NULL,
+ GLUSTERD_NODE_BITROT, DRC_NA, 0},
+ [GLUSTERD_BRICK_ATTACH] = {"ATTACH", glusterfs_handle_attach, NULL,
+ GLUSTERD_BRICK_ATTACH, DRC_NA, 0},
+
+ [GLUSTERD_DUMP_METRICS] = {"DUMP METRICS", glusterfs_handle_dump_metrics,
+ NULL, GLUSTERD_DUMP_METRICS, DRC_NA, 0},
+
+ [GLUSTERD_SVC_ATTACH] = {"ATTACH CLIENT", glusterfs_handle_svc_attach, NULL,
+ GLUSTERD_SVC_ATTACH, DRC_NA, 0},
+
+ [GLUSTERD_SVC_DETACH] = {"DETACH CLIENT", glusterfs_handle_svc_detach, NULL,
+ GLUSTERD_SVC_DETACH, DRC_NA, 0},
+
};
-struct rpcsvc_program glusterfs_mop_prog = {
+static struct rpcsvc_program glusterfs_mop_prog = {
.progname = "Gluster Brick operations",
.prognum = GD_BRICK_PROGRAM,
.progver = GD_BRICK_VERSION,
@@ -1975,10 +2159,12 @@ mgmt_getspec_cbk(struct rpc_req *req, struct iovec *iov, int count,
}
dict->extra_stdfree = rsp.xdata.xdata_val;
- /* glusterd2 only */
ret = dict_get_str(dict, "servers-list", &servers_list);
if (ret) {
- goto volfile;
+ /* Server list is set by glusterd at the time of getspec */
+ ret = dict_get_str(dict, GLUSTERD_BRICK_SERVERS, &servers_list);
+ if (ret)
+ goto volfile;
}
gf_log(frame->this->name, GF_LOG_INFO,
@@ -1992,14 +2178,18 @@ mgmt_getspec_cbk(struct rpc_req *req, struct iovec *iov, int count,
}
volfile:
- ret = 0;
size = rsp.op_ret;
+ volfile_id = frame->local;
+ if (mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name)) {
+ ret = mgmt_process_volfile((const char *)rsp.spec, size, volfile_id,
+ dict);
+ goto post_graph_mgmt;
+ }
+ ret = 0;
glusterfs_compute_sha256((const unsigned char *)rsp.spec, size,
sha256_hash);
- volfile_id = frame->local;
-
LOCK(&ctx->volfile_lock);
{
locked = 1;
@@ -2009,10 +2199,11 @@ volfile:
if (!strcmp(volfile_id, volfile_obj->vol_id)) {
if (!memcmp(sha256_hash, volfile_obj->volfile_checksum,
sizeof(volfile_obj->volfile_checksum))) {
+ UNLOCK(&ctx->volfile_lock);
gf_log(frame->this->name, GF_LOG_INFO,
"No change in volfile,"
"continuing");
- goto out;
+ goto post_unlock;
}
volfile_tmp = volfile_obj;
break;
@@ -2022,10 +2213,11 @@ volfile:
/* coverity[secure_temp] mkstemp uses 0600 as the mode */
tmp_fd = mkstemp(template);
if (-1 == tmp_fd) {
- gf_msg(frame->this->name, GF_LOG_ERROR, 0, glusterfsd_msg_39,
- "Unable to create temporary file: %s", template);
+ UNLOCK(&ctx->volfile_lock);
+ gf_smsg(frame->this->name, GF_LOG_ERROR, 0, glusterfsd_msg_39,
+ "create template=%s", template, NULL);
ret = -1;
- goto out;
+ goto post_unlock;
}
/* Calling unlink so that when the file is closed or program
@@ -2033,8 +2225,8 @@ volfile:
*/
ret = sys_unlink(template);
if (ret < 0) {
- gf_msg(frame->this->name, GF_LOG_INFO, 0, glusterfsd_msg_39,
- "Unable to delete temporary file: %s", template);
+ gf_smsg(frame->this->name, GF_LOG_INFO, 0, glusterfsd_msg_39,
+ "delete template=%s", template, NULL);
ret = 0;
}
@@ -2066,11 +2258,11 @@ volfile:
"No need to re-load volfile, reconfigure done");
if (!volfile_tmp) {
ret = -1;
+ UNLOCK(&ctx->volfile_lock);
gf_log("mgmt", GF_LOG_ERROR,
- "Graph "
- "reconfigure succeeded with out having "
+ "Graph reconfigure succeeded with out having "
"checksum.");
- goto out;
+ goto post_unlock;
}
memcpy(volfile_tmp->volfile_checksum, sha256_hash,
sizeof(volfile_tmp->volfile_checksum));
@@ -2078,8 +2270,9 @@ volfile:
}
if (ret < 0) {
+ UNLOCK(&ctx->volfile_lock);
gf_log("glusterfsd-mgmt", GF_LOG_DEBUG, "Reconfigure failed !!");
- goto out;
+ goto post_unlock;
}
ret = glusterfs_process_volfp(ctx, tmpfp);
@@ -2098,6 +2291,7 @@ volfile:
}
INIT_LIST_HEAD(&volfile_tmp->volfile_list);
+ volfile_tmp->graph = ctx->active;
list_add(&volfile_tmp->volfile_list, &ctx->volfile_list);
snprintf(volfile_tmp->vol_id, sizeof(volfile_tmp->vol_id), "%s",
volfile_id);
@@ -2109,6 +2303,7 @@ volfile:
locked = 0;
+post_graph_mgmt:
if (!is_mgmt_rpc_reconnect) {
need_emancipate = 1;
glusterfs_mgmt_pmap_signin(ctx);
@@ -2119,11 +2314,10 @@ out:
if (locked)
UNLOCK(&ctx->volfile_lock);
-
+post_unlock:
GF_FREE(frame->local);
frame->local = NULL;
STACK_DESTROY(frame->root);
-
free(rsp.spec);
if (dict)
@@ -2263,10 +2457,21 @@ glusterfs_volfile_fetch(glusterfs_ctx_t *ctx)
{
xlator_t *server_xl = NULL;
xlator_list_t *trav;
- int ret;
+ gf_volfile_t *volfile_obj = NULL;
+ int ret = 0;
LOCK(&ctx->volfile_lock);
{
+ if (ctx->active &&
+ mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name)) {
+ list_for_each_entry(volfile_obj, &ctx->volfile_list, volfile_list)
+ {
+ ret |= glusterfs_volfile_fetch_one(ctx, volfile_obj->vol_id);
+ }
+ UNLOCK(&ctx->volfile_lock);
+ return ret;
+ }
+
if (ctx->active) {
server_xl = ctx->active->first;
if (strcmp(server_xl->type, "protocol/server") != 0) {
@@ -2545,7 +2750,11 @@ glusterfs_listener_init(glusterfs_ctx_t *ctx)
if (!cmd_args->sock_file)
return 0;
- ret = rpcsvc_transport_unix_options_build(&options, cmd_args->sock_file);
+ options = dict_new();
+ if (!options)
+ goto out;
+
+ ret = rpcsvc_transport_unix_options_build(options, cmd_args->sock_file);
if (ret)
goto out;
@@ -2572,50 +2781,8 @@ glusterfs_listener_init(glusterfs_ctx_t *ctx)
ctx->listener = rpc;
out:
- return ret;
-}
-
-int
-glusterfs_listener_stop(glusterfs_ctx_t *ctx)
-{
- cmd_args_t *cmd_args = NULL;
- rpcsvc_t *rpc = NULL;
- rpcsvc_listener_t *listener = NULL;
- rpcsvc_listener_t *next = NULL;
- int ret = 0;
- xlator_t *this = NULL;
-
- GF_ASSERT(ctx);
-
- rpc = ctx->listener;
- ctx->listener = NULL;
-
- (void)rpcsvc_program_unregister(rpc, &glusterfs_mop_prog);
-
- list_for_each_entry_safe(listener, next, &rpc->listeners, list)
- {
- rpcsvc_listener_destroy(listener);
- }
-
- (void)rpcsvc_unregister_notify(rpc, glusterfs_rpcsvc_notify, THIS);
-
- GF_FREE(rpc);
-
- cmd_args = &ctx->cmd_args;
- if (cmd_args->sock_file) {
- ret = sys_unlink(cmd_args->sock_file);
- if (ret && (ENOENT == errno)) {
- ret = 0;
- }
- }
-
- if (ret) {
- this = THIS;
- gf_log(this->name, GF_LOG_ERROR,
- "Failed to unlink listener "
- "socket %s, error: %s",
- cmd_args->sock_file, strerror(errno));
- }
+ if (options)
+ dict_unref(options);
return ret;
}
@@ -2645,6 +2812,7 @@ glusterfs_mgmt_init(glusterfs_ctx_t *ctx)
int ret = -1;
int port = GF_DEFAULT_BASE_PORT;
char *host = NULL;
+ xlator_cmdline_option_t *opt = NULL;
cmd_args = &ctx->cmd_args;
GF_VALIDATE_OR_GOTO(THIS->name, cmd_args->volfile_server, out);
@@ -2652,6 +2820,10 @@ glusterfs_mgmt_init(glusterfs_ctx_t *ctx)
if (ctx->mgmt)
return 0;
+ options = dict_new();
+ if (!options)
+ goto out;
+
LOCK_INIT(&ctx->volfile_lock);
if (cmd_args->volfile_server_port)
@@ -2661,9 +2833,11 @@ glusterfs_mgmt_init(glusterfs_ctx_t *ctx)
if (cmd_args->volfile_server_transport &&
!strcmp(cmd_args->volfile_server_transport, "unix")) {
- ret = rpc_transport_unix_options_build(&options, host, 0);
+ ret = rpc_transport_unix_options_build(options, host, 0);
} else {
- ret = rpc_transport_inet_options_build(&options, host, port);
+ opt = find_xlator_option_in_cmd_args_t("address-family", cmd_args);
+ ret = rpc_transport_inet_options_build(options, host, port,
+ (opt ? opt->value : NULL));
}
if (ret)
goto out;
@@ -2711,6 +2885,8 @@ glusterfs_mgmt_init(glusterfs_ctx_t *ctx)
ret = rpc_clnt_start(rpc);
out:
+ if (options)
+ dict_unref(options);
return ret;
}
@@ -2842,11 +3018,7 @@ glusterfs_mgmt_pmap_signin(glusterfs_ctx_t *ctx)
int ret = -1;
int emancipate_ret = -1;
cmd_args_t *cmd_args = NULL;
- char brick_name[PATH_MAX] = {
- 0,
- };
- frame = create_frame(THIS, ctx->pool);
cmd_args = &ctx->cmd_args;
if (!cmd_args->brick_port || !cmd_args->brick_name) {
@@ -2856,20 +3028,13 @@ glusterfs_mgmt_pmap_signin(glusterfs_ctx_t *ctx)
goto out;
}
- if (cmd_args->volfile_server_transport &&
- !strcmp(cmd_args->volfile_server_transport, "rdma")) {
- snprintf(brick_name, sizeof(brick_name), "%s.rdma",
- cmd_args->brick_name);
- req.brick = brick_name;
- } else
- req.brick = cmd_args->brick_name;
-
req.port = cmd_args->brick_port;
req.pid = (int)getpid(); /* only glusterd2 consumes this */
if (ctx->active) {
top = ctx->active->first;
for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) {
+ frame = create_frame(THIS, ctx->pool);
req.brick = (*trav_p)->xlator->name;
ret = mgmt_submit_request(&req, frame, ctx, &clnt_pmap_prog,
GF_PMAP_SIGNIN, mgmt_pmap_signin_cbk,
@@ -2879,11 +3044,8 @@ glusterfs_mgmt_pmap_signin(glusterfs_ctx_t *ctx)
"failed to send sign in request; brick = %s", req.brick);
}
}
- } else {
- ret = mgmt_submit_request(&req, frame, ctx, &clnt_pmap_prog,
- GF_PMAP_SIGNIN, mgmt_pmap_signin_cbk,
- (xdrproc_t)xdr_pmap_signin_req);
}
+
/* unfortunately, the caller doesn't care about the returned value */
out:
diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
index 771af423ea5..dae41f33fef 100644
--- a/glusterfsd/src/glusterfsd.c
+++ b/glusterfsd/src/glusterfsd.c
@@ -47,38 +47,32 @@
#include <malloc.h>
#endif
-#ifdef HAVE_MALLOC_STATS
-#ifdef DEBUG
-#include <mcheck.h>
-#endif
-#endif
-
-#include "xlator.h"
-#include "glusterfs.h"
-#include "compat.h"
-#include "logging.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/logging.h>
#include "glusterfsd-messages.h"
-#include "dict.h"
-#include "list.h"
-#include "timer.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/list.h>
+#include <glusterfs/timer.h>
#include "glusterfsd.h"
-#include "revision.h"
-#include "common-utils.h"
-#include "gf-event.h"
-#include "statedump.h"
-#include "latency.h"
+#include <glusterfs/revision.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/gf-event.h>
+#include <glusterfs/statedump.h>
+#include <glusterfs/latency.h>
#include "glusterfsd-mem-types.h"
-#include "syscall.h"
-#include "call-stub.h"
+#include <glusterfs/syscall.h>
+#include <glusterfs/call-stub.h>
#include <fnmatch.h>
#include "rpc-clnt.h"
-#include "syncop.h"
-#include "client_t.h"
+#include <glusterfs/syncop.h>
+#include <glusterfs/client_t.h>
#include "netgroups.h"
#include "exports.h"
-#include "monitoring.h"
+#include <glusterfs/monitoring.h>
-#include "daemon.h"
+#include <glusterfs/daemon.h>
/* using argp for command line parsing */
static char gf_doc[] = "";
@@ -198,12 +192,13 @@ static struct argp_option gf_options[] = {
{"brick-port", ARGP_BRICK_PORT_KEY, "BRICK-PORT", OPTION_HIDDEN,
"Brick Port to be registered with Gluster portmapper"},
{"fopen-keep-cache", ARGP_FOPEN_KEEP_CACHE_KEY, "BOOL", OPTION_ARG_OPTIONAL,
- "Do not purge the cache on file open"},
+ "Do not purge the cache on file open [default: false]"},
{"global-timer-wheel", ARGP_GLOBAL_TIMER_WHEEL, "BOOL", OPTION_ARG_OPTIONAL,
"Instantiate process global timer-wheel"},
{"thin-client", ARGP_THIN_CLIENT_KEY, 0, 0,
"Enables thin mount and connects via gfproxyd daemon"},
-
+ {"global-threading", ARGP_GLOBAL_THREADING_KEY, "BOOL", OPTION_ARG_OPTIONAL,
+ "Use the global thread pool instead of io-threads"},
{0, 0, 0, 0, "Fuse options:"},
{"direct-io-mode", ARGP_DIRECT_IO_MODE_KEY, "BOOL|auto",
OPTION_ARG_OPTIONAL, "Specify direct I/O strategy [default: \"auto\"]"},
@@ -219,6 +214,12 @@ static struct argp_option gf_options[] = {
"[default: 300]"},
{"resolve-gids", ARGP_RESOLVE_GIDS_KEY, 0, 0,
"Resolve all auxiliary groups in fuse translator (max 32 otherwise)"},
+ {"lru-limit", ARGP_FUSE_LRU_LIMIT_KEY, "N", 0,
+ "Set fuse module's limit for number of inodes kept in LRU list to N "
+ "[default: 65536]"},
+ {"invalidate-limit", ARGP_FUSE_INVALIDATE_LIMIT_KEY, "N", 0,
+ "Suspend inode invalidations implied by 'lru-limit' if the number of "
+ "outstanding invalidations reaches N"},
{"background-qlen", ARGP_FUSE_BACKGROUND_QLEN_KEY, "N", 0,
"Set fuse module's background queue length to N "
"[default: 64]"},
@@ -267,6 +268,15 @@ static struct argp_option gf_options[] = {
{"fuse-flush-handle-interrupt", ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY,
"BOOL", OPTION_ARG_OPTIONAL | OPTION_HIDDEN,
"handle interrupt in fuse FLUSH handler"},
+ {"auto-invalidation", ARGP_FUSE_AUTO_INVAL_KEY, "BOOL", OPTION_ARG_OPTIONAL,
+ "controls whether fuse-kernel can auto-invalidate "
+ "attribute, dentry and page-cache. "
+ "Disable this only if same files/directories are not accessed across "
+ "two different mounts concurrently [default: \"on\"]"},
+ {"fuse-dev-eperm-ratelimit-ns", ARGP_FUSE_DEV_EPERM_RATELIMIT_NS_KEY,
+ "OPTIONS", OPTION_HIDDEN,
+ "rate limit reading from fuse device upon EPERM failure"},
+ {"brick-mux", ARGP_BRICK_MUX_KEY, 0, 0, "Enable brick mux. "},
{0, 0, 0, 0, "Miscellaneous Options:"},
{
0,
@@ -282,8 +292,12 @@ int
glusterfs_mgmt_init(glusterfs_ctx_t *ctx);
int
glusterfs_listener_init(glusterfs_ctx_t *ctx);
-int
-glusterfs_listener_stop(glusterfs_ctx_t *ctx);
+
+#define DICT_SET_VAL(method, dict, key, val, msgid) \
+ if (method(dict, key, val)) { \
+ gf_smsg("glusterfsd", GF_LOG_ERROR, 0, msgid, "key=%s", key); \
+ goto err; \
+ }
static int
set_fuse_mount_options(glusterfs_ctx_t *ctx, dict_t *options)
@@ -305,172 +319,97 @@ set_fuse_mount_options(glusterfs_ctx_t *ctx, dict_t *options)
ret = gf_asprintf(&mount_point, "%s/%s", cwd,
cmd_args->mount_point);
if (ret == -1) {
- gf_msg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_1,
- "Could not create absolute mountpoint "
- "path");
+ gf_smsg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_1,
+ "gf_asprintf failed", NULL);
goto err;
}
} else {
- gf_msg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_2,
- "Could not get current working directory");
+ gf_smsg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_2,
+ "getcwd failed", NULL);
goto err;
}
- } else
- mount_point = gf_strdup(cmd_args->mount_point);
- ret = dict_set_dynstr(options, ZR_MOUNTPOINT_OPT, mount_point);
- if (ret < 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_3,
- "failed to set mount-point to options dictionary");
- goto err;
+ } else {
+ mount_point = gf_strdup(cmd_args->mount_point);
}
+ DICT_SET_VAL(dict_set_dynstr_sizen, options, ZR_MOUNTPOINT_OPT, mount_point,
+ glusterfsd_msg_3);
if (cmd_args->fuse_attribute_timeout >= 0) {
- ret = dict_set_double(options, ZR_ATTR_TIMEOUT_OPT,
- cmd_args->fuse_attribute_timeout);
-
- if (ret < 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_4,
- "failed to set dict value "
- "for key " ZR_ATTR_TIMEOUT_OPT);
- goto err;
- }
+ DICT_SET_VAL(dict_set_double, options, ZR_ATTR_TIMEOUT_OPT,
+ cmd_args->fuse_attribute_timeout, glusterfsd_msg_3);
}
if (cmd_args->fuse_entry_timeout >= 0) {
- ret = dict_set_double(options, ZR_ENTRY_TIMEOUT_OPT,
- cmd_args->fuse_entry_timeout);
- if (ret < 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
- "failed to set dict value for key " ZR_ENTRY_TIMEOUT_OPT);
- goto err;
- }
+ DICT_SET_VAL(dict_set_double, options, ZR_ENTRY_TIMEOUT_OPT,
+ cmd_args->fuse_entry_timeout, glusterfsd_msg_3);
}
if (cmd_args->fuse_negative_timeout >= 0) {
- ret = dict_set_double(options, ZR_NEGATIVE_TIMEOUT_OPT,
- cmd_args->fuse_negative_timeout);
- if (ret < 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
- "failed to set dict value for key " ZR_NEGATIVE_TIMEOUT_OPT);
- goto err;
- }
+ DICT_SET_VAL(dict_set_double, options, ZR_NEGATIVE_TIMEOUT_OPT,
+ cmd_args->fuse_negative_timeout, glusterfsd_msg_3);
}
if (cmd_args->client_pid_set) {
- ret = dict_set_int32(options, "client-pid", cmd_args->client_pid);
- if (ret < 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
- "failed to set dict value for key client-pid");
- goto err;
- }
+ DICT_SET_VAL(dict_set_int32_sizen, options, "client-pid",
+ cmd_args->client_pid, glusterfsd_msg_3);
}
if (cmd_args->uid_map_root) {
- ret = dict_set_int32(options, "uid-map-root", cmd_args->uid_map_root);
- if (ret < 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
- "failed to set dict value for key "
- "uid-map-root");
- goto err;
- }
+ DICT_SET_VAL(dict_set_int32_sizen, options, "uid-map-root",
+ cmd_args->uid_map_root, glusterfsd_msg_3);
}
if (cmd_args->volfile_check) {
- ret = dict_set_int32(options, ZR_STRICT_VOLFILE_CHECK,
- cmd_args->volfile_check);
- if (ret < 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
- "failed to set dict value for key " ZR_STRICT_VOLFILE_CHECK);
- goto err;
- }
+ DICT_SET_VAL(dict_set_int32_sizen, options, ZR_STRICT_VOLFILE_CHECK,
+ cmd_args->volfile_check, glusterfsd_msg_3);
}
if (cmd_args->dump_fuse) {
- ret = dict_set_static_ptr(options, ZR_DUMP_FUSE, cmd_args->dump_fuse);
- if (ret < 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
- "failed to set dict value for key " ZR_DUMP_FUSE);
- goto err;
- }
+ DICT_SET_VAL(dict_set_static_ptr, options, ZR_DUMP_FUSE,
+ cmd_args->dump_fuse, glusterfsd_msg_3);
}
if (cmd_args->acl) {
- ret = dict_set_static_ptr(options, "acl", "on");
- if (ret < 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
- "failed to set dict value for key acl");
- goto err;
- }
+ DICT_SET_VAL(dict_set_static_ptr, options, "acl", "on",
+ glusterfsd_msg_3);
}
if (cmd_args->selinux) {
- ret = dict_set_static_ptr(options, "selinux", "on");
- if (ret < 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
- "failed to set dict value for key selinux");
- goto err;
- }
+ DICT_SET_VAL(dict_set_static_ptr, options, "selinux", "on",
+ glusterfsd_msg_3);
}
if (cmd_args->capability) {
- ret = dict_set_static_ptr(options, "capability", "on");
- if (ret < 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
- "failed to set dict value for key capability");
- goto err;
- }
+ DICT_SET_VAL(dict_set_static_ptr, options, "capability", "on",
+ glusterfsd_msg_3);
}
if (cmd_args->aux_gfid_mount) {
- ret = dict_set_static_ptr(options, "virtual-gfid-access", "on");
- if (ret < 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
- "failed to set dict value for key "
- "aux-gfid-mount");
- goto err;
- }
+ DICT_SET_VAL(dict_set_static_ptr, options, "virtual-gfid-access", "on",
+ glusterfsd_msg_3);
}
if (cmd_args->enable_ino32) {
- ret = dict_set_static_ptr(options, "enable-ino32", "on");
- if (ret < 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
- "failed to set dict value for key "
- "enable-ino32");
- goto err;
- }
+ DICT_SET_VAL(dict_set_static_ptr, options, "enable-ino32", "on",
+ glusterfsd_msg_3);
}
if (cmd_args->read_only) {
- ret = dict_set_static_ptr(options, "read-only", "on");
- if (ret < 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
- "failed to set dict value for key read-only");
- goto err;
- }
+ DICT_SET_VAL(dict_set_static_ptr, options, "read-only", "on",
+ glusterfsd_msg_3);
}
switch (cmd_args->fopen_keep_cache) {
case GF_OPTION_ENABLE:
- ret = dict_set_static_ptr(options, "fopen-keep-cache", "on");
- if (ret < 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
- "failed to set dict value for key "
- "fopen-keep-cache");
- goto err;
- }
+
+ DICT_SET_VAL(dict_set_static_ptr, options, "fopen-keep-cache", "on",
+ glusterfsd_msg_3);
break;
case GF_OPTION_DISABLE:
- ret = dict_set_static_ptr(options, "fopen-keep-cache", "off");
- if (ret < 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
- "failed to set dict value for key "
- "fopen-keep-cache");
- goto err;
- }
+ DICT_SET_VAL(dict_set_static_ptr, options, "fopen-keep-cache",
+ "off", glusterfsd_msg_3);
break;
- case GF_OPTION_DEFERRED: /* default */
default:
gf_msg_debug("glusterfsd", 0, "fopen-keep-cache mode %d",
cmd_args->fopen_keep_cache);
@@ -478,63 +417,43 @@ set_fuse_mount_options(glusterfs_ctx_t *ctx, dict_t *options)
}
if (cmd_args->gid_timeout_set) {
- ret = dict_set_int32(options, "gid-timeout", cmd_args->gid_timeout);
- if (ret < 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
- "failed to set dict value for key gid-timeout");
- goto err;
- }
+ DICT_SET_VAL(dict_set_int32_sizen, options, "gid-timeout",
+ cmd_args->gid_timeout, glusterfsd_msg_3);
}
if (cmd_args->resolve_gids) {
- ret = dict_set_static_ptr(options, "resolve-gids", "on");
- if (ret < 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
- "failed to set dict value for key "
- "resolve-gids");
- goto err;
- }
+ DICT_SET_VAL(dict_set_static_ptr, options, "resolve-gids", "on",
+ glusterfsd_msg_3);
+ }
+
+ if (cmd_args->lru_limit >= 0) {
+ DICT_SET_VAL(dict_set_int32_sizen, options, "lru-limit",
+ cmd_args->lru_limit, glusterfsd_msg_3);
+ }
+
+ if (cmd_args->invalidate_limit >= 0) {
+ DICT_SET_VAL(dict_set_int32_sizen, options, "invalidate-limit",
+ cmd_args->invalidate_limit, glusterfsd_msg_3);
}
if (cmd_args->background_qlen) {
- ret = dict_set_int32(options, "background-qlen",
- cmd_args->background_qlen);
- if (ret < 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
- "failed to set dict value for key "
- "background-qlen");
- goto err;
- }
+ DICT_SET_VAL(dict_set_int32_sizen, options, "background-qlen",
+ cmd_args->background_qlen, glusterfsd_msg_3);
}
if (cmd_args->congestion_threshold) {
- ret = dict_set_int32(options, "congestion-threshold",
- cmd_args->congestion_threshold);
- if (ret < 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
- "failed to set dict value for key "
- "congestion-threshold");
- goto err;
- }
+ DICT_SET_VAL(dict_set_int32_sizen, options, "congestion-threshold",
+ cmd_args->congestion_threshold, glusterfsd_msg_3);
}
switch (cmd_args->fuse_direct_io_mode) {
case GF_OPTION_DISABLE: /* disable */
- ret = dict_set_static_ptr(options, ZR_DIRECT_IO_OPT, "disable");
- if (ret < 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_5,
- "failed to set 'disable' for key " ZR_DIRECT_IO_OPT);
- goto err;
- }
+ DICT_SET_VAL(dict_set_static_ptr, options, ZR_DIRECT_IO_OPT,
+ "disable", glusterfsd_msg_3);
break;
case GF_OPTION_ENABLE: /* enable */
- ret = dict_set_static_ptr(options, ZR_DIRECT_IO_OPT, "enable");
- if (ret < 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_6,
- "failed to set 'enable' for key " ZR_DIRECT_IO_OPT);
- goto err;
- }
+ DICT_SET_VAL(dict_set_static_ptr, options, ZR_DIRECT_IO_OPT,
+ "enable", glusterfsd_msg_3);
break;
- case GF_OPTION_DEFERRED: /* auto */
default:
gf_msg_debug("glusterfsd", 0, "fuse direct io type %d",
cmd_args->fuse_direct_io_mode);
@@ -543,134 +462,83 @@ set_fuse_mount_options(glusterfs_ctx_t *ctx, dict_t *options)
switch (cmd_args->no_root_squash) {
case GF_OPTION_ENABLE: /* enable */
- ret = dict_set_static_ptr(options, "no-root-squash", "enable");
- if (ret < 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_6,
- "failed to set 'enable' for key "
- "no-root-squash");
- goto err;
- }
+ DICT_SET_VAL(dict_set_static_ptr, options, "no-root-squash",
+ "enable", glusterfsd_msg_3);
break;
- case GF_OPTION_DISABLE: /* disable/default */
default:
- ret = dict_set_static_ptr(options, "no-root-squash", "disable");
- if (ret < 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_5,
- "failed to set 'disable' for key "
- "no-root-squash");
- goto err;
- }
+ DICT_SET_VAL(dict_set_static_ptr, options, "no-root-squash",
+ "disable", glusterfsd_msg_3);
gf_msg_debug("glusterfsd", 0, "fuse no-root-squash mode %d",
cmd_args->no_root_squash);
break;
}
if (!cmd_args->no_daemon_mode) {
- ret = dict_set_static_ptr(options, "sync-to-mount", "enable");
- if (ret < 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
- "failed to set dict value for key sync-mtab");
- goto err;
- }
+ DICT_SET_VAL(dict_set_static_ptr, options, "sync-to-mount", "enable",
+ glusterfsd_msg_3);
}
if (cmd_args->use_readdirp) {
- ret = dict_set_str(options, "use-readdirp", cmd_args->use_readdirp);
- if (ret < 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
- "failed to set dict value for key "
- "use-readdirp");
- goto err;
- }
+ DICT_SET_VAL(dict_set_static_ptr, options, "use-readdirp",
+ cmd_args->use_readdirp, glusterfsd_msg_3);
}
if (cmd_args->event_history) {
ret = dict_set_str(options, "event-history", cmd_args->event_history);
- if (ret < 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
- "failed to set dict value for key "
- "event-history");
- goto err;
- }
+ DICT_SET_VAL(dict_set_static_ptr, options, "event-history",
+ cmd_args->event_history, glusterfsd_msg_3);
}
if (cmd_args->thin_client) {
- ret = dict_set_static_ptr(options, "thin-client", "on");
- if (ret < 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
- "failed to set dict value for key "
- "thin-client");
- goto err;
- }
+ DICT_SET_VAL(dict_set_static_ptr, options, "thin-client", "on",
+ glusterfsd_msg_3);
}
if (cmd_args->reader_thread_count) {
- ret = dict_set_uint32(options, "reader-thread-count",
- cmd_args->reader_thread_count);
- if (ret < 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
- "failed to set dict value for key "
- "reader-thread-count");
- goto err;
- }
+ DICT_SET_VAL(dict_set_uint32, options, "reader-thread-count",
+ cmd_args->reader_thread_count, glusterfsd_msg_3);
}
+
+ DICT_SET_VAL(dict_set_uint32, options, "auto-invalidation",
+ cmd_args->fuse_auto_inval, glusterfsd_msg_3);
+
switch (cmd_args->kernel_writeback_cache) {
case GF_OPTION_ENABLE:
- ret = dict_set_static_ptr(options, "kernel-writeback-cache", "on");
- if (ret < 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
- "failed to set dict value for key "
- "kernel-writeback-cache");
- goto err;
- }
+ DICT_SET_VAL(dict_set_static_ptr, options, "kernel-writeback-cache",
+ "on", glusterfsd_msg_3);
break;
case GF_OPTION_DISABLE:
- ret = dict_set_static_ptr(options, "kernel-writeback-cache", "off");
- if (ret < 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
- "failed to set dict value for key "
- "kernel-writeback-cache");
- goto err;
- }
+ DICT_SET_VAL(dict_set_static_ptr, options, "kernel-writeback-cache",
+ "off", glusterfsd_msg_3);
break;
- case GF_OPTION_DEFERRED: /* default */
default:
gf_msg_debug("glusterfsd", 0, "kernel-writeback-cache mode %d",
cmd_args->kernel_writeback_cache);
break;
}
if (cmd_args->attr_times_granularity) {
- ret = dict_set_uint32(options, "attr-times-granularity",
- cmd_args->attr_times_granularity);
- if (ret < 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
- "failed to set dict value for key "
- "attr-times-granularity");
- goto err;
- }
+ DICT_SET_VAL(dict_set_uint32, options, "attr-times-granularity",
+ cmd_args->attr_times_granularity, glusterfsd_msg_3);
}
switch (cmd_args->fuse_flush_handle_interrupt) {
case GF_OPTION_ENABLE:
- ret = dict_set_static_ptr(options, "flush-handle-interrupt", "on");
- if (ret < 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
- "failed to set dict value for key "
- "flush-handle-interrupt");
- goto err;
- }
+ DICT_SET_VAL(dict_set_static_ptr, options, "flush-handle-interrupt",
+ "on", glusterfsd_msg_3);
break;
case GF_OPTION_DISABLE:
- ret = dict_set_static_ptr(options, "flush-handle-interrupt", "off");
- if (ret < 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
- "failed to set dict value for key "
- "flush-handle-interrupt");
- goto err;
- }
+ DICT_SET_VAL(dict_set_static_ptr, options, "flush-handle-interrupt",
+ "off", glusterfsd_msg_3);
break;
- case GF_OPTION_DEFERRED: /* default */
default:
gf_msg_debug("glusterfsd", 0, "fuse-flush-handle-interrupt mode %d",
cmd_args->fuse_flush_handle_interrupt);
break;
}
+ if (cmd_args->global_threading) {
+ DICT_SET_VAL(dict_set_static_ptr, options, "global-threading", "on",
+ glusterfsd_msg_3);
+ }
+ if (cmd_args->fuse_dev_eperm_ratelimit_ns) {
+ DICT_SET_VAL(dict_set_uint32, options, "fuse-dev-eperm-ratelimit-ns",
+ cmd_args->fuse_dev_eperm_ratelimit_ns, glusterfsd_msg_3);
+ }
ret = 0;
err:
@@ -685,7 +553,6 @@ create_fuse_mount(glusterfs_ctx_t *ctx)
xlator_t *master = NULL;
cmd_args = &ctx->cmd_args;
-
if (!cmd_args->mount_point) {
gf_msg_trace("glusterfsd", 0,
"mount point not found, not a client process");
@@ -693,8 +560,7 @@ create_fuse_mount(glusterfs_ctx_t *ctx)
}
if (ctx->process_mode != GF_CLIENT_PROCESS) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_7,
- "Not a client process, not performing mount operation");
+ gf_smsg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_7, NULL);
return -1;
}
@@ -707,13 +573,13 @@ create_fuse_mount(glusterfs_ctx_t *ctx)
goto err;
if (xlator_set_type(master, "mount/fuse") == -1) {
- gf_msg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_8,
- "MOUNT-POINT %s initialization failed", cmd_args->mount_point);
+ gf_smsg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_8,
+ "MOUNT-POINT=%s", cmd_args->mount_point, NULL);
goto err;
}
master->ctx = ctx;
- master->options = get_new_dict();
+ master->options = dict_new();
if (!master->options)
goto err;
@@ -725,8 +591,8 @@ create_fuse_mount(glusterfs_ctx_t *ctx)
ret = dict_set_static_ptr(master->options, ZR_FUSE_MOUNTOPTS,
cmd_args->fuse_mountopts);
if (ret < 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
- "failed to set dict value for key " ZR_FUSE_MOUNTOPTS);
+ gf_smsg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_3,
+ ZR_FUSE_MOUNTOPTS, NULL);
goto err;
}
}
@@ -752,23 +618,14 @@ err:
static FILE *
get_volfp(glusterfs_ctx_t *ctx)
{
- int ret = 0;
cmd_args_t *cmd_args = NULL;
FILE *specfp = NULL;
- struct stat statbuf;
cmd_args = &ctx->cmd_args;
- ret = sys_lstat(cmd_args->volfile, &statbuf);
- if (ret == -1) {
- gf_msg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_9,
- "loading volume file %s failed", cmd_args->volfile);
- return NULL;
- }
-
if ((specfp = fopen(cmd_args->volfile, "r")) == NULL) {
- gf_msg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_9,
- "loading volume file %s failed", cmd_args->volfile);
+ gf_smsg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_9,
+ "volume_file=%s", cmd_args->volfile, NULL);
return NULL;
}
@@ -824,8 +681,7 @@ gf_remember_xlator_option(char *arg)
dot = strchr(arg, '.');
if (!dot) {
- gf_msg("", GF_LOG_WARNING, 0, glusterfsd_msg_10,
- "xlator option %s is invalid", arg);
+ gf_smsg("", GF_LOG_WARNING, 0, glusterfsd_msg_10, "arg=%s", arg, NULL);
goto out;
}
@@ -838,8 +694,7 @@ gf_remember_xlator_option(char *arg)
equals = strchr(arg, '=');
if (!equals) {
- gf_msg("", GF_LOG_WARNING, 0, glusterfsd_msg_10,
- "xlator option %s is invalid", arg);
+ gf_smsg("", GF_LOG_WARNING, 0, glusterfsd_msg_10, "arg=%s", arg, NULL);
goto out;
}
@@ -851,8 +706,7 @@ gf_remember_xlator_option(char *arg)
option->key[(equals - dot - 1)] = '\0';
if (!*(equals + 1)) {
- gf_msg("", GF_LOG_WARNING, 0, glusterfsd_msg_10,
- "xlator option %s is invalid", arg);
+ gf_smsg("", GF_LOG_WARNING, 0, glusterfsd_msg_10, "arg=%s", arg, NULL);
goto out;
}
@@ -1073,6 +927,10 @@ parse_opts(int key, char *arg, struct argp_state *state)
cmd_args->thin_client = _gf_true;
break;
+ case ARGP_BRICK_MUX_KEY:
+ cmd_args->brick_mux = _gf_true;
+ break;
+
case ARGP_PID_FILE_KEY:
cmd_args->pid_file = gf_strdup(arg);
break;
@@ -1190,7 +1048,6 @@ parse_opts(int key, char *arg, struct argp_state *state)
case ARGP_KEY_ARG:
if (state->arg_num >= 1)
argp_usage(state);
-
cmd_args->mount_point = gf_strdup(arg);
break;
@@ -1203,19 +1060,21 @@ parse_opts(int key, char *arg, struct argp_state *state)
case ARGP_BRICK_PORT_KEY:
n = 0;
- port_str = strtok_r(arg, ",", &tmp_str);
- if (gf_string2uint_base10(port_str, &n) == 0) {
- cmd_args->brick_port = n;
- port_str = strtok_r(NULL, ",", &tmp_str);
- if (port_str) {
- if (gf_string2uint_base10(port_str, &n) == 0) {
- cmd_args->brick_port2 = n;
- break;
+ if (arg != NULL) {
+ port_str = strtok_r(arg, ",", &tmp_str);
+ if (gf_string2uint_base10(port_str, &n) == 0) {
+ cmd_args->brick_port = n;
+ port_str = strtok_r(NULL, ",", &tmp_str);
+ if (port_str) {
+ if (gf_string2uint_base10(port_str, &n) == 0) {
+ cmd_args->brick_port2 = n;
+ break;
+ }
+ argp_failure(state, -1, 0,
+ "wrong brick (listen) port %s", arg);
}
- argp_failure(state, -1, 0, "wrong brick (listen) port %s",
- arg);
+ break;
}
- break;
}
argp_failure(state, -1, 0, "unknown brick (listen) port %s", arg);
@@ -1257,6 +1116,21 @@ parse_opts(int key, char *arg, struct argp_state *state)
cmd_args->resolve_gids = 1;
break;
+ case ARGP_FUSE_LRU_LIMIT_KEY:
+ if (!gf_string2int32(arg, &cmd_args->lru_limit))
+ break;
+
+ argp_failure(state, -1, 0, "unknown LRU limit option %s", arg);
+ break;
+
+ case ARGP_FUSE_INVALIDATE_LIMIT_KEY:
+ if (!gf_string2int32(arg, &cmd_args->invalidate_limit))
+ break;
+
+ argp_failure(state, -1, 0, "unknown invalidate limit option %s",
+ arg);
+ break;
+
case ARGP_FUSE_BACKGROUND_QLEN_KEY:
if (!gf_string2int(arg, &cmd_args->background_qlen))
break;
@@ -1464,6 +1338,46 @@ parse_opts(int key, char *arg, struct argp_state *state)
"unknown fuse flush handle interrupt setting \"%s\"",
arg);
break;
+
+ case ARGP_FUSE_AUTO_INVAL_KEY:
+ if (!arg)
+ arg = "yes";
+
+ if (gf_string2boolean(arg, &b) == 0) {
+ cmd_args->fuse_auto_inval = b;
+ break;
+ }
+
+ break;
+
+ case ARGP_GLOBAL_THREADING_KEY:
+ if (!arg || (*arg == 0)) {
+ arg = "yes";
+ }
+
+ if (gf_string2boolean(arg, &b) == 0) {
+ cmd_args->global_threading = b;
+ break;
+ }
+
+ argp_failure(state, -1, 0,
+ "Invalid value for global threading \"%s\"", arg);
+ break;
+
+ case ARGP_FUSE_DEV_EPERM_RATELIMIT_NS_KEY:
+ if (gf_string2uint32(arg, &cmd_args->fuse_dev_eperm_ratelimit_ns)) {
+ argp_failure(state, -1, 0,
+ "Non-numerical value for "
+ "'fuse-dev-eperm-ratelimit-ns' option %s",
+ arg);
+ } else if (cmd_args->fuse_dev_eperm_ratelimit_ns > 1000000000) {
+ argp_failure(state, -1, 0,
+ "Invalid 'fuse-dev-eperm-ratelimit-ns' value %s. "
+ "Valid range: [\"0, 1000000000\"]",
+ arg);
+ }
+
+ break;
}
return 0;
}
@@ -1481,11 +1395,6 @@ should_call_fini(glusterfs_ctx_t *ctx, xlator_t *trav)
return _gf_true;
}
- /* This is the only one known to be safe in glusterfsd. */
- if (!strcmp(trav->type, "experimental/fdl")) {
- return _gf_true;
- }
-
return _gf_false;
}
@@ -1574,11 +1483,20 @@ cleanup_and_exit(int signum)
#endif
trav = NULL;
+
+ /* previously we were releasing the cleanup mutex lock before the
+ process exit. As we are releasing the cleanup mutex lock, before
+ the process can exit some other thread which is blocked on
+ cleanup mutex lock is acquiring the cleanup mutex lock and
+ trying to acquire some resources which are already freed as a
+ part of cleanup. To avoid this, we are exiting the process without
+ releasing the cleanup mutex lock. This will not cause any lock
+ related issues as the process which acquired the lock is going down
+ */
+ /* NOTE: Only the least significant 8 bits i.e (signum & 255)
+ will be available to parent process on calling exit() */
+ exit(abs(signum));
}
- pthread_mutex_unlock(&ctx->cleanup_lock);
- /* NOTE: Only the least significant 8 bits i.e (signum & 255)
- will be available to parent process on calling exit() */
- exit(abs(signum));
}
static void
@@ -1591,22 +1509,18 @@ reincarnate(int signum)
ctx = glusterfsd_ctx;
cmd_args = &ctx->cmd_args;
+ gf_msg_trace("gluster", 0, "received reincarnate request (sig:HUP)");
+
if (cmd_args->volfile_server) {
- gf_msg("glusterfsd", GF_LOG_INFO, 0, glusterfsd_msg_11,
- "Fetching the volume file from server...");
+ gf_smsg("glusterfsd", GF_LOG_INFO, 0, glusterfsd_msg_11, NULL);
ret = glusterfs_volfile_fetch(ctx);
- } else {
- gf_msg_debug("glusterfsd", 0,
- "Not reloading volume specification file"
- " on SIGHUP");
}
/* Also, SIGHUP should do logrotate */
gf_log_logrotate(1);
if (ret < 0)
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_12,
- "volume initialization failed.");
+ gf_smsg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_12, NULL);
return;
}
@@ -1658,8 +1572,7 @@ glusterfs_ctx_defaults_init(glusterfs_ctx_t *ctx)
ret = xlator_mem_acct_init(THIS, gfd_mt_end);
if (ret != 0) {
- gf_msg(THIS->name, GF_LOG_CRITICAL, 0, glusterfsd_msg_34,
- "memory accounting init failed.");
+ gf_smsg(THIS->name, GF_LOG_CRITICAL, 0, glusterfsd_msg_34, NULL);
return ret;
}
@@ -1673,8 +1586,7 @@ glusterfs_ctx_defaults_init(glusterfs_ctx_t *ctx)
ctx->process_uuid = generate_glusterfs_ctx_id();
if (!ctx->process_uuid) {
- gf_msg("", GF_LOG_CRITICAL, 0, glusterfsd_msg_13,
- "ERROR: glusterfs uuid generation failed");
+ gf_smsg("", GF_LOG_CRITICAL, 0, glusterfsd_msg_13, NULL);
goto out;
}
@@ -1682,23 +1594,20 @@ glusterfs_ctx_defaults_init(glusterfs_ctx_t *ctx)
ctx->iobuf_pool = iobuf_pool_new();
if (!ctx->iobuf_pool) {
- gf_msg("", GF_LOG_CRITICAL, 0, glusterfsd_msg_14,
- "ERROR: glusterfs iobuf pool creation failed");
+ gf_smsg("", GF_LOG_CRITICAL, 0, glusterfsd_msg_14, "iobuf", NULL);
goto out;
}
- ctx->event_pool = event_pool_new(DEFAULT_EVENT_POOL_SIZE,
- STARTING_EVENT_THREADS);
+ ctx->event_pool = gf_event_pool_new(DEFAULT_EVENT_POOL_SIZE,
+ STARTING_EVENT_THREADS);
if (!ctx->event_pool) {
- gf_msg("", GF_LOG_CRITICAL, 0, glusterfsd_msg_14,
- "ERROR: glusterfs event pool creation failed");
+ gf_smsg("", GF_LOG_CRITICAL, 0, glusterfsd_msg_14, "event", NULL);
goto out;
}
ctx->pool = GF_CALLOC(1, sizeof(call_pool_t), gfd_mt_call_pool_t);
if (!ctx->pool) {
- gf_msg("", GF_LOG_CRITICAL, 0, glusterfsd_msg_14,
- "ERROR: glusterfs call pool creation failed");
+ gf_smsg("", GF_LOG_CRITICAL, 0, glusterfsd_msg_14, "call", NULL);
goto out;
}
@@ -1708,22 +1617,19 @@ glusterfs_ctx_defaults_init(glusterfs_ctx_t *ctx)
/* frame_mem_pool size 112 * 4k */
ctx->pool->frame_mem_pool = mem_pool_new(call_frame_t, 4096);
if (!ctx->pool->frame_mem_pool) {
- gf_msg("", GF_LOG_CRITICAL, 0, glusterfsd_msg_14,
- "ERROR: glusterfs frame pool creation failed");
+ gf_smsg("", GF_LOG_CRITICAL, 0, glusterfsd_msg_14, "frame", NULL);
goto out;
}
/* stack_mem_pool size 256 * 1024 */
ctx->pool->stack_mem_pool = mem_pool_new(call_stack_t, 1024);
if (!ctx->pool->stack_mem_pool) {
- gf_msg("", GF_LOG_CRITICAL, 0, glusterfsd_msg_14,
- "ERROR: glusterfs stack pool creation failed");
+ gf_smsg("", GF_LOG_CRITICAL, 0, glusterfsd_msg_14, "stack", NULL);
goto out;
}
ctx->stub_mem_pool = mem_pool_new(call_stub_t, 1024);
if (!ctx->stub_mem_pool) {
- gf_msg("", GF_LOG_CRITICAL, 0, glusterfsd_msg_14,
- "ERROR: glusterfs stub pool creation failed");
+ gf_smsg("", GF_LOG_CRITICAL, 0, glusterfsd_msg_14, "stub", NULL);
goto out;
}
@@ -1781,6 +1687,10 @@ glusterfs_ctx_defaults_init(glusterfs_ctx_t *ctx)
INIT_LIST_HEAD(&cmd_args->xlator_options);
INIT_LIST_HEAD(&cmd_args->volfile_servers);
+ ctx->pxl_count = 0;
+ pthread_mutex_init(&ctx->fd_lock, NULL);
+ pthread_cond_init(&ctx->fd_cond, NULL);
+ INIT_LIST_HEAD(&ctx->janitor_fds);
lim.rlim_cur = RLIM_INFINITY;
lim.rlim_max = RLIM_INFINITY;
@@ -2069,7 +1979,7 @@ parse_cmdline(int argc, char *argv[], glusterfs_ctx_t *ctx)
struct stat stbuf = {
0,
};
- char timestr[32];
+ char timestr[GF_TIMESTR_SIZE];
char tmp_logfile[1024] = {0};
char *tmp_logfile_dyn = NULL;
char *tmp_logfilebase = NULL;
@@ -2085,6 +1995,11 @@ parse_cmdline(int argc, char *argv[], glusterfs_ctx_t *ctx)
ctx->ssl_cert_depth = glusterfs_read_secure_access_file();
}
+ /* Need to set lru_limit to below 0 to indicate there was nothing
+ specified. This is needed as 0 is a valid option, and may not be
+ default value. */
+ cmd_args->lru_limit = -1;
+
argp_parse(&argp, argc, argv, ARGP_IN_ORDER, NULL, cmd_args);
if (cmd_args->print_xlatordir || cmd_args->print_statedumpdir ||
@@ -2126,9 +2041,7 @@ parse_cmdline(int argc, char *argv[], glusterfs_ctx_t *ctx)
/* Make sure after the parsing cli, if '--volfile-server' option is
given, then '--volfile-id' is mandatory */
if (cmd_args->volfile_server && !cmd_args->volfile_id) {
- gf_msg("glusterfs", GF_LOG_CRITICAL, 0, glusterfsd_msg_15,
- "ERROR: '--volfile-id' is mandatory if '-s' OR "
- "'--volfile-server' option is given");
+ gf_smsg("glusterfs", GF_LOG_CRITICAL, 0, glusterfsd_msg_15, NULL);
ret = -1;
goto out;
}
@@ -2145,8 +2058,8 @@ parse_cmdline(int argc, char *argv[], glusterfs_ctx_t *ctx)
and exit */
ret = sys_stat(cmd_args->volfile, &stbuf);
if (ret) {
- gf_msg("glusterfs", GF_LOG_CRITICAL, errno, glusterfsd_msg_16,
- "ERROR: parsing the volfile failed");
+ gf_smsg("glusterfs", GF_LOG_CRITICAL, errno, glusterfsd_msg_16,
+ NULL);
/* argp_usage (argp.) */
fprintf(stderr, "USAGE: %s [options] [mountpoint]\n", argv[0]);
goto out;
@@ -2170,8 +2083,8 @@ parse_cmdline(int argc, char *argv[], glusterfs_ctx_t *ctx)
if (((ret == 0) &&
(S_ISREG(stbuf.st_mode) || S_ISLNK(stbuf.st_mode))) ||
(ret == -1)) {
- /* Have separate logfile per run */
- gf_time_fmt(timestr, sizeof timestr, time(NULL), gf_timefmt_FT);
+ /* Have separate logfile per run. */
+ gf_time_fmt(timestr, sizeof timestr, gf_time(), gf_timefmt_FT);
sprintf(tmp_logfile, "%s.%s.%d", cmd_args->log_file, timestr,
getpid());
@@ -2198,9 +2111,7 @@ parse_cmdline(int argc, char *argv[], glusterfs_ctx_t *ctx)
compatibility with third party applications
*/
if (cmd_args->max_connect_attempts) {
- gf_msg("glusterfs", GF_LOG_WARNING, 0, glusterfsd_msg_33,
- "obsolete option '--volfile-max-fecth-attempts or "
- "fetch-attempts' was provided");
+ gf_smsg("glusterfs", GF_LOG_WARNING, 0, glusterfsd_msg_33, NULL);
}
#ifdef GF_DARWIN_HOST_OS
@@ -2227,8 +2138,8 @@ glusterfs_pidfile_setup(glusterfs_ctx_t *ctx)
pidfp = fopen(cmd_args->pid_file, "a+");
if (!pidfp) {
- gf_msg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_17,
- "pidfile %s open failed", cmd_args->pid_file);
+ gf_smsg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_17,
+ "pidfile=%s", cmd_args->pid_file, NULL);
goto out;
}
@@ -2279,29 +2190,29 @@ glusterfs_pidfile_update(glusterfs_ctx_t *ctx, pid_t pid)
ret = lockf(fileno(pidfp), F_TLOCK, 0);
if (ret) {
- gf_msg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_18,
- "pidfile %s lock failed", cmd_args->pid_file);
+ gf_smsg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_18,
+ "pidfile=%s", cmd_args->pid_file, NULL);
return ret;
}
ret = sys_ftruncate(fileno(pidfp), 0);
if (ret) {
- gf_msg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_20,
- "pidfile %s truncation failed", cmd_args->pid_file);
+ gf_smsg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_20,
+ "pidfile=%s", cmd_args->pid_file, NULL);
return ret;
}
ret = fprintf(pidfp, "%d\n", pid);
if (ret <= 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_21,
- "pidfile %s write failed", cmd_args->pid_file);
+ gf_smsg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_21,
+ "pidfile=%s", cmd_args->pid_file, NULL);
return ret;
}
ret = fflush(pidfp);
if (ret) {
- gf_msg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_21,
- "pidfile %s write failed", cmd_args->pid_file);
+ gf_smsg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_21,
+ "pidfile=%s", cmd_args->pid_file, NULL);
return ret;
}
@@ -2388,10 +2299,13 @@ glusterfs_signals_setup(glusterfs_ctx_t *ctx)
sigaddset(&set, SIGUSR1); /* gf_proc_dump_info */
sigaddset(&set, SIGUSR2);
+ /* Signals needed for asynchronous framework. */
+ sigaddset(&set, GF_ASYNC_SIGQUEUE);
+ sigaddset(&set, GF_ASYNC_SIGCTRL);
+
ret = pthread_sigmask(SIG_BLOCK, &set, NULL);
if (ret) {
- gf_msg("glusterfsd", GF_LOG_WARNING, errno, glusterfsd_msg_22,
- "failed to execute pthread_sigmask");
+ gf_smsg("glusterfsd", GF_LOG_WARNING, errno, glusterfsd_msg_22, NULL);
return ret;
}
@@ -2403,8 +2317,7 @@ glusterfs_signals_setup(glusterfs_ctx_t *ctx)
fallback to signals getting handled by other threads.
setup the signal handlers
*/
- gf_msg("glusterfsd", GF_LOG_WARNING, errno, glusterfsd_msg_23,
- "failed to create pthread");
+ gf_smsg("glusterfsd", GF_LOG_WARNING, errno, glusterfsd_msg_23, NULL);
return ret;
}
@@ -2427,9 +2340,6 @@ daemonize(glusterfs_ctx_t *ctx)
goto out;
if (cmd_args->no_daemon_mode) {
- ret = glusterfs_pidfile_update(ctx, getpid());
- if (ret)
- goto out;
goto postfork;
}
@@ -2453,8 +2363,7 @@ daemonize(glusterfs_ctx_t *ctx)
sys_close(ctx->daemon_pipe[1]);
}
- gf_msg("daemonize", GF_LOG_ERROR, errno, glusterfsd_msg_24,
- "daemonization failed");
+ gf_smsg("daemonize", GF_LOG_ERROR, errno, glusterfsd_msg_24, NULL);
goto out;
case 0:
/* child */
@@ -2475,8 +2384,8 @@ daemonize(glusterfs_ctx_t *ctx)
} else {
err = cstatus;
}
- gf_msg("daemonize", GF_LOG_ERROR, 0, glusterfsd_msg_25,
- "mount failed");
+ gf_smsg("daemonize", GF_LOG_ERROR, 0, glusterfsd_msg_25,
+ NULL);
exit(err);
}
}
@@ -2485,13 +2394,28 @@ daemonize(glusterfs_ctx_t *ctx)
will be available to parent process on calling exit() */
if (err)
_exit(abs(err));
- ret = glusterfs_pidfile_update(ctx, child_pid);
- if (ret)
- _exit(1);
+
+ /* Update pid in parent only for glusterd process */
+ if (ctx->process_mode == GF_GLUSTERD_PROCESS) {
+ ret = glusterfs_pidfile_update(ctx, child_pid);
+ if (ret)
+ exit(1);
+ }
_exit(0);
}
postfork:
+ /* Update pid in child either process_mode is not belong to glusterd
+ or process is spawned in no daemon mode
+ */
+ if ((ctx->process_mode != GF_GLUSTERD_PROCESS) ||
+ (cmd_args->no_daemon_mode)) {
+ ret = glusterfs_pidfile_update(ctx, getpid());
+ if (ret)
+ goto out;
+ }
+ gf_log("glusterfs", GF_LOG_INFO, "Pid of current running process is %d",
+ getpid());
ret = gf_log_inject_timer_event(ctx);
glusterfs_signals_setup(ctx);
@@ -2541,24 +2465,6 @@ out:
#endif
int
-glusterfs_graph_fini(glusterfs_graph_t *graph)
-{
- xlator_t *trav = NULL;
-
- trav = graph->first;
-
- while (trav) {
- if (trav->init_succeeded) {
- trav->fini(trav);
- trav->init_succeeded = 0;
- }
- trav = trav->next;
- }
-
- return 0;
-}
-
-int
glusterfs_process_volfp(glusterfs_ctx_t *ctx, FILE *fp)
{
glusterfs_graph_t *graph = NULL;
@@ -2570,16 +2476,13 @@ glusterfs_process_volfp(glusterfs_ctx_t *ctx, FILE *fp)
graph = glusterfs_graph_construct(fp);
if (!graph) {
- gf_msg("", GF_LOG_ERROR, 0, glusterfsd_msg_26,
- "failed to construct the graph");
+ gf_smsg("", GF_LOG_ERROR, 0, glusterfsd_msg_26, NULL);
goto out;
}
for (trav = graph->first; trav; trav = trav->next) {
if (strcmp(trav->type, "mount/fuse") == 0) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_27,
- "fuse xlator cannot be specified in volume "
- "file");
+ gf_smsg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_27, NULL);
goto out;
}
}
@@ -2618,6 +2521,9 @@ out:
xl = graph->first;
if ((ctx->active != graph) &&
(xl && !strcmp(xl->type, "protocol/server"))) {
+ /* Take dict ref for every graph xlator to avoid dict leak
+ at the time of graph destroying
+ */
glusterfs_graph_fini(graph);
glusterfs_graph_destroy(graph);
}
@@ -2657,8 +2563,7 @@ glusterfs_volumes_init(glusterfs_ctx_t *ctx)
fp = get_volfp(ctx);
if (!fp) {
- gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_28,
- "Cannot reach volume specification file");
+ gf_smsg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_28, NULL);
ret = -1;
goto out;
}
@@ -2685,14 +2590,11 @@ main(int argc, char *argv[])
};
cmd_args_t *cmd = NULL;
- mem_pools_init_early();
-
gf_check_and_set_mem_acct(argc, argv);
ctx = glusterfs_ctx_new();
if (!ctx) {
- gf_msg("glusterfs", GF_LOG_CRITICAL, 0, glusterfsd_msg_29,
- "ERROR: glusterfs context not initialized");
+ gf_smsg("glusterfs", GF_LOG_CRITICAL, 0, glusterfsd_msg_29, NULL);
return ENOMEM;
}
glusterfsd_ctx = ctx;
@@ -2754,6 +2656,12 @@ main(int argc, char *argv[])
if (ret)
goto out;
+ /* set brick_mux mode only for server process */
+ if ((ctx->process_mode != GF_SERVER_PROCESS) && cmd->brick_mux) {
+ gf_smsg("glusterfs", GF_LOG_CRITICAL, 0, glusterfsd_msg_43, NULL);
+ goto out;
+ }
+
/* log the version of glusterfs running here along with the actual
command line options. */
{
@@ -2765,15 +2673,14 @@ main(int argc, char *argv[])
len = snprintf(cmdlinestr + pos, sizeof(cmdlinestr) - pos, " %s",
argv[i]);
if ((len <= 0) || (len >= (sizeof(cmdlinestr) - pos))) {
- gf_msg("glusterfs", GF_LOG_ERROR, 0, glusterfsd_msg_29,
- "failed to create command line string");
+ gf_smsg("glusterfs", GF_LOG_ERROR, 0, glusterfsd_msg_029, NULL);
ret = -1;
goto out;
}
}
- gf_msg(argv[0], GF_LOG_INFO, 0, glusterfsd_msg_30,
- "Started running %s version %s (args: %s)", argv[0],
- PACKAGE_VERSION, cmdlinestr);
+ gf_smsg(argv[0], GF_LOG_INFO, 0, glusterfsd_msg_30, "arg=%s", argv[0],
+ "version=%s", PACKAGE_VERSION, "cmdlinestr=%s", cmdlinestr,
+ NULL);
ctx->cmdlinestr = gf_strdup(cmdlinestr);
}
@@ -2793,7 +2700,12 @@ main(int argc, char *argv[])
* the parent, but we want to do it as soon as possible after that in
* case something else depends on pool allocations.
*/
- mem_pools_init_late();
+ mem_pools_init();
+
+ ret = gf_async_init(ctx);
+ if (ret < 0) {
+ goto out;
+ }
#ifdef GF_LINUX_HOST_OS
ret = set_oom_score_adj(ctx);
@@ -2803,26 +2715,24 @@ main(int argc, char *argv[])
ctx->env = syncenv_new(0, 0, 0);
if (!ctx->env) {
- gf_msg("", GF_LOG_ERROR, 0, glusterfsd_msg_31,
- "Could not create new sync-environment");
+ gf_smsg("", GF_LOG_ERROR, 0, glusterfsd_msg_31, NULL);
goto out;
}
/* do this _after_ daemonize() */
- if (cmd->global_timer_wheel) {
- if (!glusterfs_ctx_tw_get(ctx)) {
- ret = -1;
- goto out;
- }
+ if (!glusterfs_ctx_tw_get(ctx)) {
+ ret = -1;
+ goto out;
}
ret = glusterfs_volumes_init(ctx);
if (ret)
goto out;
- ret = event_dispatch(ctx->event_pool);
+ ret = gf_event_dispatch(ctx->event_pool);
out:
- // glusterfs_ctx_destroy (ctx);
+ // glusterfs_ctx_destroy (ctx);
+ gf_async_fini();
return ret;
}
diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h
index 0042054e138..4e1413caa70 100644
--- a/glusterfsd/src/glusterfsd.h
+++ b/glusterfsd/src/glusterfsd.h
@@ -109,6 +109,12 @@ enum argp_option_keys {
ARGP_ATTR_TIMES_GRANULARITY_KEY = 187,
ARGP_PRINT_LIBEXECDIR_KEY = 188,
ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY = 189,
+ ARGP_FUSE_LRU_LIMIT_KEY = 190,
+ ARGP_FUSE_AUTO_INVAL_KEY = 191,
+ ARGP_GLOBAL_THREADING_KEY = 192,
+ ARGP_BRICK_MUX_KEY = 193,
+ ARGP_FUSE_DEV_EPERM_RATELIMIT_NS_KEY = 194,
+ ARGP_FUSE_INVALIDATE_LIMIT_KEY = 195,
};
struct _gfd_vol_top_priv {
@@ -129,14 +135,6 @@ glusterfs_volfile_fetch(glusterfs_ctx_t *ctx);
void
cleanup_and_exit(int signum);
-int
-glusterfs_volume_top_write_perf(uint32_t blk_size, uint32_t blk_count,
- char *brick_path, double *throughput,
- double *time);
-int
-glusterfs_volume_top_read_perf(uint32_t blk_size, uint32_t blk_count,
- char *brick_path, double *throughput,
- double *time);
void
xlator_mem_cleanup(xlator_t *this);
diff --git a/heal/src/Makefile.am b/heal/src/Makefile.am
index f04a294cc56..aa18d3eff88 100644
--- a/heal/src/Makefile.am
+++ b/heal/src/Makefile.am
@@ -1,5 +1,6 @@
if WITH_SERVER
-sbin_PROGRAMS = glfsheal
+scriptdir = $(GLUSTERFS_LIBEXECDIR)
+script_PROGRAMS = glfsheal
endif
glfsheal_SOURCES = glfs-heal.c
@@ -18,8 +19,7 @@ AM_CPPFLAGS = $(GF_CPPFLAGS) \
-I$(top_srcdir)/rpc/xdr/src\
-I$(top_builddir)/rpc/xdr/src\
-I$(top_srcdir)/api/src\
- -DDATADIR=\"$(localstatedir)\" \
- -DSBIN_DIR=\"$(sbindir)\"
+ -DDATADIR=\"$(localstatedir)\"
AM_CFLAGS = -Wall $(GF_CFLAGS) $(XML_CFLAGS)
diff --git a/heal/src/glfs-heal.c b/heal/src/glfs-heal.c
index 0ce9defc83a..bf4b47f8760 100644
--- a/heal/src/glfs-heal.c
+++ b/heal/src/glfs-heal.c
@@ -12,15 +12,13 @@
#include <stdlib.h>
#include <errno.h>
#include "glfs.h"
-#include "glfs-handles.h"
#include "glfs-internal.h"
#include "protocol-common.h"
-#include "syscall.h"
-#include "syncop.h"
-#include "syncop-utils.h"
+#include <glusterfs/syscall.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/syncop-utils.h>
#include <string.h>
-#include <time.h>
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include <libgen.h>
#if (HAVE_LIB_XML)
@@ -1057,6 +1055,10 @@ glfsh_set_heal_options(glfs_t *fs, gf_xl_afr_op_t heal_op)
if (ret)
goto out;
+ ret = glfs_set_xlator_option(fs, "*-replicate-*", "halo-enabled", "off");
+ if (ret)
+ goto out;
+
if ((heal_op != GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE) &&
(heal_op != GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK) &&
(heal_op != GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME))
@@ -1146,7 +1148,8 @@ glfsh_gather_heal_info(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
while (xl->next)
xl = xl->next;
while (xl) {
- if (strcmp(xl->type, "protocol/client") == 0) {
+ if (strcmp(xl->type, "protocol/client") == 0 &&
+ !strstr(xl->name, "-ta-")) {
heal_xl = _get_ancestor(xl, heal_op);
if (heal_xl) {
old_THIS = THIS;
@@ -1688,6 +1691,12 @@ main(int argc, char **argv)
goto out;
}
+ ret = glfs_setfspid(fs, GF_CLIENT_PID_GLFS_HEAL);
+ if (ret) {
+ printf("Setting client pid failed, %s\n", strerror(errno));
+ goto out;
+ }
+
ret = glfs_init(fs);
if (ret < 0) {
ret = -errno;
@@ -1720,14 +1729,19 @@ main(int argc, char **argv)
goto out;
}
+ char *var_str = (heal_op == GF_SHD_OP_INDEX_SUMMARY ||
+ heal_op == GF_SHD_OP_HEAL_SUMMARY)
+ ? "replicate/disperse"
+ : "replicate";
+
ret = glfsh_validate_volume(top_subvol, heal_op);
if (ret < 0) {
ret = -EINVAL;
- gf_asprintf(&op_errstr, "Volume %s is not of type %s", volname,
- (heal_op == GF_SHD_OP_INDEX_SUMMARY ||
- heal_op == GF_SHD_OP_HEAL_SUMMARY)
- ? "replicate/disperse"
- : "replicate");
+ gf_asprintf(&op_errstr,
+ "This command is supported "
+ "for only volumes of %s type. Volume %s "
+ "is not of type %s",
+ var_str, volname, var_str);
goto out;
}
rootloc.inode = inode_ref(top_subvol->itable->root);
diff --git a/libgfchangelog.pc.in b/libgfchangelog.pc.in
index e2ff1fb6214..79eac2ad2d3 100644
--- a/libgfchangelog.pc.in
+++ b/libgfchangelog.pc.in
@@ -9,4 +9,4 @@ Description: GlusterFS Changelog Consumer Library
Version: @LIBGFCHANGELOG_VERSION@
Requires: @PKGCONFIG_UUID@
Libs: -L${libdir} -lgfchangelog -lglusterfs
-Cflags: -I${includedir} -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64
+Cflags: -I${includedir} -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 -D__USE_LARGEFILE64
diff --git a/libgfdb.pc.in b/libgfdb.pc.in
deleted file mode 100644
index 463e8becd3a..00000000000
--- a/libgfdb.pc.in
+++ /dev/null
@@ -1,12 +0,0 @@
-prefix=@prefix@
-exec_prefix=@exec_prefix@
-libdir=@libdir@
-includedir=@includedir@
-
-
-Name: libgfdb
-Description: GlusterFS Database Library
-Version: @LIBGFDB_VERSION@
-Libs: -L${libdir} -lgfchangedb -lglusterfs
-Cflags: -I${includedir}
-Requires: sqlite3 @PKGCONFIG_UUID@
diff --git a/xlators/experimental/dht2/dht2-client/Makefile.am b/libglusterd/Makefile.am
index a985f42a877..a985f42a877 100644
--- a/xlators/experimental/dht2/dht2-client/Makefile.am
+++ b/libglusterd/Makefile.am
diff --git a/libglusterd/src/Makefile.am b/libglusterd/src/Makefile.am
new file mode 100644
index 00000000000..684d2bac96b
--- /dev/null
+++ b/libglusterd/src/Makefile.am
@@ -0,0 +1,31 @@
+libglusterd_la_CFLAGS = $(GF_CFLAGS) $(GF_DARWIN_LIBGLUSTERFS_CFLAGS) \
+ -DDATADIR=\"$(localstatedir)\"
+
+libglusterd_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 \
+ -DXLATORDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator\" \
+ -DXLATORPARENTDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)\" \
+ -DXXH_NAMESPACE=GF_ -D__USE_LARGEFILE64 \
+ -I$(CONTRIBDIR)/rbtree \
+ -I$(CONTRIBDIR)/libexecinfo ${ARGP_STANDALONE_CPPFLAGS} \
+ -DSBIN_DIR=\"$(sbindir)\" -I$(CONTRIBDIR)/timer-wheel \
+ -I$(CONTRIBDIR)/xxhash \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src/
+
+libglusterd_la_LIBADD = $(ZLIB_LIBS) $(MATH_LIB) $(UUID_LIBS)
+libglusterd_la_LDFLAGS = -version-info $(LIBGLUSTERFS_LT_VERSION) $(GF_LDFLAGS) \
+ -export-symbols $(top_srcdir)/libglusterd/src/libglusterd.sym
+
+lib_LTLIBRARIES = libglusterd.la
+
+libglusterd_la_SOURCES = gd-common-utils.c
+
+libglusterd_la_HEADERS = gd-common-utils.h
+
+libglusterd_ladir = $(includedir)/glusterfs
+
+noinst_HEADERS = gd-common-utils.h
+
+EXTRA_DIST = libglusterd.sym
+
+CLEANFILES =
diff --git a/libglusterd/src/gd-common-utils.c b/libglusterd/src/gd-common-utils.c
new file mode 100644
index 00000000000..243fab215e6
--- /dev/null
+++ b/libglusterd/src/gd-common-utils.c
@@ -0,0 +1,78 @@
+/*
+ Copyright (c) 2019 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "gd-common-utils.h"
+#include "cli1-xdr.h"
+
+int
+get_vol_type(int type, int dist_count, int brick_count)
+{
+ if ((type != GF_CLUSTER_TYPE_TIER) && (type > 0) &&
+ (dist_count < brick_count))
+ type = type + GF_CLUSTER_TYPE_MAX - 1;
+
+ return type;
+}
+
+char *
+get_struct_variable(int mem_num, gf_gsync_status_t *sts_val)
+{
+ switch (mem_num) {
+ case 0:
+ return (sts_val->node);
+ case 1:
+ return (sts_val->master);
+ case 2:
+ return (sts_val->brick);
+ case 3:
+ return (sts_val->slave_user);
+ case 4:
+ return (sts_val->slave);
+ case 5:
+ return (sts_val->slave_node);
+ case 6:
+ return (sts_val->worker_status);
+ case 7:
+ return (sts_val->crawl_status);
+ case 8:
+ return (sts_val->last_synced);
+ case 9:
+ return (sts_val->entry);
+ case 10:
+ return (sts_val->data);
+ case 11:
+ return (sts_val->meta);
+ case 12:
+ return (sts_val->failures);
+ case 13:
+ return (sts_val->checkpoint_time);
+ case 14:
+ return (sts_val->checkpoint_completed);
+ case 15:
+ return (sts_val->checkpoint_completion_time);
+ case 16:
+ return (sts_val->brick_host_uuid);
+ case 17:
+ return (sts_val->last_synced_utc);
+ case 18:
+ return (sts_val->checkpoint_time_utc);
+ case 19:
+ return (sts_val->checkpoint_completion_time_utc);
+ case 20:
+ return (sts_val->slavekey);
+ case 21:
+ return (sts_val->session_slave);
+ default:
+ goto out;
+ }
+
+out:
+ return NULL;
+}
diff --git a/libglusterd/src/gd-common-utils.h b/libglusterd/src/gd-common-utils.h
new file mode 100644
index 00000000000..b9bb4f956fe
--- /dev/null
+++ b/libglusterd/src/gd-common-utils.h
@@ -0,0 +1,28 @@
+/*
+ Copyright (c) 2019 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GD_COMMON_UTILS_H
+#define _GD_COMMON_UTILS_H
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <limits.h>
+#include <stddef.h>
+
+#include "protocol-common.h"
+#include "rpcsvc.h"
+
+int
+get_vol_type(int type, int dist_count, int brick_count);
+
+char *
+get_struct_variable(int mem_num, gf_gsync_status_t *sts_val);
+
+#endif /* _GD_COMMON_UTILS_H */
diff --git a/libglusterd/src/libglusterd.sym b/libglusterd/src/libglusterd.sym
new file mode 100644
index 00000000000..45969a87c12
--- /dev/null
+++ b/libglusterd/src/libglusterd.sym
@@ -0,0 +1,2 @@
+get_vol_type
+get_struct_variable
diff --git a/libglusterfs/src/Makefile.am b/libglusterfs/src/Makefile.am
index a910d8e8a53..385e8ef4600 100644
--- a/libglusterfs/src/Makefile.am
+++ b/libglusterfs/src/Makefile.am
@@ -6,14 +6,14 @@ libglusterfs_la_CFLAGS = $(GF_CFLAGS) $(GF_DARWIN_LIBGLUSTERFS_CFLAGS) \
libglusterfs_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 \
-DXLATORDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator\" \
-DXLATORPARENTDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)\" \
- -DXXH_NAMESPACE=GF_ \
- -I$(top_srcdir)/rpc/xdr/src/ -I$(top_builddir)/rpc/xdr/src/ \
- -I$(top_srcdir)/rpc/rpc-lib/src/ -I$(CONTRIBDIR)/rbtree \
+ -DXXH_NAMESPACE=GF_ -D__USE_LARGEFILE64 \
+ -I$(CONTRIBDIR)/rbtree \
-I$(CONTRIBDIR)/libexecinfo ${ARGP_STANDALONE_CPPFLAGS} \
-DSBIN_DIR=\"$(sbindir)\" -I$(CONTRIBDIR)/timer-wheel \
-I$(CONTRIBDIR)/xxhash
-libglusterfs_la_LIBADD = $(ZLIB_LIBS) $(MATH_LIB) $(UUID_LIBS)
+libglusterfs_la_LIBADD = $(ZLIB_LIBS) $(MATH_LIB) $(UUID_LIBS) $(LIB_DL) \
+ $(URCU_LIBS) $(URCU_CDS_LIBS)
libglusterfs_la_LDFLAGS = -version-info $(LIBGLUSTERFS_LT_VERSION) $(GF_LDFLAGS) \
-export-symbols $(top_srcdir)/libglusterfs/src/libglusterfs.sym
@@ -37,30 +37,36 @@ libglusterfs_la_SOURCES = dict.c xlator.c logging.c \
$(CONTRIBDIR)/timer-wheel/timer-wheel.c \
$(CONTRIBDIR)/timer-wheel/find_last_bit.c default-args.c locking.c \
$(CONTRIBDIR)/xxhash/xxhash.c \
- compound-fop-utils.c throttle-tbf.c monitoring.c
+ throttle-tbf.c monitoring.c async.c
nodist_libglusterfs_la_SOURCES = y.tab.c graph.lex.c defaults.c
-nodist_libglusterfs_la_HEADERS = y.tab.h protocol-common.h
-
-BUILT_SOURCES = graph.lex.c defaults.c eventtypes.h protocol-common.h
-
-protocol-common.h: $(top_srcdir)/rpc/rpc-lib/src/protocol-common.h
- cp $(top_srcdir)/rpc/rpc-lib/src/protocol-common.h .
-
-libglusterfs_la_HEADERS = common-utils.h defaults.h default-args.h \
- dict.h glusterfs.h hashfn.h timespec.h logging.h xlator.h \
- stack.h timer.h list.h inode.h call-stub.h compat.h fd.h \
- revision.h compat-errno.h gf-event.h mem-pool.h byte-order.h \
- gf-dirent.h locking.h syscall.h iobuf.h globals.h statedump.h \
- checksum.h daemon.h store.h rbthash.h iatt.h latency.h \
- mem-types.h syncop.h cluster-syncop.h graph-utils.h trie.h \
- refcount.h run.h options.h lkowner.h fd-lk.h circ-buff.h \
- event-history.h gidcache.h client_t.h glusterfs-acl.h \
- glfs-message-id.h template-component-messages.h strfd.h \
- syncop-utils.h parse-utils.h libglusterfs-messages.h \
- lvm-defaults.h quota-common-utils.h rot-buffs.h \
- compat-uuid.h upcall-utils.h throttle-tbf.h events.h\
- compound-fop-utils.h atomic.h monitoring.h
+nodist_libglusterfs_la_HEADERS = y.tab.h
+
+BUILT_SOURCES = graph.lex.c defaults.c eventtypes.h
+
+libglusterfs_la_HEADERS = glusterfs/common-utils.h glusterfs/defaults.h \
+ glusterfs/default-args.h glusterfs/dict.h glusterfs/glusterfs.h \
+ glusterfs/hashfn.h glusterfs/timespec.h glusterfs/logging.h \
+ glusterfs/xlator.h glusterfs/stack.h glusterfs/timer.h glusterfs/list.h \
+ glusterfs/inode.h glusterfs/call-stub.h glusterfs/compat.h glusterfs/fd.h \
+ glusterfs/revision.h glusterfs/compat-errno.h glusterfs/gf-event.h \
+ glusterfs/mem-pool.h glusterfs/byte-order.h glusterfs/gf-dirent.h \
+ glusterfs/locking.h glusterfs/syscall.h glusterfs/iobuf.h \
+ glusterfs/globals.h glusterfs/statedump.h glusterfs/checksum.h \
+ glusterfs/daemon.h glusterfs/store.h glusterfs/rbthash.h glusterfs/iatt.h \
+ glusterfs/latency.h glusterfs/mem-types.h glusterfs/syncop.h \
+ glusterfs/cluster-syncop.h glusterfs/graph-utils.h glusterfs/trie.h \
+ glusterfs/refcount.h glusterfs/run.h glusterfs/options.h \
+ glusterfs/lkowner.h glusterfs/fd-lk.h glusterfs/circ-buff.h \
+ glusterfs/event-history.h glusterfs/gidcache.h glusterfs/client_t.h \
+ glusterfs/glusterfs-acl.h glusterfs/glfs-message-id.h \
+ glusterfs/template-component-messages.h glusterfs/strfd.h \
+ glusterfs/syncop-utils.h glusterfs/parse-utils.h \
+ glusterfs/libglusterfs-messages.h glusterfs/lvm-defaults.h \
+ glusterfs/quota-common-utils.h glusterfs/rot-buffs.h \
+ glusterfs/compat-uuid.h glusterfs/upcall-utils.h glusterfs/throttle-tbf.h \
+ glusterfs/events.h glusterfs/atomic.h glusterfs/monitoring.h \
+ glusterfs/async.h glusterfs/glusterfs-fops.h
libglusterfs_ladir = $(includedir)/glusterfs
@@ -70,7 +76,10 @@ noinst_HEADERS = unittest/unittest.h \
$(CONTRIBDIR)/libexecinfo/execinfo_compat.h \
$(CONTRIBDIR)/timer-wheel/timer-wheel.h \
$(CONTRIBDIR)/xxhash/xxhash.h \
- tier-ctr-interface.h
+ $(CONTRIBDIR)/userspace-rcu/wfcqueue.h \
+ $(CONTRIBDIR)/userspace-rcu/wfstack.h \
+ $(CONTRIBDIR)/userspace-rcu/static-wfcqueue.h \
+ $(CONTRIBDIR)/userspace-rcu/static-wfstack.h
eventtypes.h: $(top_srcdir)/events/eventskeygen.py
$(PYTHON) $(top_srcdir)/events/eventskeygen.py C_HEADER
diff --git a/libglusterfs/src/async.c b/libglusterfs/src/async.c
new file mode 100644
index 00000000000..1d6cfa374b6
--- /dev/null
+++ b/libglusterfs/src/async.c
@@ -0,0 +1,720 @@
+/*
+ Copyright (c) 2019 Red Hat, Inc <https://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/* To implement an efficient thread pool with minimum contention we have used
+ * the following ideas:
+ *
+ * - The queue of jobs has been implemented using a Wait-Free queue provided
+ * by the userspace-rcu library. This queue requires a mutex when multiple
+ * consumers can be extracting items from it concurrently, but the locked
+ * region is very small, which minimizes the chances of contention. To
+ * further minimize contention, the number of active worker threads that
+ * are accessing the queue is dynamically adjusted so that we always have
+ * the minimum required amount of workers contending for the queue. Adding
+ * new items can be done with a single atomic operation, without locks.
+ *
+ * - All queue management operations, like creating more threads, enabling
+ * sleeping ones, etc. are done by a single thread. This makes it possible
+ * to manage all scaling related information and workers lists without
+ * locks. This functionality is implemented as a role that can be assigned
+ * to any of the worker threads, which avoids that some lengthy operations
+ * could interfere with this task.
+ *
+ * - Management is based on signals. We used signals for management tasks to
+ * avoid multiple system calls for each request (with signals we can wait
+ * for multiple events and get some additional data for each request in a
+ * single call, instead of first polling and then reading).
+ *
+ * TODO: There are some other changes that can take advantage of this new
+ * thread pool.
+ *
+ * - Use this thread pool as the core threading model for synctasks. I
+ * think this would improve synctask performance because I think we
+ * currently have some contention there for some workloads.
+ *
+ * - Implement a per thread timer that will allow adding and removing
+ * timers without using mutexes.
+ *
+ * - Integrate with userspace-rcu library in QSBR mode, allowing
+ * other portions of code to be implemented using RCU-based
+ * structures with a extremely fast read side without contention.
+ *
+ * - Integrate I/O into the thread pool so that the thread pool is
+ * able to efficiently manage all loads and scale dynamically. This
+ * could make it possible to minimize context switching when serving
+ * requests from fuse or network.
+ *
+ * - Dynamically scale the number of workers based on system load.
+ * This will make it possible to reduce contention when system is
+ * heavily loaded, improving performance under these circumstances
+ * (or minimizing performance loss). This will also make it possible
+ * that gluster can coexist with other processes that also consume
+ * CPU, with minimal interference from each other.
+ */
+
+#include <unistd.h>
+#include <pthread.h>
+#include <errno.h>
+
+#include "glusterfs/list.h"
+#include "glusterfs/mem-types.h"
+#include "glusterfs/async.h"
+
+/* These macros wrap a simple system/library call to check the returned error
+ * and log a message in case of failure. */
+#define GF_ASYNC_CHECK(_func, _args...) \
+ ({ \
+ int32_t __async_error = -_func(_args); \
+ if (caa_unlikely(__async_error != 0)) { \
+ gf_async_error(__async_error, #_func "() failed."); \
+ } \
+ __async_error; \
+ })
+
+#define GF_ASYNC_CHECK_ERRNO(_func, _args...) \
+ ({ \
+ int32_t __async_error = _func(_args); \
+ if (caa_unlikely(__async_error < 0)) { \
+ __async_error = -errno; \
+ gf_async_error(__async_error, #_func "() failed."); \
+ } \
+ __async_error; \
+ })
+
+/* These macros are used when, based on POSIX documentation, the function
+ * should never fail under the conditions we are using it. So any unexpected
+ * error will be handled as a fatal event. It probably means a critical bug
+ * or memory corruption. In both cases we consider that stopping the process
+ * is safer (otherwise it could cause more corruption with unknown effects
+ * that could be worse). */
+#define GF_ASYNC_CANTFAIL(_func, _args...) \
+ do { \
+ int32_t __async_error = -_func(_args); \
+ if (caa_unlikely(__async_error != 0)) { \
+ gf_async_fatal(__async_error, #_func "() failed"); \
+ } \
+ } while (0)
+
+#define GF_ASYNC_CANTFAIL_ERRNO(_func, _args...) \
+ ({ \
+ int32_t __async_error = _func(_args); \
+ if (caa_unlikely(__async_error < 0)) { \
+ __async_error = -errno; \
+ gf_async_fatal(__async_error, #_func "() failed"); \
+ } \
+ __async_error; \
+ })
+
+/* TODO: for now we allocate a static array of workers. There's an issue if we
+ * try to use dynamic memory since these workers are initialized very
+ * early in the process startup and it seems that sometimes not all is
+ * ready to use dynamic memory. */
+static gf_async_worker_t gf_async_workers[GF_ASYNC_MAX_THREADS];
+
+/* This is the only global variable needed to manage the entire framework. */
+gf_async_control_t gf_async_ctrl = {};
+
+static __thread gf_async_worker_t *gf_async_current_worker = NULL;
+
+/* The main function of the worker threads. */
+static void *
+gf_async_worker(void *arg);
+
+static void
+gf_async_sync_init(void)
+{
+ GF_ASYNC_CANTFAIL(pthread_barrier_init, &gf_async_ctrl.sync, NULL, 2);
+}
+
+static void
+gf_async_sync_now(void)
+{
+ int32_t ret;
+
+ ret = pthread_barrier_wait(&gf_async_ctrl.sync);
+ if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
+ GF_ASYNC_CANTFAIL(pthread_barrier_destroy, &gf_async_ctrl.sync);
+ ret = 0;
+ }
+ if (caa_unlikely(ret != 0)) {
+ gf_async_fatal(-ret, "pthread_barrier_wait() failed");
+ }
+}
+
+static void
+gf_async_sigmask_empty(sigset_t *mask)
+{
+ GF_ASYNC_CANTFAIL_ERRNO(sigemptyset, mask);
+}
+
+static void
+gf_async_sigmask_add(sigset_t *mask, int32_t signal)
+{
+ GF_ASYNC_CANTFAIL_ERRNO(sigaddset, mask, signal);
+}
+
+static void
+gf_async_sigmask_set(int32_t mode, sigset_t *mask, sigset_t *old)
+{
+ GF_ASYNC_CANTFAIL(pthread_sigmask, mode, mask, old);
+}
+
+static void
+gf_async_sigaction(int32_t signum, const struct sigaction *action,
+ struct sigaction *old)
+{
+ GF_ASYNC_CANTFAIL_ERRNO(sigaction, signum, action, old);
+}
+
+static int32_t
+gf_async_sigwait(sigset_t *set)
+{
+ int32_t ret, signum;
+
+ do {
+ ret = sigwait(set, &signum);
+ } while (caa_unlikely((ret < 0) && (errno == EINTR)));
+
+ if (caa_unlikely(ret < 0)) {
+ ret = -errno;
+ gf_async_fatal(ret, "sigwait() failed");
+ }
+
+ return signum;
+}
+
+static int32_t
+gf_async_sigtimedwait(sigset_t *set, struct timespec *timeout)
+{
+ int32_t ret;
+
+ do {
+ ret = sigtimedwait(set, NULL, timeout);
+ } while (caa_unlikely((ret < 0) && (errno == EINTR)));
+ if (caa_unlikely(ret < 0)) {
+ ret = -errno;
+ /* EAGAIN means that the timeout has expired, so we allow this error.
+ * Any other error shouldn't happen. */
+ if (caa_unlikely(ret != -EAGAIN)) {
+ gf_async_fatal(ret, "sigtimedwait() failed");
+ }
+ ret = 0;
+ }
+
+ return ret;
+}
+
+static void
+gf_async_sigbroadcast(int32_t signum)
+{
+ GF_ASYNC_CANTFAIL_ERRNO(kill, gf_async_ctrl.pid, signum);
+}
+
+static void
+gf_async_signal_handler(int32_t signum)
+{
+ /* We should never handle a signal in this function. */
+ gf_async_fatal(-EBUSY,
+ "Unexpected processing of signal %d through a handler.",
+ signum);
+}
+
+static void
+gf_async_signal_setup(void)
+{
+ struct sigaction action;
+
+ /* We configure all related signals so that we can detect threads using an
+ * invalid signal mask that doesn't block our critical signal. */
+ memset(&action, 0, sizeof(action));
+ action.sa_handler = gf_async_signal_handler;
+
+ gf_async_sigaction(GF_ASYNC_SIGCTRL, &action, &gf_async_ctrl.handler_ctrl);
+
+ gf_async_sigaction(GF_ASYNC_SIGQUEUE, &action,
+ &gf_async_ctrl.handler_queue);
+}
+
+static void
+gf_async_signal_restore(void)
+{
+ /* Handlers we have previously changed are restored back to their original
+ * value. */
+
+ if (gf_async_ctrl.handler_ctrl.sa_handler != gf_async_signal_handler) {
+ gf_async_sigaction(GF_ASYNC_SIGCTRL, &gf_async_ctrl.handler_ctrl, NULL);
+ }
+
+ if (gf_async_ctrl.handler_queue.sa_handler != gf_async_signal_handler) {
+ gf_async_sigaction(GF_ASYNC_SIGQUEUE, &gf_async_ctrl.handler_queue,
+ NULL);
+ }
+}
+
+static void
+gf_async_signal_flush(void)
+{
+ struct timespec delay;
+
+ delay.tv_sec = 0;
+ delay.tv_nsec = 0;
+
+ /* We read all pending signals so that they don't trigger once the signal
+ * mask of some thread is changed. */
+ while (gf_async_sigtimedwait(&gf_async_ctrl.sigmask_ctrl, &delay) > 0) {
+ }
+ while (gf_async_sigtimedwait(&gf_async_ctrl.sigmask_queue, &delay) > 0) {
+ }
+}
+
+static int32_t
+gf_async_thread_create(pthread_t *thread, int32_t id, void *data)
+{
+ int32_t ret;
+
+ ret = gf_thread_create(thread, NULL, gf_async_worker, data,
+ GF_ASYNC_THREAD_NAME "%u", id);
+ if (caa_unlikely(ret < 0)) {
+ /* TODO: gf_thread_create() should return a more specific error
+ * code. */
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void
+gf_async_thread_wait(pthread_t thread)
+{
+ /* TODO: this is a blocking call executed inside one of the workers of the
+ * thread pool. This is bad, but this is only executed once we have
+ * received a notification from the thread that it's terminating, so
+ * this should return almost immediately. However, to be more robust
+ * it would be better to use pthread_timedjoin_np() (or even a call
+ * to pthread_tryjoin_np() followed by a delayed recheck if it
+ * fails), but they are not portable. We should see how to do this
+ * in other platforms. */
+ GF_ASYNC_CANTFAIL(pthread_join, thread, NULL);
+}
+
+static int32_t
+gf_async_worker_create(void)
+{
+ struct cds_wfs_node *node;
+ gf_async_worker_t *worker;
+ uint32_t counts, running, max;
+ int32_t ret;
+
+ node = __cds_wfs_pop_blocking(&gf_async_ctrl.available);
+ if (caa_unlikely(node == NULL)) {
+ /* There are no more available workers. We have all threads running. */
+ return 1;
+ }
+ cds_wfs_node_init(node);
+
+ ret = 1;
+
+ counts = uatomic_read(&gf_async_ctrl.counts);
+ max = uatomic_read(&gf_async_ctrl.max_threads);
+ running = GF_ASYNC_COUNT_RUNNING(counts);
+ if (running < max) {
+ uatomic_add(&gf_async_ctrl.counts, GF_ASYNC_COUNTS(1, 0));
+
+ worker = caa_container_of(node, gf_async_worker_t, stack);
+
+ ret = gf_async_thread_create(&worker->thread, worker->id, worker);
+ if (caa_likely(ret >= 0)) {
+ return 0;
+ }
+
+ uatomic_add(&gf_async_ctrl.counts, GF_ASYNC_COUNTS(-1, 0));
+ }
+
+ cds_wfs_push(&gf_async_ctrl.available, node);
+
+ return ret;
+}
+
+static void
+gf_async_worker_enable(void)
+{
+ /* This will wake one of the spare workers. If all workers are busy now,
+ * the signal will be queued so that the first one that completes its
+ * work will become the leader. */
+ gf_async_sigbroadcast(GF_ASYNC_SIGCTRL);
+
+ /* We have consumed a spare worker. We create another one for future
+ * needs. */
+ gf_async_worker_create();
+}
+
+static void
+gf_async_worker_wait(void)
+{
+ int32_t signum;
+
+ signum = gf_async_sigwait(&gf_async_ctrl.sigmask_ctrl);
+ if (caa_unlikely(signum != GF_ASYNC_SIGCTRL)) {
+ gf_async_fatal(-EINVAL, "Worker received an unexpected signal (%d)",
+ signum);
+ }
+}
+
+static void
+gf_async_leader_wait(void)
+{
+ int32_t signum;
+
+ signum = gf_async_sigwait(&gf_async_ctrl.sigmask_queue);
+ if (caa_unlikely(signum != GF_ASYNC_SIGQUEUE)) {
+ gf_async_fatal(-EINVAL, "Leader received an unexpected signal (%d)",
+ signum);
+ }
+}
+
+static void
+gf_async_run(struct cds_wfcq_node *node)
+{
+ gf_async_t *async;
+
+ /* We've just got work from the queue. Process it. */
+ async = caa_container_of(node, gf_async_t, queue);
+ /* TODO: remove dependency from THIS and xl. */
+ THIS = async->xl;
+ async->cbk(async->xl, async);
+}
+
+static void
+gf_async_worker_run(void)
+{
+ struct cds_wfcq_node *node;
+
+ do {
+ /* We keep executing jobs from the queue while it's not empty. Note
+ * that while we do this, we are ignoring any stop request. That's
+ * fine, since we need to process our own 'join' messages to fully
+ * terminate all threads. Note that normal jobs should have already
+ * completed once a stop request is received. */
+ node = cds_wfcq_dequeue_blocking(&gf_async_ctrl.queue.head,
+ &gf_async_ctrl.queue.tail);
+ if (node != NULL) {
+ gf_async_run(node);
+ }
+ } while (node != NULL);
+
+ /* TODO: I've tried to keep the worker looking at the queue for some small
+ * amount of time in a busy loop to see if more jobs come soon. With
+ * this I attempted to avoid the overhead of signal management if
+ * jobs come fast enough. However experimental results seem to
+ * indicate that doing this, CPU utilization grows and performance
+ * is actually reduced. We need to see if that's because I used bad
+ * parameters or it's really better to do it as it's done now. */
+}
+
+static void
+gf_async_leader_run(void)
+{
+ struct cds_wfcq_node *node;
+
+ node = cds_wfcq_dequeue_blocking(&gf_async_ctrl.queue.head,
+ &gf_async_ctrl.queue.tail);
+ while (caa_unlikely(node == NULL)) {
+ gf_async_leader_wait();
+
+ node = cds_wfcq_dequeue_blocking(&gf_async_ctrl.queue.head,
+ &gf_async_ctrl.queue.tail);
+ }
+
+ /* Activate the next available worker thread. It will become the new
+ * leader. */
+ gf_async_worker_enable();
+
+ gf_async_run(node);
+}
+
+static uint32_t
+gf_async_stop_check(gf_async_worker_t *worker)
+{
+ uint32_t counts, old, running, max;
+
+ /* First we check if we should stop without doing any costly atomic
+ * operation. */
+ old = uatomic_read(&gf_async_ctrl.counts);
+ max = uatomic_read(&gf_async_ctrl.max_threads);
+ running = GF_ASYNC_COUNT_RUNNING(old);
+ while (running > max) {
+ /* There are too many threads. We try to stop the current worker. */
+ counts = uatomic_cmpxchg(&gf_async_ctrl.counts, old,
+ old + GF_ASYNC_COUNTS(-1, 1));
+ if (old != counts) {
+ /* Another thread has just updated the counts. We need to retry. */
+ old = counts;
+ running = GF_ASYNC_COUNT_RUNNING(old);
+
+ continue;
+ }
+
+ running--;
+ worker->running = false;
+ }
+
+ return running;
+}
+
+static void
+gf_async_stop_all(xlator_t *xl, gf_async_t *async)
+{
+ if (gf_async_stop_check(gf_async_current_worker) > 0) {
+ /* There are more workers running. We propagate the stop request to
+ * them. */
+ gf_async(async, xl, gf_async_stop_all);
+ }
+}
+
+static void
+gf_async_join(xlator_t *xl, gf_async_t *async)
+{
+ gf_async_worker_t *worker;
+
+ worker = caa_container_of(async, gf_async_worker_t, async);
+
+ gf_async_thread_wait(worker->thread);
+
+ cds_wfs_push(&gf_async_ctrl.available, &worker->stack);
+}
+
+static void
+gf_async_terminate(gf_async_worker_t *worker)
+{
+ uint32_t counts;
+
+ counts = uatomic_add_return(&gf_async_ctrl.counts, GF_ASYNC_COUNTS(0, -1));
+ if (counts == 0) {
+ /* This is the termination of the last worker thread. We need to
+ * synchronize the main thread that is waiting for all workers to
+ * finish. */
+ gf_async_ctrl.sync_thread = worker->thread;
+
+ gf_async_sync_now();
+ } else {
+ /* Force someone else to join this thread to release resources. */
+ gf_async(&worker->async, THIS, gf_async_join);
+ }
+}
+
+static void *
+gf_async_worker(void *arg)
+{
+ gf_async_worker_t *worker;
+
+ worker = (gf_async_worker_t *)arg;
+ gf_async_current_worker = worker;
+
+ worker->running = true;
+ do {
+ /* This thread does nothing until someone enables it to become a
+ * leader. */
+ gf_async_worker_wait();
+
+ /* This thread is now a leader. It will process jobs from the queue
+ * and, if necessary, enable another worker and transfer leadership
+ * to it. */
+ gf_async_leader_run();
+
+ /* This thread is not a leader anymore. It will continue processing
+ * queued jobs until it becomes empty. */
+ gf_async_worker_run();
+
+ /* Stop the current thread if there are too many threads running. */
+ gf_async_stop_check(worker);
+ } while (worker->running);
+
+ gf_async_terminate(worker);
+
+ return NULL;
+}
+
+static void
+gf_async_cleanup(void)
+{
+ /* We do some basic initialization of the global variable 'gf_async_ctrl'
+ * so that it's put into a relatively consistent state. */
+
+ gf_async_ctrl.enabled = false;
+
+ gf_async_ctrl.pid = 0;
+ gf_async_sigmask_empty(&gf_async_ctrl.sigmask_ctrl);
+ gf_async_sigmask_empty(&gf_async_ctrl.sigmask_queue);
+
+ /* This is used to later detect if the handler of these signals have been
+ * changed or not. */
+ gf_async_ctrl.handler_ctrl.sa_handler = gf_async_signal_handler;
+ gf_async_ctrl.handler_queue.sa_handler = gf_async_signal_handler;
+
+ gf_async_ctrl.table = NULL;
+ gf_async_ctrl.max_threads = 0;
+ gf_async_ctrl.counts = 0;
+}
+
+void
+gf_async_fini(void)
+{
+ gf_async_t async;
+
+ if (uatomic_read(&gf_async_ctrl.counts) != 0) {
+ /* We ensure that all threads will quit on the next check. */
+ gf_async_ctrl.max_threads = 0;
+
+ /* Send the stop request to the thread pool. This will cause the
+ * execution of gf_async_stop_all() by one of the worker threads which,
+ * eventually, will terminate all worker threads. */
+ gf_async(&async, THIS, gf_async_stop_all);
+
+ /* We synchronize here with the last thread. */
+ gf_async_sync_now();
+
+ /* We have just synchronized with the latest thread. Now just wait for
+ * it to terminate. */
+ gf_async_thread_wait(gf_async_ctrl.sync_thread);
+
+ gf_async_signal_flush();
+ }
+
+ gf_async_signal_restore();
+
+ gf_async_cleanup();
+}
+
+void
+gf_async_adjust_threads(int32_t threads)
+{
+ if (threads == 0) {
+ /* By default we allow a maximum of 2 * #cores worker threads. This
+ * value is to try to accommodate threads that will do some I/O. Having
+ * more threads than cores we can keep CPU busy even if some threads
+ * are blocked for I/O. In the most efficient case, we can have #cores
+ * computing threads and #cores blocked threads on I/O. However this is
+ * hard to achieve because we can end with more than #cores computing
+ * threads, which won't provide a real benefit and will increase
+ * contention.
+ *
+ * TODO: implement a more intelligent dynamic maximum based on CPU
+ * usage and/or system load. */
+ threads = sysconf(_SC_NPROCESSORS_ONLN) * 2;
+ if (threads < 0) {
+ /* If we can't get the current number of processors, we pick a
+ * random number. */
+ threads = 16;
+ }
+ }
+ if (threads > GF_ASYNC_MAX_THREADS) {
+ threads = GF_ASYNC_MAX_THREADS;
+ }
+ uatomic_set(&gf_async_ctrl.max_threads, threads);
+}
+
+int32_t
+gf_async_init(glusterfs_ctx_t *ctx)
+{
+ sigset_t set;
+ gf_async_worker_t *worker;
+ uint32_t i;
+ int32_t ret;
+ bool running;
+
+ gf_async_cleanup();
+
+ if (!ctx->cmd_args.global_threading ||
+ (ctx->process_mode == GF_GLUSTERD_PROCESS)) {
+ return 0;
+ }
+
+ /* At the init time, the maximum number of threads has not yet been
+ * configured. We use a small starting value that will be layer dynamically
+ * adjusted when ctx->config.max_threads is updated. */
+ gf_async_adjust_threads(GF_ASYNC_SPARE_THREADS + 1);
+
+ gf_async_ctrl.pid = getpid();
+
+ __cds_wfs_init(&gf_async_ctrl.available);
+ cds_wfcq_init(&gf_async_ctrl.queue.head, &gf_async_ctrl.queue.tail);
+
+ gf_async_sync_init();
+
+ /* TODO: it would be cleaner to use dynamic memory, but at this point some
+ * memory management resources are not yet initialized. */
+ gf_async_ctrl.table = gf_async_workers;
+
+ /* We keep all workers in a stack. It will be used when a new thread needs
+ * to be created. */
+ for (i = GF_ASYNC_MAX_THREADS; i > 0; i--) {
+ worker = &gf_async_ctrl.table[i - 1];
+
+ worker->id = i - 1;
+ cds_wfs_node_init(&worker->stack);
+ cds_wfs_push(&gf_async_ctrl.available, &worker->stack);
+ }
+
+ /* Prepare the signal mask for regular workers and the leader. */
+ gf_async_sigmask_add(&gf_async_ctrl.sigmask_ctrl, GF_ASYNC_SIGCTRL);
+ gf_async_sigmask_add(&gf_async_ctrl.sigmask_queue, GF_ASYNC_SIGQUEUE);
+
+ /* TODO: this is needed to block our special signals in the current thread
+ * and all children that it starts. It would be cleaner to do it when
+ * signals are initialized, but there doesn't seem to be a unique
+ * place to do that, so for now we do it here. */
+ gf_async_sigmask_empty(&set);
+ gf_async_sigmask_add(&set, GF_ASYNC_SIGCTRL);
+ gf_async_sigmask_add(&set, GF_ASYNC_SIGQUEUE);
+ gf_async_sigmask_set(SIG_BLOCK, &set, NULL);
+
+ /* Configure the signal handlers. This is mostly for safety, not really
+ * needed, but it doesn't hurt. Note that the caller must ensure that the
+ * signals we need to run are already blocked in any thread already
+ * started. Otherwise this won't work. */
+ gf_async_signal_setup();
+
+ running = false;
+
+ /* We start the spare workers + 1 for the leader. */
+ for (i = 0; i < GF_ASYNC_SPARE_THREADS; i++) {
+ ret = gf_async_worker_create();
+ if (caa_unlikely(ret < 0)) {
+ /* This is the initial start up so we enforce that the spare
+ * threads are created. If this fails at the beginning, it's very
+ * unlikely that the async workers could do its job, so we abort
+ * the initialization. */
+ goto out;
+ }
+
+ /* Once the first thread is started, we can enable it to become the
+ * initial leader. */
+ if ((ret == 0) && !running) {
+ running = true;
+ gf_async_worker_enable();
+ }
+ }
+
+ if (caa_unlikely(!running)) {
+ gf_async_fatal(-ENOMEM, "No worker thread has started");
+ }
+
+ gf_async_ctrl.enabled = true;
+
+ ret = 0;
+
+out:
+ if (ret < 0) {
+ gf_async_error(ret, "Unable to initialize the thread pool.");
+ gf_async_fini();
+ }
+
+ return ret;
+}
diff --git a/libglusterfs/src/call-stub.c b/libglusterfs/src/call-stub.c
index 7e2713fa384..ee84f08acd4 100644
--- a/libglusterfs/src/call-stub.c
+++ b/libglusterfs/src/call-stub.c
@@ -11,12 +11,12 @@
#include <openssl/md5.h>
#include <inttypes.h>
-#include "call-stub.h"
-#include "mem-types.h"
-#include "libglusterfs-messages.h"
+#include "glusterfs/call-stub.h"
+#include "glusterfs/mem-types.h"
+#include "glusterfs/libglusterfs-messages.h"
static call_stub_t *
-stub_new(call_frame_t *frame, char wind, glusterfs_fop_t fop)
+stub_new(call_frame_t *frame, const char wind, const glusterfs_fop_t fop)
{
call_stub_t *new = NULL;
@@ -41,7 +41,6 @@ fop_lookup_stub(call_frame_t *frame, fop_lookup_t fn, loc_t *loc, dict_t *xdata)
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", loc, out);
stub = stub_new(frame, 1, GF_FOP_LOOKUP);
@@ -60,8 +59,6 @@ fop_lookup_cbk_stub(call_frame_t *frame, fop_lookup_cbk_t fn, int32_t op_ret,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_LOOKUP);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -77,7 +74,6 @@ fop_stat_stub(call_frame_t *frame, fop_stat_t fn, loc_t *loc, dict_t *xdata)
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", loc, out);
stub = stub_new(frame, 1, GF_FOP_STAT);
@@ -95,8 +91,6 @@ fop_stat_cbk_stub(call_frame_t *frame, fop_stat_cbk_t fn, int32_t op_ret,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_STAT);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -111,8 +105,6 @@ fop_fstat_stub(call_frame_t *frame, fop_fstat_t fn, fd_t *fd, dict_t *xdata)
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 1, GF_FOP_FSTAT);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -128,8 +120,6 @@ fop_fstat_cbk_stub(call_frame_t *frame, fop_fstat_cbk_t fn, int32_t op_ret,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_FSTAT);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -145,7 +135,6 @@ fop_truncate_stub(call_frame_t *frame, fop_truncate_t fn, loc_t *loc, off_t off,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", loc, out);
stub = stub_new(frame, 1, GF_FOP_TRUNCATE);
@@ -164,8 +153,6 @@ fop_truncate_cbk_stub(call_frame_t *frame, fop_truncate_cbk_t fn,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_TRUNCATE);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -182,8 +169,6 @@ fop_ftruncate_stub(call_frame_t *frame, fop_ftruncate_t fn, fd_t *fd, off_t off,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 1, GF_FOP_FTRUNCATE);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -201,8 +186,6 @@ fop_ftruncate_cbk_stub(call_frame_t *frame, fop_ftruncate_cbk_t fn,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_FTRUNCATE);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -220,7 +203,6 @@ fop_access_stub(call_frame_t *frame, fop_access_t fn, loc_t *loc, int32_t mask,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", loc, out);
stub = stub_new(frame, 1, GF_FOP_ACCESS);
@@ -238,8 +220,6 @@ fop_access_cbk_stub(call_frame_t *frame, fop_access_cbk_t fn, int32_t op_ret,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_ACCESS);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -255,7 +235,6 @@ fop_readlink_stub(call_frame_t *frame, fop_readlink_t fn, loc_t *loc,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", loc, out);
stub = stub_new(frame, 1, GF_FOP_READLINK);
@@ -274,8 +253,6 @@ fop_readlink_cbk_stub(call_frame_t *frame, fop_readlink_cbk_t fn,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_READLINK);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -292,7 +269,6 @@ fop_mknod_stub(call_frame_t *frame, fop_mknod_t fn, loc_t *loc, mode_t mode,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", loc, out);
stub = stub_new(frame, 1, GF_FOP_MKNOD);
@@ -312,8 +288,6 @@ fop_mknod_cbk_stub(call_frame_t *frame, fop_mknod_cbk_t fn, int32_t op_ret,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_MKNOD);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -330,7 +304,6 @@ fop_mkdir_stub(call_frame_t *frame, fop_mkdir_t fn, loc_t *loc, mode_t mode,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", loc, out);
stub = stub_new(frame, 1, GF_FOP_MKDIR);
@@ -350,8 +323,6 @@ fop_mkdir_cbk_stub(call_frame_t *frame, fop_mkdir_cbk_t fn, int32_t op_ret,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_MKDIR);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -368,7 +339,6 @@ fop_unlink_stub(call_frame_t *frame, fop_unlink_t fn, loc_t *loc, int xflag,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", loc, out);
stub = stub_new(frame, 1, GF_FOP_UNLINK);
@@ -388,8 +358,6 @@ fop_unlink_cbk_stub(call_frame_t *frame, fop_unlink_cbk_t fn, int32_t op_ret,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_UNLINK);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -406,7 +374,6 @@ fop_rmdir_stub(call_frame_t *frame, fop_rmdir_t fn, loc_t *loc, int flags,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", loc, out);
stub = stub_new(frame, 1, GF_FOP_RMDIR);
@@ -426,8 +393,6 @@ fop_rmdir_cbk_stub(call_frame_t *frame, fop_rmdir_cbk_t fn, int32_t op_ret,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_RMDIR);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -444,7 +409,6 @@ fop_symlink_stub(call_frame_t *frame, fop_symlink_t fn, const char *linkname,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", loc, out);
GF_VALIDATE_OR_GOTO("call-stub", linkname, out);
@@ -465,8 +429,6 @@ fop_symlink_cbk_stub(call_frame_t *frame, fop_symlink_cbk_t fn, int32_t op_ret,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_SYMLINK);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -483,7 +445,6 @@ fop_rename_stub(call_frame_t *frame, fop_rename_t fn, loc_t *oldloc,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", oldloc, out);
GF_VALIDATE_OR_GOTO("call-stub", newloc, out);
@@ -505,8 +466,6 @@ fop_rename_cbk_stub(call_frame_t *frame, fop_rename_cbk_t fn, int32_t op_ret,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_RENAME);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -523,7 +482,6 @@ fop_link_stub(call_frame_t *frame, fop_link_t fn, loc_t *oldloc, loc_t *newloc,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", oldloc, out);
GF_VALIDATE_OR_GOTO("call-stub", newloc, out);
@@ -544,8 +502,6 @@ fop_link_cbk_stub(call_frame_t *frame, fop_link_cbk_t fn, int32_t op_ret,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_LINK);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -562,7 +518,6 @@ fop_create_stub(call_frame_t *frame, fop_create_t fn, loc_t *loc, int32_t flags,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", loc, out);
stub = stub_new(frame, 1, GF_FOP_CREATE);
@@ -582,8 +537,6 @@ fop_create_cbk_stub(call_frame_t *frame, fop_create_cbk_t fn, int32_t op_ret,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_CREATE);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -600,7 +553,6 @@ fop_open_stub(call_frame_t *frame, fop_open_t fn, loc_t *loc, int32_t flags,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", loc, out);
stub = stub_new(frame, 1, GF_FOP_OPEN);
@@ -618,8 +570,6 @@ fop_open_cbk_stub(call_frame_t *frame, fop_open_cbk_t fn, int32_t op_ret,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_OPEN);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -635,8 +585,6 @@ fop_readv_stub(call_frame_t *frame, fop_readv_t fn, fd_t *fd, size_t size,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 1, GF_FOP_READ);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -653,8 +601,6 @@ fop_readv_cbk_stub(call_frame_t *frame, fop_readv_cbk_t fn, int32_t op_ret,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_READ);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -672,7 +618,6 @@ fop_writev_stub(call_frame_t *frame, fop_writev_t fn, fd_t *fd,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", vector, out);
stub = stub_new(frame, 1, GF_FOP_WRITE);
@@ -692,8 +637,6 @@ fop_writev_cbk_stub(call_frame_t *frame, fop_writev_cbk_t fn, int32_t op_ret,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_WRITE);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -709,8 +652,6 @@ fop_flush_stub(call_frame_t *frame, fop_flush_t fn, fd_t *fd, dict_t *xdata)
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 1, GF_FOP_FLUSH);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -726,8 +667,6 @@ fop_flush_cbk_stub(call_frame_t *frame, fop_flush_cbk_t fn, int32_t op_ret,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_FLUSH);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -743,8 +682,6 @@ fop_fsync_stub(call_frame_t *frame, fop_fsync_t fn, fd_t *fd, int32_t datasync,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 1, GF_FOP_FSYNC);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -761,8 +698,6 @@ fop_fsync_cbk_stub(call_frame_t *frame, fop_fsync_cbk_t fn, int32_t op_ret,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_FSYNC);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -779,7 +714,6 @@ fop_opendir_stub(call_frame_t *frame, fop_opendir_t fn, loc_t *loc, fd_t *fd,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", loc, out);
stub = stub_new(frame, 1, GF_FOP_OPENDIR);
@@ -797,8 +731,6 @@ fop_opendir_cbk_stub(call_frame_t *frame, fop_opendir_cbk_t fn, int32_t op_ret,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_OPENDIR);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -814,8 +746,6 @@ fop_fsyncdir_stub(call_frame_t *frame, fop_fsyncdir_t fn, fd_t *fd,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 1, GF_FOP_FSYNCDIR);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -831,8 +761,6 @@ fop_fsyncdir_cbk_stub(call_frame_t *frame, fop_fsyncdir_cbk_t fn,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_FSYNCDIR);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -847,7 +775,6 @@ fop_statfs_stub(call_frame_t *frame, fop_statfs_t fn, loc_t *loc, dict_t *xdata)
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", loc, out);
stub = stub_new(frame, 1, GF_FOP_STATFS);
@@ -865,8 +792,6 @@ fop_statfs_cbk_stub(call_frame_t *frame, fop_statfs_cbk_t fn, int32_t op_ret,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_STATFS);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -882,7 +807,6 @@ fop_setxattr_stub(call_frame_t *frame, fop_setxattr_t fn, loc_t *loc,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", loc, out);
stub = stub_new(frame, 1, GF_FOP_SETXATTR);
@@ -900,8 +824,6 @@ fop_setxattr_cbk_stub(call_frame_t *frame, fop_setxattr_cbk_t fn,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_SETXATTR);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -917,7 +839,6 @@ fop_getxattr_stub(call_frame_t *frame, fop_getxattr_t fn, loc_t *loc,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", loc, out);
stub = stub_new(frame, 1, GF_FOP_GETXATTR);
@@ -936,8 +857,6 @@ fop_getxattr_cbk_stub(call_frame_t *frame, fop_getxattr_cbk_t fn,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_GETXATTR);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -953,7 +872,6 @@ fop_fsetxattr_stub(call_frame_t *frame, fop_fsetxattr_t fn, fd_t *fd,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", fd, out);
stub = stub_new(frame, 1, GF_FOP_FSETXATTR);
@@ -971,8 +889,6 @@ fop_fsetxattr_cbk_stub(call_frame_t *frame, fop_fsetxattr_cbk_t fn,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_FSETXATTR);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -988,7 +904,6 @@ fop_fgetxattr_stub(call_frame_t *frame, fop_fgetxattr_t fn, fd_t *fd,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", fd, out);
stub = stub_new(frame, 1, GF_FOP_FGETXATTR);
@@ -1007,8 +922,6 @@ fop_fgetxattr_cbk_stub(call_frame_t *frame, fop_fgetxattr_cbk_t fn,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_GETXATTR);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -1024,7 +937,6 @@ fop_removexattr_stub(call_frame_t *frame, fop_removexattr_t fn, loc_t *loc,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", loc, out);
GF_VALIDATE_OR_GOTO("call-stub", name, out);
@@ -1043,8 +955,6 @@ fop_removexattr_cbk_stub(call_frame_t *frame, fop_removexattr_cbk_t fn,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_REMOVEXATTR);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -1060,7 +970,6 @@ fop_fremovexattr_stub(call_frame_t *frame, fop_fremovexattr_t fn, fd_t *fd,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", fd, out);
GF_VALIDATE_OR_GOTO("call-stub", name, out);
@@ -1079,8 +988,6 @@ fop_fremovexattr_cbk_stub(call_frame_t *frame, fop_fremovexattr_cbk_t fn,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_FREMOVEXATTR);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -1096,7 +1003,6 @@ fop_lk_stub(call_frame_t *frame, fop_lk_t fn, fd_t *fd, int32_t cmd,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", lock, out);
stub = stub_new(frame, 1, GF_FOP_LK);
@@ -1114,8 +1020,6 @@ fop_lk_cbk_stub(call_frame_t *frame, fop_lk_cbk_t fn, int32_t op_ret,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_LK);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -1131,7 +1035,6 @@ fop_inodelk_stub(call_frame_t *frame, fop_inodelk_t fn, const char *volume,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", lock, out);
stub = stub_new(frame, 1, GF_FOP_INODELK);
@@ -1149,8 +1052,6 @@ fop_inodelk_cbk_stub(call_frame_t *frame, fop_inodelk_cbk_t fn, int32_t op_ret,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_INODELK);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -1166,7 +1067,6 @@ fop_finodelk_stub(call_frame_t *frame, fop_finodelk_t fn, const char *volume,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", lock, out);
stub = stub_new(frame, 1, GF_FOP_FINODELK);
@@ -1185,8 +1085,6 @@ fop_finodelk_cbk_stub(call_frame_t *frame, fop_inodelk_cbk_t fn, int32_t op_ret,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_FINODELK);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -1203,8 +1101,6 @@ fop_entrylk_stub(call_frame_t *frame, fop_entrylk_t fn, const char *volume,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 1, GF_FOP_ENTRYLK);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -1221,8 +1117,6 @@ fop_entrylk_cbk_stub(call_frame_t *frame, fop_entrylk_cbk_t fn, int32_t op_ret,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_ENTRYLK);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -1239,8 +1133,6 @@ fop_fentrylk_stub(call_frame_t *frame, fop_fentrylk_t fn, const char *volume,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 1, GF_FOP_FENTRYLK);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -1256,8 +1148,6 @@ fop_fentrylk_cbk_stub(call_frame_t *frame, fop_fentrylk_cbk_t fn,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_FENTRYLK);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -1274,8 +1164,6 @@ fop_readdirp_cbk_stub(call_frame_t *frame, fop_readdirp_cbk_t fn,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_READDIRP);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -1291,8 +1179,6 @@ fop_readdir_cbk_stub(call_frame_t *frame, fop_readdir_cbk_t fn, int32_t op_ret,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_READDIR);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -1338,7 +1224,6 @@ fop_rchecksum_stub(call_frame_t *frame, fop_rchecksum_t fn, fd_t *fd,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", fd, out);
stub = stub_new(frame, 1, GF_FOP_RCHECKSUM);
@@ -1357,8 +1242,6 @@ fop_rchecksum_cbk_stub(call_frame_t *frame, fop_rchecksum_cbk_t fn,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_RCHECKSUM);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -1375,8 +1258,6 @@ fop_xattrop_cbk_stub(call_frame_t *frame, fop_xattrop_cbk_t fn, int32_t op_ret,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_XATTROP);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -1392,7 +1273,6 @@ fop_fxattrop_cbk_stub(call_frame_t *frame, fop_fxattrop_cbk_t fn,
dict_t *xdata)
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
stub = stub_new(frame, 0, GF_FOP_FXATTROP);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -1409,7 +1289,6 @@ fop_xattrop_stub(call_frame_t *frame, fop_xattrop_t fn, loc_t *loc,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", xattr, out);
stub = stub_new(frame, 1, GF_FOP_XATTROP);
@@ -1427,7 +1306,6 @@ fop_fxattrop_stub(call_frame_t *frame, fop_fxattrop_t fn, fd_t *fd,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", xattr, out);
stub = stub_new(frame, 1, GF_FOP_FXATTROP);
@@ -1446,8 +1324,6 @@ fop_setattr_cbk_stub(call_frame_t *frame, fop_setattr_cbk_t fn, int32_t op_ret,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_SETATTR);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -1465,8 +1341,6 @@ fop_fsetattr_cbk_stub(call_frame_t *frame, fop_setattr_cbk_t fn, int32_t op_ret,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_FSETATTR);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -1483,7 +1357,6 @@ fop_setattr_stub(call_frame_t *frame, fop_setattr_t fn, loc_t *loc,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", fn, out);
stub = stub_new(frame, 1, GF_FOP_SETATTR);
@@ -1501,7 +1374,6 @@ fop_fsetattr_stub(call_frame_t *frame, fop_fsetattr_t fn, fd_t *fd,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", fn, out);
stub = stub_new(frame, 1, GF_FOP_FSETATTR);
@@ -1520,8 +1392,6 @@ fop_fallocate_cbk_stub(call_frame_t *frame, fop_fallocate_cbk_t fn,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_FALLOCATE);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -1539,7 +1409,6 @@ fop_fallocate_stub(call_frame_t *frame, fop_fallocate_t fn, fd_t *fd,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", fn, out);
stub = stub_new(frame, 1, GF_FOP_FALLOCATE);
@@ -1558,8 +1427,6 @@ fop_discard_cbk_stub(call_frame_t *frame, fop_discard_cbk_t fn, int32_t op_ret,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_DISCARD);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -1577,7 +1444,6 @@ fop_discard_stub(call_frame_t *frame, fop_discard_t fn, fd_t *fd, off_t offset,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", fn, out);
stub = stub_new(frame, 1, GF_FOP_DISCARD);
@@ -1596,8 +1462,6 @@ fop_zerofill_cbk_stub(call_frame_t *frame, fop_zerofill_cbk_t fn,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_ZEROFILL);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -1615,7 +1479,6 @@ fop_zerofill_stub(call_frame_t *frame, fop_zerofill_t fn, fd_t *fd,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", fn, out);
stub = stub_new(frame, 1, GF_FOP_ZEROFILL);
@@ -1633,8 +1496,6 @@ fop_ipc_cbk_stub(call_frame_t *frame, fop_ipc_cbk_t fn, int32_t op_ret,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_IPC);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -1650,7 +1511,6 @@ fop_ipc_stub(call_frame_t *frame, fop_ipc_t fn, int32_t op, dict_t *xdata)
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", fn, out);
stub = stub_new(frame, 1, GF_FOP_IPC);
@@ -1668,8 +1528,6 @@ fop_lease_cbk_stub(call_frame_t *frame, fop_lease_cbk_t fn, int32_t op_ret,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_LEASE);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -1685,7 +1543,6 @@ fop_lease_stub(call_frame_t *frame, fop_lease_t fn, loc_t *loc,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", fn, out);
GF_VALIDATE_OR_GOTO("call-stub", lease, out);
@@ -1704,8 +1561,6 @@ fop_seek_cbk_stub(call_frame_t *frame, fop_seek_cbk_t fn, int32_t op_ret,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_SEEK);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -1722,7 +1577,6 @@ fop_seek_stub(call_frame_t *frame, fop_seek_t fn, fd_t *fd, off_t offset,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", fn, out);
stub = stub_new(frame, 1, GF_FOP_SEEK);
@@ -1741,8 +1595,6 @@ fop_getactivelk_cbk_stub(call_frame_t *frame, fop_getactivelk_cbk_t fn,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_GETACTIVELK);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -1759,7 +1611,6 @@ fop_getactivelk_stub(call_frame_t *frame, fop_getactivelk_t fn, loc_t *loc,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", fn, out);
stub = stub_new(frame, 1, GF_FOP_GETACTIVELK);
@@ -1781,8 +1632,6 @@ fop_setactivelk_cbk_stub(call_frame_t *frame, fop_setactivelk_cbk_t fn,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_SETACTIVELK);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -1803,7 +1652,6 @@ fop_setactivelk_stub(call_frame_t *frame, fop_setactivelk_t fn, loc_t *loc,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", fn, out);
stub = stub_new(frame, 1, GF_FOP_SETACTIVELK);
@@ -1818,13 +1666,55 @@ out:
}
call_stub_t *
+fop_copy_file_range_stub(call_frame_t *frame, fop_copy_file_range_t fn,
+ fd_t *fd_in, off64_t off_in, fd_t *fd_out,
+ off64_t off_out, size_t len, uint32_t flags,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+
+ GF_VALIDATE_OR_GOTO("call-stub", fn, out);
+
+ stub = stub_new(frame, 1, GF_FOP_COPY_FILE_RANGE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+
+ stub->fn.copy_file_range = fn;
+
+ args_copy_file_range_store(&stub->args, fd_in, off_in, fd_out, off_out, len,
+ flags, xdata);
+
+out:
+ return stub;
+}
+
+call_stub_t *
+fop_copy_file_range_cbk_stub(call_frame_t *frame, fop_copy_file_range_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *stbuf, struct iatt *prebuf_dst,
+ struct iatt *postbuf_dst, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+
+ GF_VALIDATE_OR_GOTO("call-stub", fn, out);
+
+ stub = stub_new(frame, 0, GF_FOP_COPY_FILE_RANGE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+
+ stub->fn_cbk.copy_file_range = fn;
+ args_copy_file_range_cbk_store(&stub->args_cbk, op_ret, op_errno, stbuf,
+ prebuf_dst, postbuf_dst, xdata);
+
+out:
+ return stub;
+}
+
+call_stub_t *
fop_put_stub(call_frame_t *frame, fop_put_t fn, loc_t *loc, mode_t mode,
mode_t umask, uint32_t flags, struct iovec *vector, int32_t count,
off_t offset, struct iobref *iobref, dict_t *xattr, dict_t *xdata)
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", vector, out);
stub = stub_new(frame, 1, GF_FOP_PUT);
@@ -1844,8 +1734,6 @@ fop_put_cbk_stub(call_frame_t *frame, fop_put_cbk_t fn, int32_t op_ret,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_PUT);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -1862,7 +1750,6 @@ fop_icreate_stub(call_frame_t *frame, fop_icreate_t fn, loc_t *loc, mode_t mode,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", fn, out);
stub = stub_new(frame, 1, GF_FOP_ICREATE);
@@ -1902,8 +1789,6 @@ fop_icreate_cbk_stub(call_frame_t *frame, fop_icreate_cbk_t fn, int32_t op_ret,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_ICREATE);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -1921,7 +1806,6 @@ fop_namelink_stub(call_frame_t *frame, fop_namelink_t fn, loc_t *loc,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
GF_VALIDATE_OR_GOTO("call-stub", fn, out);
stub = stub_new(frame, 1, GF_FOP_NAMELINK);
@@ -1961,8 +1845,6 @@ fop_namelink_cbk_stub(call_frame_t *frame, fop_namelink_cbk_t fn,
{
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO("call-stub", frame, out);
-
stub = stub_new(frame, 0, GF_FOP_NAMELINK);
GF_VALIDATE_OR_GOTO("call-stub", stub, out);
@@ -2213,6 +2095,13 @@ call_resume_wind(call_stub_t *stub)
stub->args.iobref, stub->args.xattr, stub->args.xdata);
break;
+ case GF_FOP_COPY_FILE_RANGE:
+ stub->fn.copy_file_range(
+ stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.off_in, stub->args.fd_dst, stub->args.off_out,
+ stub->args.size, stub->args.flags, stub->args.xdata);
+ break;
+
default:
gf_msg_callingfn("call-stub", GF_LOG_ERROR, EINVAL,
LG_MSG_INVALID_ENTRY,
@@ -2439,6 +2328,12 @@ call_resume_unwind(call_stub_t *stub)
stub->args_cbk.xdata);
break;
+ case GF_FOP_COPY_FILE_RANGE:
+ STUB_UNWIND(stub, copy_file_range, &stub->args_cbk.stat,
+ &stub->args_cbk.prestat, &stub->args_cbk.poststat,
+ stub->args_cbk.xdata);
+ break;
+
default:
gf_msg_callingfn("call-stub", GF_LOG_ERROR, EINVAL,
LG_MSG_INVALID_ENTRY,
diff --git a/libglusterfs/src/circ-buff.c b/libglusterfs/src/circ-buff.c
index cb37ed30ea2..913115c7be1 100644
--- a/libglusterfs/src/circ-buff.c
+++ b/libglusterfs/src/circ-buff.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "circ-buff.h"
-#include "libglusterfs-messages.h"
+#include "glusterfs/circ-buff.h"
+#include "glusterfs/libglusterfs-messages.h"
void
cb_destroy_data(circular_buffer_t *cb, void (*destroy_buffer_data)(void *data))
diff --git a/libglusterfs/src/client_t.c b/libglusterfs/src/client_t.c
index 586cbd84e5c..9d377c3c2e1 100644
--- a/libglusterfs/src/client_t.c
+++ b/libglusterfs/src/client_t.c
@@ -8,13 +8,12 @@
cases as published by the Free Software Foundation.
*/
-#include "glusterfs.h"
-#include "dict.h"
-#include "statedump.h"
-#include "client_t.h"
-#include "list.h"
-#include "rpcsvc.h"
-#include "libglusterfs-messages.h"
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/dict.h"
+#include "glusterfs/statedump.h"
+#include "glusterfs/client_t.h"
+#include "glusterfs/list.h"
+#include "glusterfs/libglusterfs-messages.h"
static int
gf_client_chain_client_entries(cliententry_t *entries, uint32_t startidx,
@@ -110,50 +109,13 @@ gf_clienttable_alloc(void)
return clienttable;
}
-void
-gf_client_clienttable_destroy(clienttable_t *clienttable)
-{
- client_t *client = NULL;
- cliententry_t *cliententries = NULL;
- uint32_t client_count = 0;
- int32_t i = 0;
-
- if (!clienttable) {
- gf_msg_callingfn("client_t", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
- "!clienttable");
- return;
- }
-
- LOCK(&clienttable->lock);
- {
- client_count = clienttable->max_clients;
- clienttable->max_clients = 0;
- cliententries = clienttable->cliententries;
- clienttable->cliententries = NULL;
- }
- UNLOCK(&clienttable->lock);
-
- if (cliententries != NULL) {
- for (i = 0; i < client_count; i++) {
- client = cliententries[i].client;
- if (client != NULL) {
- gf_client_unref(client);
- }
- }
-
- GF_FREE(cliententries);
- LOCK_DESTROY(&clienttable->lock);
- GF_FREE(clienttable);
- }
-}
-
/*
* Increments ref.bind if the client is already present or creates a new
* client with ref.bind = 1,ref.count = 1 it signifies that
* as long as ref.bind is > 0 client should be alive.
*/
client_t *
-gf_client_get(xlator_t *this, struct rpcsvc_auth_data *cred, char *client_uid,
+gf_client_get(xlator_t *this, client_auth_data_t *cred, char *client_uid,
char *subdir_mount)
{
client_t *client = NULL;
@@ -181,11 +143,10 @@ gf_client_get(xlator_t *this, struct rpcsvc_auth_data *cred, char *client_uid,
* if auth was used, matching auth flavour and data
*/
if (strcmp(client_uid, client->client_uid) == 0 &&
- (cred->flavour != AUTH_NONE &&
- (cred->flavour == client->auth.flavour &&
- (size_t)cred->datalen == client->auth.len &&
- memcmp(cred->authdata, client->auth.data, client->auth.len) ==
- 0))) {
+ (cred->flavour && (cred->flavour == client->auth.flavour &&
+ (size_t)cred->datalen == client->auth.len &&
+ memcmp(cred->authdata, client->auth.data,
+ client->auth.len) == 0))) {
GF_ATOMIC_INC(client->bind);
goto unlock;
}
@@ -224,9 +185,10 @@ gf_client_get(xlator_t *this, struct rpcsvc_auth_data *cred, char *client_uid,
GF_ATOMIC_INIT(client->bind, 1);
GF_ATOMIC_INIT(client->count, 1);
+ GF_ATOMIC_INIT(client->fd_cnt, 0);
client->auth.flavour = cred->flavour;
- if (cred->flavour != AUTH_NONE) {
+ if (cred->flavour) {
client->auth.data = GF_MALLOC(cred->datalen, gf_common_mt_client_t);
if (client->auth.data == NULL) {
GF_FREE(client->scratch_ctx.ctx);
@@ -352,8 +314,6 @@ client_destroy(client_t *client)
clienttable = client->this->ctx->clienttable;
- LOCK_DESTROY(&client->scratch_ctx.lock);
-
LOCK(&clienttable->lock);
{
clienttable->cliententries[client->tbl_index].client = NULL;
@@ -371,6 +331,8 @@ client_destroy(client_t *client)
if (client->subdir_inode)
inode_unref(client->subdir_inode);
+ LOCK_DESTROY(&client->scratch_ctx.lock);
+
GF_FREE(client->auth.data);
GF_FREE(client->auth.username);
GF_FREE(client->auth.passwd);
@@ -580,62 +542,6 @@ client_ctx_del(client_t *client, void *key, void **value)
}
void
-client_dump(client_t *client, char *prefix)
-{
- if (!client)
- return;
-
- gf_proc_dump_write("refcount", "%" GF_PRI_ATOMIC,
- GF_ATOMIC_GET(client->count));
-}
-
-void
-cliententry_dump(cliententry_t *cliententry, char *prefix)
-{
- if (!cliententry)
- return;
-
- if (GF_CLIENTENTRY_ALLOCATED != cliententry->next_free)
- return;
-
- if (cliententry->client)
- client_dump(cliententry->client, prefix);
-}
-
-void
-clienttable_dump(clienttable_t *clienttable, char *prefix)
-{
- int i = 0;
- int ret = -1;
- char key[GF_DUMP_MAX_BUF_LEN] = {0};
-
- if (!clienttable)
- return;
-
- ret = TRY_LOCK(&clienttable->lock);
- {
- if (ret) {
- gf_msg("client_t", GF_LOG_WARNING, 0, LG_MSG_LOCK_FAILED,
- "Unable to acquire lock");
- return;
- }
- gf_proc_dump_build_key(key, prefix, "maxclients");
- gf_proc_dump_write(key, "%d", clienttable->max_clients);
- gf_proc_dump_build_key(key, prefix, "first_free");
- gf_proc_dump_write(key, "%d", clienttable->first_free);
- for (i = 0; i < clienttable->max_clients; i++) {
- if (GF_CLIENTENTRY_ALLOCATED ==
- clienttable->cliententries[i].next_free) {
- gf_proc_dump_build_key(key, prefix, "cliententry[%d]", i);
- gf_proc_dump_add_section("%s", key);
- cliententry_dump(&clienttable->cliententries[i], key);
- }
- }
- }
- UNLOCK(&clienttable->lock);
-}
-
-void
client_ctx_dump(client_t *client, char *prefix)
{
#if 0 /* TBD, FIXME */
diff --git a/libglusterfs/src/cluster-syncop.c b/libglusterfs/src/cluster-syncop.c
index dab00577886..6ee89ddfdcf 100644
--- a/libglusterfs/src/cluster-syncop.c
+++ b/libglusterfs/src/cluster-syncop.c
@@ -14,8 +14,8 @@
/* NOTE: Cluster-syncop, like syncop blocks the executing thread until the
* responses are gathered if it is not executed as part of synctask. So it
* shouldn't be invoked in epoll worker thread */
-#include "cluster-syncop.h"
-#include "defaults.h"
+#include "glusterfs/cluster-syncop.h"
+#include "glusterfs/defaults.h"
#define FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, fop, \
args...) \
@@ -1203,6 +1203,10 @@ cluster_tiebreaker_inodelk(xlator_t **subvols, unsigned char *on,
if (num_success) {
FOP_SEQ(subvols, on, numsubvols, replies, locked_on, frame,
inodelk, dom, &loc, F_SETLKW, &flock, NULL);
+ } else {
+ loc_wipe(&loc);
+ memset(locked_on, 0, numsubvols);
+ return 0;
}
break;
}
@@ -1244,7 +1248,9 @@ cluster_tiebreaker_entrylk(xlator_t **subvols, unsigned char *on,
entrylk, dom, &loc, name, ENTRYLK_LOCK, ENTRYLK_WRLCK,
NULL);
} else {
+ loc_wipe(&loc);
memset(locked_on, 0, numsubvols);
+ return 0;
}
break;
}
diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c
index 63cbf13388f..682cbf28055 100644
--- a/libglusterfs/src/common-utils.c
+++ b/libglusterfs/src/common-utils.c
@@ -24,7 +24,6 @@
#include <time.h>
#include <locale.h>
#include <sys/socket.h>
-#include <sys/wait.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <signal.h>
@@ -35,25 +34,26 @@
#if defined(GF_BSD_HOST_OS) || defined(GF_DARWIN_HOST_OS)
#include <sys/sysctl.h>
#endif
-#include <libgen.h>
#ifndef GF_LINUX_HOST_OS
#include <sys/resource.h>
#endif
+#ifdef HAVE_SYNCFS_SYS
+#include <sys/syscall.h>
+#endif
-#include "compat-errno.h"
-#include "logging.h"
-#include "common-utils.h"
-#include "revision.h"
-#include "glusterfs.h"
-#include "stack.h"
-#include "lkowner.h"
-#include "syscall.h"
-#include "cli1-xdr.h"
+#include "glusterfs/compat-errno.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/revision.h"
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/stack.h"
+#include "glusterfs/lkowner.h"
+#include "glusterfs/syscall.h"
+#include "glusterfs/globals.h"
#define XXH_INLINE_ALL
#include "xxhash.h"
#include <ifaddrs.h>
-#include "libglusterfs-messages.h"
-#include "protocol-common.h"
+#include "glusterfs/libglusterfs-messages.h"
+#include "glusterfs/glusterfs-acl.h"
#ifdef __FreeBSD__
#include <pthread_np.h>
#undef BIT_SET
@@ -65,20 +65,29 @@
char *vol_type_str[] = {
"Distribute",
- "Stripe",
+ "Stripe [NOT SUPPORTED from v6.0]",
"Replicate",
- "Striped-Replicate",
+ "Striped-Replicate [NOT SUPPORTED from v6.0]",
"Disperse",
- "Tier",
- "Distributed-Stripe",
+ "Tier [NOT SUPPORTED from v6.0]",
+ "Distributed-Stripe [NOT SUPPORTED from v6.0]",
"Distributed-Replicate",
- "Distributed-Striped-Replicate",
+ "Distributed-Striped-Replicate [NOT SUPPORTED from v6.0]",
"Distributed-Disperse",
};
typedef int32_t (*rw_op_t)(int32_t fd, char *buf, int32_t size);
typedef int32_t (*rwv_op_t)(int32_t fd, const struct iovec *buf, int32_t size);
+char *xattrs_to_heal[] = {"user.",
+ POSIX_ACL_ACCESS_XATTR,
+ POSIX_ACL_DEFAULT_XATTR,
+ QUOTA_LIMIT_KEY,
+ QUOTA_LIMIT_OBJECTS_KEY,
+ GF_SELINUX_XATTR_KEY,
+ GF_XATTR_MDATA_KEY,
+ NULL};
+
void
gf_xxh64_wrapper(const unsigned char *data, size_t const len,
unsigned long long const seed, char *xxh64)
@@ -306,8 +315,7 @@ mkdir_p(char *path, mode_t mode, gf_boolean_t allow_symlinks)
dir[i] = '\0';
ret = sys_mkdir(dir, mode);
if (ret && errno != EEXIST) {
- gf_msg("", GF_LOG_ERROR, errno, LG_MSG_DIR_OP_FAILED,
- "Failed due to reason");
+ gf_smsg("", GF_LOG_ERROR, errno, LG_MSG_DIR_OP_FAILED, NULL);
goto out;
}
@@ -318,10 +326,8 @@ mkdir_p(char *path, mode_t mode, gf_boolean_t allow_symlinks)
if (S_ISLNK(stbuf.st_mode)) {
ret = -1;
- gf_msg("", GF_LOG_ERROR, 0, LG_MSG_DIR_IS_SYMLINK,
- "%s is a "
- "symlink",
- dir);
+ gf_smsg("", GF_LOG_ERROR, 0, LG_MSG_DIR_IS_SYMLINK, "dir=%s",
+ dir, NULL);
goto out;
}
}
@@ -334,10 +340,10 @@ mkdir_p(char *path, mode_t mode, gf_boolean_t allow_symlinks)
if (ret == 0)
errno = 0;
ret = -1;
- gf_msg("", GF_LOG_ERROR, errno, LG_MSG_DIR_OP_FAILED,
- "Failed"
- " to create directory, possibly some of the components"
- " were not directories");
+ gf_smsg("", GF_LOG_ERROR, errno, LG_MSG_DIR_OP_FAILED,
+ "possibly some of the components"
+ " were not directories",
+ NULL);
goto out;
}
@@ -410,10 +416,8 @@ gf_rev_dns_lookup(const char *ip)
/* Get the FQDN */
ret = gf_get_hostname_from_ip((char *)ip, &fqdn);
if (ret != 0) {
- gf_msg("resolver", GF_LOG_INFO, errno, LG_MSG_RESOLVE_HOSTNAME_FAILED,
- "could not resolve "
- "hostname for %s",
- ip);
+ gf_smsg("resolver", GF_LOG_INFO, errno, LG_MSG_RESOLVE_HOSTNAME_FAILED,
+ "hostname=%s", ip, NULL);
}
out:
return fqdn;
@@ -434,7 +438,7 @@ gf_resolve_path_parent(const char *path)
GF_VALIDATE_OR_GOTO(THIS->name, path, out);
- if (strlen(path) <= 0) {
+ if (0 == strlen(path)) {
gf_msg_callingfn(THIS->name, GF_LOG_DEBUG, 0, LG_MSG_INVALID_STRING,
"invalid string for 'path'");
goto out;
@@ -502,10 +506,8 @@ gf_resolve_ip6(const char *hostname, uint16_t port, int family, void **dnscache,
}
if ((ret = getaddrinfo(hostname, port_str, &hints, &cache->first)) !=
0) {
- gf_msg("resolver", GF_LOG_ERROR, 0, LG_MSG_GETADDRINFO_FAILED,
- "getaddrinfo failed"
- " (%s)",
- gai_strerror(ret));
+ gf_smsg("resolver", GF_LOG_ERROR, 0, LG_MSG_GETADDRINFO_FAILED,
+ "family=%d", family, "ret=%s", gai_strerror(ret), NULL);
GF_FREE(*dnscache);
*dnscache = NULL;
@@ -522,10 +524,8 @@ gf_resolve_ip6(const char *hostname, uint16_t port, int family, void **dnscache,
cache->next->ai_addrlen, host, sizeof(host), service,
sizeof(service), NI_NUMERICHOST);
if (ret != 0) {
- gf_msg("resolver", GF_LOG_ERROR, 0, LG_MSG_GETNAMEINFO_FAILED,
- "getnameinfo failed"
- " (%s)",
- gai_strerror(ret));
+ gf_smsg("resolver", GF_LOG_ERROR, 0, LG_MSG_GETNAMEINFO_FAILED,
+ "ret=%s", gai_strerror(ret), NULL);
goto err;
}
@@ -544,10 +544,8 @@ gf_resolve_ip6(const char *hostname, uint16_t port, int family, void **dnscache,
cache->next->ai_addrlen, host, sizeof(host), service,
sizeof(service), NI_NUMERICHOST);
if (ret != 0) {
- gf_msg("resolver", GF_LOG_ERROR, 0, LG_MSG_GETNAMEINFO_FAILED,
- "getnameinfo failed"
- " (%s)",
- gai_strerror(ret));
+ gf_smsg("resolver", GF_LOG_ERROR, 0, LG_MSG_GETNAMEINFO_FAILED,
+ "ret=%s", gai_strerror(ret), NULL);
goto err;
}
@@ -579,8 +577,14 @@ struct dnscache *
gf_dnscache_init(time_t ttl)
{
struct dnscache *cache = GF_MALLOC(sizeof(*cache), gf_common_mt_dnscache);
- if (cache) {
- cache->cache_dict = NULL;
+ if (!cache)
+ return NULL;
+
+ cache->cache_dict = dict_new();
+ if (!cache->cache_dict) {
+ GF_FREE(cache);
+ cache = NULL;
+ } else {
cache->ttl = ttl;
}
@@ -588,6 +592,20 @@ gf_dnscache_init(time_t ttl)
}
/**
+ * gf_dnscache_deinit -- cleanup resources used by struct dnscache
+ */
+void
+gf_dnscache_deinit(struct dnscache *cache)
+{
+ if (!cache) {
+ gf_msg_plain(GF_LOG_WARNING, "dnscache is NULL");
+ return;
+ }
+ dict_unref(cache->cache_dict);
+ GF_FREE(cache);
+}
+
+/**
* gf_dnscache_entry_init -- Initialize a dnscache entry
*
* @return: SUCCESS: Pointer to an allocated dnscache entry struct
@@ -635,12 +653,6 @@ gf_rev_dns_lookup_cached(const char *ip, struct dnscache *dnscache)
if (!dnscache)
goto out;
- if (!dnscache->cache_dict) {
- dnscache->cache_dict = dict_new();
- if (!dnscache->cache_dict) {
- goto out;
- }
- }
cache = dnscache->cache_dict;
/* Quick cache lookup to see if we already hold it */
@@ -648,7 +660,7 @@ gf_rev_dns_lookup_cached(const char *ip, struct dnscache *dnscache)
if (entrydata) {
dnsentry = (struct dnscache_entry *)entrydata->data;
/* First check the TTL & timestamp */
- if (time(NULL) - dnsentry->timestamp > dnscache->ttl) {
+ if (gf_time() - dnsentry->timestamp > dnscache->ttl) {
gf_dnscache_entry_deinit(dnsentry);
entrydata->data = NULL; /* Mark this as 'null' so
* dict_del () doesn't try free
@@ -679,23 +691,16 @@ gf_rev_dns_lookup_cached(const char *ip, struct dnscache *dnscache)
from_cache = _gf_false;
out:
/* Insert into the cache */
- if (fqdn && !from_cache) {
+ if (fqdn && !from_cache && ip) {
struct dnscache_entry *entry = gf_dnscache_entry_init();
- if (!entry) {
- goto out;
- }
- entry->fqdn = fqdn;
- if (!ip) {
- gf_dnscache_entry_deinit(entry);
- goto out;
+ if (entry) {
+ entry->fqdn = fqdn;
+ entry->ip = gf_strdup(ip);
+ entry->timestamp = gf_time();
+ entrydata = bin_to_data(entry, sizeof(*entry));
+ dict_set(cache, (char *)ip, entrydata);
}
-
- entry->ip = gf_strdup(ip);
- entry->timestamp = time(NULL);
-
- entrydata = bin_to_data(entry, sizeof(*entry));
- dict_set(cache, (char *)ip, entrydata);
}
return fqdn;
}
@@ -904,7 +909,7 @@ gf_print_trace(int32_t signum, glusterfs_ctx_t *ctx)
char msg[1024] = {
0,
};
- char timestr[64] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
call_stack_t *stack = NULL;
@@ -944,7 +949,7 @@ gf_print_trace(int32_t signum, glusterfs_ctx_t *ctx)
{
/* Dump the timestamp of the crash too, so the previous logs
can be related */
- gf_time_fmt(timestr, sizeof timestr, time(NULL), gf_timefmt_FT);
+ gf_time_fmt(timestr, sizeof timestr, gf_time(), gf_timefmt_FT);
gf_msg_plain_nomem(GF_LOG_ALERT, "time of crash: ");
gf_msg_plain_nomem(GF_LOG_ALERT, timestr);
}
@@ -1702,7 +1707,7 @@ gf_uint64_2human_readable(uint64_t n)
if (ret < 0)
goto err;
} else {
- ret = gf_asprintf(&str, "%luBytes", n);
+ ret = gf_asprintf(&str, "%" PRIu64 "Bytes", n);
if (ret < 0)
goto err;
}
@@ -1946,6 +1951,74 @@ gf_string2boolean(const char *str, gf_boolean_t *b)
}
int
+gf_strn2boolean(const char *str, const int len, gf_boolean_t *b)
+{
+ if (str == NULL) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "argument invalid");
+ return -1;
+ }
+
+ switch (len) {
+ case 1:
+ if (strcasecmp(str, "1") == 0) {
+ *b = _gf_true;
+ return 0;
+ } else if (strcasecmp(str, "0") == 0) {
+ *b = _gf_false;
+ return 0;
+ }
+ break;
+ case 2:
+ if (strcasecmp(str, "on") == 0) {
+ *b = _gf_true;
+ return 0;
+ } else if (strcasecmp(str, "no") == 0) {
+ *b = _gf_false;
+ return 0;
+ }
+ break;
+ case 3:
+ if (strcasecmp(str, "yes") == 0) {
+ *b = _gf_true;
+ return 0;
+ } else if (strcasecmp(str, "off") == 0) {
+ *b = _gf_false;
+ return 0;
+ }
+ break;
+ case 4:
+ if (strcasecmp(str, "true") == 0) {
+ *b = _gf_true;
+ return 0;
+ }
+ break;
+ case 5:
+ if (strcasecmp(str, "false") == 0) {
+ *b = _gf_false;
+ return 0;
+ }
+ break;
+ case 6:
+ if (strcasecmp(str, "enable") == 0) {
+ *b = _gf_true;
+ return 0;
+ }
+ break;
+ case 7:
+ if (strcasecmp(str, "disable") == 0) {
+ *b = _gf_false;
+ return 0;
+ }
+ break;
+ default:
+ return -1;
+ break;
+ }
+ return -1;
+}
+
+int
gf_lockfd(int fd)
{
struct gf_flock fl;
@@ -1972,7 +2045,7 @@ gf_unlockfd(int fd)
}
static void
-compute_checksum(char *buf, size_t size, uint32_t *checksum)
+compute_checksum(char *buf, const ssize_t size, uint32_t *checksum)
{
int ret = -1;
char *checksum_buf = NULL;
@@ -2003,7 +2076,7 @@ compute_checksum(char *buf, size_t size, uint32_t *checksum)
#define GF_CHECKSUM_BUF_SIZE 1024
int
-get_checksum_for_file(int fd, uint32_t *checksum)
+get_checksum_for_file(int fd, uint32_t *checksum, int op_version)
{
int ret = -1;
char buf[GF_CHECKSUM_BUF_SIZE] = {
@@ -2014,8 +2087,12 @@ get_checksum_for_file(int fd, uint32_t *checksum)
sys_lseek(fd, 0L, SEEK_SET);
do {
ret = sys_read(fd, &buf, GF_CHECKSUM_BUF_SIZE);
- if (ret > 0)
- compute_checksum(buf, GF_CHECKSUM_BUF_SIZE, checksum);
+ if (ret > 0) {
+ if (op_version < GD_OP_VERSION_5_4)
+ compute_checksum(buf, GF_CHECKSUM_BUF_SIZE, checksum);
+ else
+ compute_checksum(buf, ret, checksum);
+ }
} while (ret > 0);
/* set it back */
@@ -2025,7 +2102,7 @@ get_checksum_for_file(int fd, uint32_t *checksum)
}
int
-get_checksum_for_path(char *path, uint32_t *checksum)
+get_checksum_for_path(char *path, uint32_t *checksum, int op_version)
{
int ret = -1;
int fd = -1;
@@ -2036,12 +2113,12 @@ get_checksum_for_path(char *path, uint32_t *checksum)
fd = open(path, O_RDWR);
if (fd == -1) {
- gf_msg(THIS->name, GF_LOG_ERROR, errno, LG_MSG_PATH_ERROR,
- "Unable to open %s", path);
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, LG_MSG_PATH_OPEN_FAILED,
+ "path=%s", path, NULL);
goto out;
}
- ret = get_checksum_for_file(fd, checksum);
+ ret = get_checksum_for_file(fd, checksum, op_version);
out:
if (fd != -1)
@@ -2070,8 +2147,8 @@ get_file_mtime(const char *path, time_t *stamp)
ret = sys_stat(path, &f_stat);
if (ret < 0) {
- gf_msg(THIS->name, GF_LOG_ERROR, errno, LG_MSG_FILE_STAT_FAILED,
- "failed to stat %s", path);
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, LG_MSG_FILE_STAT_FAILED,
+ "path=%s", path, NULL);
goto out;
}
@@ -2090,7 +2167,7 @@ out:
* @ip_str : The IP to check
* @network: The network to check the IP against.
*
- * @return: success: 0
+ * @return: success: _gf_true
* failure: -EINVAL for bad args, retval of inet_pton otherwise
*/
gf_boolean_t
@@ -2130,14 +2207,14 @@ gf_is_ip_in_net(const char *network, const char *ip_str)
/* Convert IP address to a long */
ret = inet_pton(family, ip_str, &ip_buf);
if (ret < 0)
- gf_msg("common-utils", GF_LOG_ERROR, errno, LG_MSG_INET_PTON_FAILED,
- "inet_pton() failed");
+ gf_smsg("common-utils", GF_LOG_ERROR, errno, LG_MSG_INET_PTON_FAILED,
+ NULL);
/* Convert network IP address to a long */
ret = inet_pton(family, net_ip, &net_ip_buf);
if (ret < 0) {
- gf_msg("common-utils", GF_LOG_ERROR, errno, LG_MSG_INET_PTON_FAILED,
- "inet_pton() failed");
+ gf_smsg("common-utils", GF_LOG_ERROR, errno, LG_MSG_INET_PTON_FAILED,
+ NULL);
goto out;
}
@@ -2281,7 +2358,7 @@ next_token(char **tokenp, token_iter_t *tit)
* #include <stdio.h>
* #include <stdlib.h>
* #include <string.h>
- * #include "common-utils.h"
+ * #include "glusterfs/common-utils.h"
*
* int
* main (int argc, char **argv)
@@ -2457,6 +2534,31 @@ out:
return ret;
}
+char
+valid_cidr_address(char *cidr_address, gf_boolean_t wildcard_acc)
+{
+ unsigned int net_mask = 0, len = 0;
+ char *temp = NULL, *cidr_str = NULL, ret = 1;
+
+ cidr_str = strdupa(cidr_address);
+ temp = strstr(cidr_str, "/");
+ if (temp == NULL)
+ return 0; /* Since Invalid cidr ip address we return 0 */
+
+ *temp = '\0';
+ temp++;
+ net_mask = (unsigned int)atoi(temp);
+
+ if (net_mask > 32 || net_mask < 1)
+ return 0; /* Since Invalid cidr ip address we return 0*/
+
+ len = strlen(cidr_str);
+
+ ret = valid_ipv4_address(cidr_str, len, wildcard_acc);
+
+ return ret;
+}
+
/**
* valid_ipv4_subnetwork() takes the pattern and checks if it contains
* a valid ipv4 subnetwork pattern i.e. xx.xx.xx.xx/n. IPv4 address
@@ -2593,7 +2695,8 @@ out:
}
char
-valid_internet_address(char *address, gf_boolean_t wildcard_acc)
+valid_internet_address(char *address, gf_boolean_t wildcard_acc,
+ gf_boolean_t cidr)
{
char ret = 0;
int length = 0;
@@ -2608,6 +2711,10 @@ valid_internet_address(char *address, gf_boolean_t wildcard_acc)
if (length == 0)
goto out;
+ if (cidr && valid_cidr_address(address, wildcard_acc)) {
+ ret = 1;
+ }
+
if (valid_ipv4_address(address, length, wildcard_acc) ||
valid_ipv6_address(address, length, wildcard_acc) ||
valid_host_name(address, length))
@@ -2687,8 +2794,8 @@ gf_boolean_t
gf_sock_union_equal_addr(union gf_sock_union *a, union gf_sock_union *b)
{
if (!a || !b) {
- gf_msg("common-utils", GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY,
- "Invalid arguments to gf_sock_union_equal_addr");
+ gf_smsg("common-utils", GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY,
+ "gf_sock_union_equal_addr", NULL);
return _gf_false;
}
@@ -2916,8 +3023,8 @@ gf_roundup_power_of_two(int32_t nr)
int32_t result = 1;
if (nr < 0) {
- gf_msg("common-utils", GF_LOG_WARNING, 0, LG_MSG_NEGATIVE_NUM_PASSED,
- "negative number passed");
+ gf_smsg("common-utils", GF_LOG_WARNING, 0, LG_MSG_NEGATIVE_NUM_PASSED,
+ NULL);
result = -1;
goto out;
}
@@ -2940,8 +3047,8 @@ gf_roundup_next_power_of_two(int32_t nr)
int32_t result = 1;
if (nr < 0) {
- gf_msg("common-utils", GF_LOG_WARNING, 0, LG_MSG_NEGATIVE_NUM_PASSED,
- "negative number passed");
+ gf_smsg("common-utils", GF_LOG_WARNING, 0, LG_MSG_NEGATIVE_NUM_PASSED,
+ NULL);
result = -1;
goto out;
}
@@ -2954,16 +3061,6 @@ out:
}
int
-get_vol_type(int type, int dist_count, int brick_count)
-{
- if ((type != GF_CLUSTER_TYPE_TIER) && (type > 0) &&
- (dist_count < brick_count))
- type = type + GF_CLUSTER_TYPE_MAX - 1;
-
- return type;
-}
-
-int
validate_brick_name(char *brick)
{
char *delimiter = NULL;
@@ -3036,7 +3133,7 @@ get_mem_size()
memsize = page_size * num_pages;
#endif
-#if defined GF_DARWIN_HOST_OS
+#if defined GF_DARWIN_HOST_OS || defined __FreeBSD__
size_t len = sizeof(memsize);
int name[] = {CTL_HW, HW_PHYSMEM};
@@ -3125,8 +3222,7 @@ gf_canonicalize_path(char *path)
out:
if (ret)
- gf_msg("common-utils", GF_LOG_ERROR, 0, LG_MSG_PATH_ERROR,
- "Path manipulation failed");
+ gf_smsg("common-utils", GF_LOG_ERROR, 0, LG_MSG_PATH_ERROR, NULL);
GF_FREE(tmppath);
@@ -3180,19 +3276,15 @@ gf_get_reserved_ports()
* continue with older method of using any of the available
* port? For now 2nd option is considered.
*/
- gf_msg("glusterfs", GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
- "could not open the file "
- "/proc/sys/net/ipv4/ip_local_reserved_ports for "
- "getting reserved ports info");
+ gf_smsg("glusterfs", GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
+ " /proc/sys/net/ipv4/ip_local_reserved_ports", NULL);
goto out;
}
ret = sys_read(proc_fd, buffer, sizeof(buffer) - 1);
if (ret < 0) {
- gf_msg("glusterfs", GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
- "could not read the file %s for"
- " getting reserved ports info",
- proc_file);
+ gf_smsg("glusterfs", GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
+ "file=%s", proc_file, NULL);
goto out;
}
@@ -3220,10 +3312,8 @@ gf_process_reserved_ports(unsigned char *ports, uint32_t ceiling)
ports_info = gf_get_reserved_ports();
if (!ports_info) {
- gf_msg("glusterfs", GF_LOG_WARNING, 0, LG_MSG_RESERVED_PORTS_ERROR,
- "Not able to get reserved"
- " ports, hence there is a possibility that glusterfs "
- "may consume reserved port");
+ gf_smsg("glusterfs", GF_LOG_WARNING, 0, LG_MSG_RESERVED_PORTS_ERROR,
+ NULL);
goto out;
}
@@ -3259,9 +3349,9 @@ gf_ports_reserved(char *blocked_port, unsigned char *ports, uint32_t ceiling)
if (blocked_port[strlen(blocked_port) - 1] == '\n')
blocked_port[strlen(blocked_port) - 1] = '\0';
if (gf_string2int32(blocked_port, &tmp_port1) == 0) {
- if (tmp_port1 > ceiling || tmp_port1 < 0) {
- gf_msg("glusterfs-socket", GF_LOG_WARNING, 0,
- LG_MSG_INVALID_PORT, "invalid port %d", tmp_port1);
+ if (tmp_port1 > GF_PORT_MAX || tmp_port1 < 0) {
+ gf_smsg("glusterfs-socket", GF_LOG_WARNING, 0,
+ LG_MSG_INVALID_PORT, "port=%d", tmp_port1, NULL);
result = _gf_true;
goto out;
} else {
@@ -3272,10 +3362,8 @@ gf_ports_reserved(char *blocked_port, unsigned char *ports, uint32_t ceiling)
BIT_SET(ports, tmp_port1);
}
} else {
- gf_msg("glusterfs-socket", GF_LOG_WARNING, 0, LG_MSG_INVALID_PORT,
- "%s is not a valid port "
- "identifier",
- blocked_port);
+ gf_smsg("glusterfs-socket", GF_LOG_WARNING, 0, LG_MSG_INVALID_PORT,
+ "port=%s", blocked_port, NULL);
result = _gf_true;
goto out;
}
@@ -3377,10 +3465,8 @@ gf_get_hostname_from_ip(char *client_ip, char **hostname)
ret = getnameinfo(client_sockaddr, addr_sz, client_hostname,
sizeof(client_hostname), NULL, 0, 0);
if (ret) {
- gf_msg("common-utils", GF_LOG_ERROR, 0, LG_MSG_GETNAMEINFO_FAILED,
- "Could not lookup hostname "
- "of %s : %s",
- client_ip, gai_strerror(ret));
+ gf_smsg("common-utils", GF_LOG_ERROR, 0, LG_MSG_GETNAMEINFO_FAILED,
+ "ip=%s", client_ip, "ret=%s", gai_strerror(ret), NULL);
ret = -1;
goto out;
}
@@ -3409,8 +3495,8 @@ gf_interface_search(char *ip)
ret = getifaddrs(&ifaddr);
if (ret != 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, LG_MSG_GETIFADDRS_FAILED,
- "getifaddrs() failed: %s\n", gai_strerror(ret));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, LG_MSG_GETIFADDRS_FAILED, "ret=%s",
+ gai_strerror(ret), NULL);
goto out;
}
@@ -3434,10 +3520,8 @@ gf_interface_search(char *ip)
host, NI_MAXHOST, NULL, 0, NI_NUMERICHOST);
if (ret != 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, LG_MSG_GETNAMEINFO_FAILED,
- "getnameinfo() "
- "failed: %s\n",
- gai_strerror(ret));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, LG_MSG_GETNAMEINFO_FAILED,
+ "ret=%s", gai_strerror(ret), NULL);
goto out;
}
@@ -3487,14 +3571,12 @@ get_ip_from_addrinfo(struct addrinfo *addr, char **ip)
break;
default:
- gf_msg("glusterd", GF_LOG_ERROR, 0, LG_MSG_INVALID_FAMILY,
- "Invalid family");
+ gf_smsg("glusterd", GF_LOG_ERROR, 0, LG_MSG_INVALID_FAMILY, NULL);
return NULL;
}
if (!inet_ntop(addr->ai_family, in_addr, buf, sizeof(buf))) {
- gf_msg("glusterd", GF_LOG_ERROR, 0, LG_MSG_CONVERSION_FAILED,
- "String conversion failed");
+ gf_smsg("glusterd", GF_LOG_ERROR, 0, LG_MSG_CONVERSION_FAILED, NULL);
return NULL;
}
@@ -3529,10 +3611,9 @@ gf_is_loopback_localhost(const struct sockaddr *sa, char *hostname)
default:
if (hostname)
- gf_msg("glusterd", GF_LOG_ERROR, 0, LG_MSG_INVALID_FAMILY,
- "unknown "
- "address family %d for %s",
- sa->sa_family, hostname);
+ gf_smsg("glusterd", GF_LOG_ERROR, 0, LG_MSG_INVALID_FAMILY,
+ "family=%d", sa->sa_family, "hostname=%s", hostname,
+ NULL);
break;
}
@@ -3562,23 +3643,27 @@ gf_is_local_addr(char *hostname)
ret = getaddrinfo(hostname, NULL, &hints, &result);
if (ret != 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, LG_MSG_GETADDRINFO_FAILED,
- "error in getaddrinfo: %s\n", gai_strerror(ret));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, LG_MSG_GETADDRINFO_FAILED,
+ "ret=%s", gai_strerror(ret), NULL);
goto out;
}
for (res = result; res != NULL; res = res->ai_next) {
- gf_msg_debug(this->name, 0, "%s ", get_ip_from_addrinfo(res, &ip));
+ get_ip_from_addrinfo(res, &ip);
+ gf_msg_debug(this->name, 0, "%s ", ip);
if (ip) {
- found = gf_is_loopback_localhost(res->ai_addr, hostname) ||
- gf_interface_search(ip);
+ found = (gf_is_loopback_localhost(res->ai_addr, hostname) ||
+ gf_interface_search(ip));
}
if (found) {
GF_FREE(ip);
goto out;
}
GF_FREE(ip);
+ /* the above free will not set ip to NULL, and hence, there is
+ double free possible as the loop continues. set ip to NULL. */
+ ip = NULL;
}
out:
@@ -3607,15 +3692,15 @@ gf_is_same_address(char *name1, char *name2)
gai_err = getaddrinfo(name1, NULL, &hints, &addr1);
if (gai_err != 0) {
- gf_msg(name1, GF_LOG_WARNING, 0, LG_MSG_GETADDRINFO_FAILED,
- "error in getaddrinfo: %s\n", gai_strerror(gai_err));
+ gf_smsg(name1, GF_LOG_WARNING, 0, LG_MSG_GETADDRINFO_FAILED, "error=%s",
+ gai_strerror(gai_err), NULL);
goto out;
}
gai_err = getaddrinfo(name2, NULL, &hints, &addr2);
if (gai_err != 0) {
- gf_msg(name2, GF_LOG_WARNING, 0, LG_MSG_GETADDRINFO_FAILED,
- "error in getaddrinfo: %s\n", gai_strerror(gai_err));
+ gf_smsg(name2, GF_LOG_WARNING, 0, LG_MSG_GETADDRINFO_FAILED, "error=%s",
+ gai_strerror(gai_err), NULL);
goto out;
}
@@ -3752,8 +3837,10 @@ gf_set_volfile_server_common(cmd_args_t *cmd_args, const char *host,
if ((!strcmp(tmp->volfile_server, server->volfile_server) &&
!strcmp(tmp->transport, server->transport) &&
(tmp->port == server->port))) {
- errno = EEXIST;
- ret = -1;
+ /* Duplicate option given, log and ignore */
+ gf_smsg("gluster", GF_LOG_INFO, EEXIST, LG_MSG_DUPLICATE_ENTRY,
+ NULL);
+ ret = 0;
goto out;
}
}
@@ -3916,19 +4003,63 @@ error_return:
return ret;
}
+void
+gf_thread_set_vname(pthread_t thread, const char *name, va_list args)
+{
+ char thread_name[GF_THREAD_NAME_LIMIT];
+ int ret;
+
+ /* Initialize the thread name with the prefix (not NULL terminated). */
+ memcpy(thread_name, GF_THREAD_NAME_PREFIX,
+ sizeof(GF_THREAD_NAME_PREFIX) - 1);
+
+ ret = vsnprintf(thread_name + sizeof(GF_THREAD_NAME_PREFIX) - 1,
+ sizeof(thread_name) - sizeof(GF_THREAD_NAME_PREFIX) + 1,
+ name, args);
+ if (ret < 0) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_PTHREAD_NAMING_FAILED,
+ "name=%s", name, NULL);
+ return;
+ }
+
+ if (ret >= sizeof(thread_name)) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_THREAD_NAME_TOO_LONG,
+ "name=%s", thread_name, NULL);
+ }
+
+#ifdef GF_LINUX_HOST_OS
+ ret = pthread_setname_np(thread, thread_name);
+#elif defined(__NetBSD__)
+ ret = pthread_setname_np(thread, thread_name, NULL);
+#elif defined(__FreeBSD__)
+ pthread_set_name_np(thread, thread_name);
+ ret = 0;
+#else
+ ret = ENOSYS;
+#endif
+ if (ret != 0) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, ret, LG_MSG_SET_THREAD_FAILED,
+ "name=%s", thread_name, NULL);
+ }
+}
+
+void
+gf_thread_set_name(pthread_t thread, const char *name, ...)
+{
+ va_list args;
+
+ va_start(args, name);
+ gf_thread_set_vname(thread, name, args);
+ va_end(args);
+}
+
int
-gf_thread_create(pthread_t *thread, const pthread_attr_t *attr,
- void *(*start_routine)(void *), void *arg, const char *name)
+gf_thread_vcreate(pthread_t *thread, const pthread_attr_t *attr,
+ void *(*start_routine)(void *), void *arg, const char *name,
+ va_list args)
{
sigset_t set, old;
int ret;
- char thread_name[GF_THREAD_NAMEMAX + GF_THREAD_NAME_PREFIX_LEN] = {
- 0,
- };
- /* Max name on Linux is 16 and on NetBSD is 32
- * All Gluster threads have a set prefix of gluster and hence the limit
- * of 9 on GF_THREAD_NAMEMAX including the null character.
- */
sigemptyset(&old);
sigfillset(&set);
@@ -3942,20 +4073,12 @@ gf_thread_create(pthread_t *thread, const pthread_attr_t *attr,
pthread_sigmask(SIG_BLOCK, &set, &old);
ret = pthread_create(thread, attr, start_routine, arg);
- snprintf(thread_name, sizeof(thread_name), "%s%s", GF_THREAD_NAME_PREFIX,
- name);
-
- if (0 == ret && name) {
-#ifdef GF_LINUX_HOST_OS
- pthread_setname_np(*thread, thread_name);
-#elif defined(__NetBSD__)
- pthread_setname_np(*thread, thread_name, NULL);
-#elif defined(__FreeBSD__)
- pthread_set_name_np(*thread, thread_name);
-#else
- gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_PTHREAD_NAMING_FAILED,
- "Could not set thread name: %s", thread_name);
-#endif
+ if (ret != 0) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, ret, LG_MSG_THREAD_CREATE_FAILED,
+ NULL);
+ ret = -1;
+ } else if (name != NULL) {
+ gf_thread_set_vname(*thread, name, args);
}
pthread_sigmask(SIG_SETMASK, &old, NULL);
@@ -3964,27 +4087,40 @@ gf_thread_create(pthread_t *thread, const pthread_attr_t *attr,
}
int
+gf_thread_create(pthread_t *thread, const pthread_attr_t *attr,
+ void *(*start_routine)(void *), void *arg, const char *name,
+ ...)
+{
+ va_list args;
+ int ret;
+
+ va_start(args, name);
+ ret = gf_thread_vcreate(thread, attr, start_routine, arg, name, args);
+ va_end(args);
+
+ return ret;
+}
+
+int
gf_thread_create_detached(pthread_t *thread, void *(*start_routine)(void *),
- void *arg, const char *name)
+ void *arg, const char *name, ...)
{
pthread_attr_t attr;
+ va_list args;
int ret = -1;
ret = pthread_attr_init(&attr);
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, ret, LG_MSG_PTHREAD_ATTR_INIT_FAILED,
- "Thread attribute initialization failed");
+ gf_smsg(THIS->name, GF_LOG_ERROR, ret, LG_MSG_PTHREAD_ATTR_INIT_FAILED,
+ NULL);
return -1;
}
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
- ret = gf_thread_create(thread, &attr, start_routine, arg, name);
- if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, ret, LG_MSG_PTHREAD_FAILED,
- "Thread creation failed");
- ret = -1;
- }
+ va_start(args, name);
+ ret = gf_thread_vcreate(thread, &attr, start_routine, arg, name, args);
+ va_end(args);
pthread_attr_destroy(&attr);
@@ -3998,8 +4134,7 @@ gf_skip_header_section(int fd, int header_len)
ret = sys_lseek(fd, header_len, SEEK_SET);
if (ret == (off_t)-1) {
- gf_msg("", GF_LOG_ERROR, 0, LG_MSG_SKIP_HEADER_FAILED,
- "Failed to skip header section");
+ gf_smsg("", GF_LOG_ERROR, 0, LG_MSG_SKIP_HEADER_FAILED, NULL);
} else {
ret = 0;
}
@@ -4012,6 +4147,14 @@ gf_skip_header_section(int fd, int header_len)
gf_boolean_t
gf_is_pid_running(int pid)
{
+#ifdef __FreeBSD__
+ int ret = -1;
+
+ ret = sys_kill(pid, 0);
+ if (ret < 0) {
+ return _gf_false;
+ }
+#else
char fname[32] = {
0,
};
@@ -4025,6 +4168,7 @@ gf_is_pid_running(int pid)
}
sys_close(fd);
+#endif
return _gf_true;
}
@@ -4045,14 +4189,15 @@ gf_is_service_running(char *pidfile, int *pid)
ret = lockf(fno, F_TEST, 0);
if (ret == -1) {
running = _gf_true;
- goto out;
}
ret = fscanf(file, "%d", pid);
if (ret <= 0) {
- gf_msg("", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED,
- "Unable to read pidfile: %s", pidfile);
+ gf_smsg("", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED, "pidfile=%s",
+ pidfile, NULL);
*pid = -1;
+ running = _gf_false;
+ goto out;
}
running = gf_is_pid_running(*pid);
@@ -4123,10 +4268,10 @@ gf_check_log_format(const char *value)
log_format = gf_logformat_withmsgid;
if (log_format == -1)
- gf_msg(
- THIS->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_LOG,
- "Invalid log-format. possible values are " GF_LOG_FORMAT_NO_MSG_ID
- "|" GF_LOG_FORMAT_WITH_MSG_ID);
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_LOG,
+ "possible_values=" GF_LOG_FORMAT_NO_MSG_ID
+ "|" GF_LOG_FORMAT_WITH_MSG_ID,
+ NULL);
return log_format;
}
@@ -4142,9 +4287,9 @@ gf_check_logger(const char *value)
logger = gf_logger_syslog;
if (logger == -1)
- gf_msg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_LOG,
- "Invalid logger. possible values are " GF_LOGGER_GLUSTER_LOG
- "|" GF_LOGGER_SYSLOG);
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_LOG,
+ "possible_values=" GF_LOGGER_GLUSTER_LOG "|" GF_LOGGER_SYSLOG,
+ NULL);
return logger;
}
@@ -4216,8 +4361,8 @@ gf_set_timestamp(const char *src, const char *dest)
ret = sys_stat(src, &sb);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, errno, LG_MSG_FILE_STAT_FAILED,
- "stat on %s", src);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, LG_MSG_FILE_STAT_FAILED,
+ "stat=%s", src, NULL);
goto out;
}
/* The granularity is nano seconds if `utimensat()` is available,
@@ -4233,8 +4378,8 @@ gf_set_timestamp(const char *src, const char *dest)
/* dirfd = 0 is ignored because `dest` is an absolute path. */
ret = sys_utimensat(AT_FDCWD, dest, new_time, AT_SYMLINK_NOFOLLOW);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, errno, LG_MSG_UTIMENSAT_FAILED,
- "utimensat on %s", dest);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, LG_MSG_UTIMENSAT_FAILED,
+ "dest=%s", dest, NULL);
}
#else
new_time[0].tv_sec = sb.st_atime;
@@ -4245,8 +4390,8 @@ gf_set_timestamp(const char *src, const char *dest)
ret = sys_utimes(dest, new_time);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, errno, LG_MSG_UTIMES_FAILED,
- "utimes on %s", dest);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, LG_MSG_UTIMES_FAILED,
+ "dest=%s", dest, NULL);
}
#endif
out:
@@ -4265,7 +4410,7 @@ gf_backtrace_end(char *buf, size_t frames)
frames = min(frames, GF_BACKTRACE_LEN - pos - 1);
- if (frames <= 0)
+ if (0 == frames)
return;
memset(buf + pos, ')', frames);
@@ -4311,8 +4456,8 @@ gf_backtrace_fillframes(char *buf)
*/
ret = sys_unlink(tmpl);
if (ret < 0) {
- gf_msg(THIS->name, GF_LOG_INFO, 0, LG_MSG_FILE_OP_FAILED,
- "Unable to delete temporary file: %s", tmpl);
+ gf_smsg(THIS->name, GF_LOG_INFO, 0, LG_MSG_FILE_DELETE_FAILED,
+ "temporary_file=%s", tmpl, NULL);
}
/*The most recent two frames are the calling function and
@@ -4372,8 +4517,7 @@ gf_backtrace_save(char *buf)
if ((0 == gf_backtrace_fillframes(bt)))
return bt;
- gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_BACKTRACE_SAVE_FAILED,
- "Failed to save the backtrace.");
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_BACKTRACE_SAVE_FAILED, NULL);
return NULL;
}
@@ -4411,9 +4555,13 @@ fop_log_level(glusterfs_fop_t fop, int op_errno)
return GF_LOG_DEBUG;
if (fop == GF_FOP_SEEK) {
+#ifdef HAVE_SEEK_HOLE
if (op_errno == ENXIO) {
return GF_LOG_DEBUG;
}
+#else
+ return GF_LOG_DEBUG;
+#endif
}
return GF_LOG_ERROR;
@@ -4460,16 +4608,16 @@ gf_build_absolute_path(char *current_path, char *relative_path, char **path)
*/
currentpath_len = strlen(current_path);
if (current_path[0] != '/' || (currentpath_len > PATH_MAX)) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY,
- "Wrong value for current path %s", current_path);
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_WRONG_VALUE,
+ "current-path=%s", current_path, NULL);
ret = -EINVAL;
goto err;
}
relativepath_len = strlen(relative_path);
if (relative_path[0] == '/' || (relativepath_len > PATH_MAX)) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY,
- "Wrong value for relative path %s", relative_path);
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_WRONG_VALUE,
+ "relative-path=%s", relative_path, NULL);
ret = -EINVAL;
goto err;
}
@@ -4585,8 +4733,9 @@ recursive_rmdir(const char *delete_path)
goto out;
}
- GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch);
- while (entry) {
+ while ((entry = sys_readdir(dir, scratch))) {
+ if (gf_irrelevant_entry(entry))
+ continue;
snprintf(path, PATH_MAX, "%s/%s", delete_path, entry->d_name);
ret = sys_lstat(path, &st);
if (ret == -1) {
@@ -4612,8 +4761,6 @@ recursive_rmdir(const char *delete_path)
gf_msg_debug(this->name, 0, "%s %s",
ret ? "Failed to remove" : "Removed", entry->d_name);
-
- GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch);
}
ret = sys_closedir(dir);
@@ -4937,8 +5084,8 @@ gf_zero_fill_stat(struct iatt *buf)
gf_boolean_t
gf_is_valid_xattr_namespace(char *key)
{
- static char *xattr_namespaces[] = {"trusted.", "security.", "system.",
- "user.", NULL};
+ static char *xattr_namespaces[] = {"trusted.", "system.", "user.",
+ "security.", NULL};
int i = 0;
for (i = 0; xattr_namespaces[i]; i++) {
@@ -5161,62 +5308,6 @@ glusterfs_compute_sha256(const unsigned char *content, size_t size,
return 0;
}
-char *
-get_struct_variable(int mem_num, gf_gsync_status_t *sts_val)
-{
- switch (mem_num) {
- case 0:
- return (sts_val->node);
- case 1:
- return (sts_val->master);
- case 2:
- return (sts_val->brick);
- case 3:
- return (sts_val->slave_user);
- case 4:
- return (sts_val->slave);
- case 5:
- return (sts_val->slave_node);
- case 6:
- return (sts_val->worker_status);
- case 7:
- return (sts_val->crawl_status);
- case 8:
- return (sts_val->last_synced);
- case 9:
- return (sts_val->entry);
- case 10:
- return (sts_val->data);
- case 11:
- return (sts_val->meta);
- case 12:
- return (sts_val->failures);
- case 13:
- return (sts_val->checkpoint_time);
- case 14:
- return (sts_val->checkpoint_completed);
- case 15:
- return (sts_val->checkpoint_completion_time);
- case 16:
- return (sts_val->brick_host_uuid);
- case 17:
- return (sts_val->last_synced_utc);
- case 18:
- return (sts_val->checkpoint_time_utc);
- case 19:
- return (sts_val->checkpoint_completion_time_utc);
- case 20:
- return (sts_val->slavekey);
- case 21:
- return (sts_val->session_slave);
- default:
- goto out;
- }
-
-out:
- return NULL;
-}
-
/* * Safe wrapper function for strncpy.
* This wrapper makes sure that when there is no null byte among the first n in
* source srting for strncpy function call, the string placed in dest will be
@@ -5294,3 +5385,81 @@ gf_replace_new_iatt_in_dict(dict_t *xdata)
return ret;
}
+
+xlator_cmdline_option_t *
+find_xlator_option_in_cmd_args_t(const char *option_name, cmd_args_t *args)
+{
+ xlator_cmdline_option_t *pos = NULL;
+ xlator_cmdline_option_t *tmp = NULL;
+
+ list_for_each_entry_safe(pos, tmp, &args->xlator_options, cmd_args)
+ {
+ if (strcmp(pos->key, option_name) == 0)
+ return pos;
+ }
+ return NULL;
+}
+
+int
+gf_d_type_from_ia_type(ia_type_t type)
+{
+ switch (type) {
+ case IA_IFDIR:
+ return DT_DIR;
+ case IA_IFCHR:
+ return DT_CHR;
+ case IA_IFBLK:
+ return DT_BLK;
+ case IA_IFIFO:
+ return DT_FIFO;
+ case IA_IFLNK:
+ return DT_LNK;
+ case IA_IFREG:
+ return DT_REG;
+ case IA_IFSOCK:
+ return DT_SOCK;
+ default:
+ return DT_UNKNOWN;
+ }
+}
+
+int
+gf_nanosleep(uint64_t nsec)
+{
+ struct timespec req;
+ struct timespec rem;
+ int ret = -1;
+
+ req.tv_sec = nsec / GF_SEC_IN_NS;
+ req.tv_nsec = nsec % GF_SEC_IN_NS;
+
+ do {
+ ret = nanosleep(&req, &rem);
+ req = rem;
+ } while (ret == -1 && errno == EINTR);
+
+ return ret;
+}
+
+int
+gf_syncfs(int fd)
+{
+ int ret = 0;
+#if defined(HAVE_SYNCFS)
+ /* Linux with glibc recent enough. */
+ ret = syncfs(fd);
+#elif defined(HAVE_SYNCFS_SYS)
+ /* Linux with no library function. */
+ ret = syscall(SYS_syncfs, fd);
+#else
+ /* Fallback to generic UNIX stuff. */
+ sync();
+#endif
+ return ret;
+}
+
+char **
+get_xattrs_to_heal()
+{
+ return xattrs_to_heal;
+}
diff --git a/libglusterfs/src/compat-errno.c b/libglusterfs/src/compat-errno.c
index 5051b75c772..df57e243239 100644
--- a/libglusterfs/src/compat-errno.c
+++ b/libglusterfs/src/compat-errno.c
@@ -10,7 +10,7 @@
#include <stdint.h>
-#include "compat-errno.h"
+#include "glusterfs/compat-errno.h"
static int32_t gf_error_to_errno_array[1024];
static int32_t gf_errno_to_error_array[1024];
diff --git a/libglusterfs/src/compat.c b/libglusterfs/src/compat.c
index 137bdf45cab..8a05a30a8fe 100644
--- a/libglusterfs/src/compat.c
+++ b/libglusterfs/src/compat.c
@@ -8,7 +8,6 @@
cases as published by the Free Software Foundation.
*/
-#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdarg.h>
@@ -16,17 +15,12 @@
#include <sys/types.h>
#include <dirent.h>
-#ifdef GF_SOLARIS_HOST_OS
-#include "logging.h"
-#endif /* GF_SOLARIS_HOST_OS */
-
-#include "compat.h"
-#include "common-utils.h"
-#include "iatt.h"
-#include "inode.h"
-#include "syscall.h"
-#include "run.h"
-#include "libglusterfs-messages.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/compat.h"
+#include "glusterfs/iatt.h"
+#include "glusterfs/syscall.h"
+#include "glusterfs/run.h"
+#include "glusterfs/libglusterfs-messages.h"
#ifdef GF_SOLARIS_HOST_OS
int
@@ -182,7 +176,7 @@ solaris_xattr_resolve_path(const char *real_path, char **path)
if (!ret && export_path) {
strcat(export_path, "/" GF_SOLARIS_XATTR_DIR);
if (lstat(export_path, &statbuf)) {
- ret = mkdir(export_path, 0777);
+ ret = mkdir(export_path, 0755);
if (ret && (errno != EEXIST)) {
gf_msg_debug(THIS->name, 0,
"mkdir failed,"
diff --git a/libglusterfs/src/compound-fop-utils.c b/libglusterfs/src/compound-fop-utils.c
deleted file mode 100644
index 8bdb8e8fd5b..00000000000
--- a/libglusterfs/src/compound-fop-utils.c
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include "defaults.h"
-#include "default-args.h"
-#include "mem-types.h"
-#include "dict.h"
-
-void
-compound_args_cleanup(compound_args_t *args)
-{
- int i;
-
- if (!args)
- return;
-
- if (args->xdata)
- dict_unref(args->xdata);
-
- if (args->req_list) {
- for (i = 0; i < args->fop_length; i++) {
- args_wipe(&args->req_list[i]);
- }
- }
-
- GF_FREE(args->enum_list);
- GF_FREE(args->req_list);
- GF_FREE(args);
-}
-
-void
-compound_args_cbk_cleanup(compound_args_cbk_t *args_cbk)
-{
- int i;
-
- if (!args_cbk)
- return;
-
- if (args_cbk->xdata)
- dict_unref(args_cbk->xdata);
-
- if (args_cbk->rsp_list) {
- for (i = 0; i < args_cbk->fop_length; i++) {
- args_cbk_wipe(&args_cbk->rsp_list[i]);
- }
- }
-
- GF_FREE(args_cbk->rsp_list);
- GF_FREE(args_cbk->enum_list);
- GF_FREE(args_cbk);
-}
-
-compound_args_cbk_t *
-compound_args_cbk_alloc(int length, dict_t *xdata)
-{
- int i = 0;
- compound_args_cbk_t *args_cbk = NULL;
-
- args_cbk = GF_CALLOC(1, sizeof(*args_cbk), gf_mt_compound_rsp_t);
- if (!args_cbk)
- return NULL;
-
- args_cbk->fop_length = length;
-
- args_cbk->rsp_list = GF_CALLOC(length, sizeof(*args_cbk->rsp_list),
- gf_mt_default_args_cbk_t);
- if (!args_cbk->rsp_list)
- goto out;
-
- for (i = 0; i < length; i++) {
- args_cbk_init(&args_cbk->rsp_list[i]);
- }
-
- args_cbk->enum_list = GF_CALLOC(length, sizeof(*args_cbk->enum_list),
- gf_common_mt_int);
- if (!args_cbk->enum_list)
- goto out;
-
- if (xdata) {
- args_cbk->xdata = dict_copy_with_ref(xdata, NULL);
- if (!args_cbk->xdata)
- goto out;
- }
-
- return args_cbk;
-out:
- compound_args_cbk_cleanup(args_cbk);
- return NULL;
-}
-
-compound_args_t *
-compound_fop_alloc(int length, glusterfs_compound_fop_t fop, dict_t *xdata)
-{
- compound_args_t *args = NULL;
-
- args = GF_CALLOC(1, sizeof(*args), gf_mt_compound_req_t);
-
- if (!args)
- return NULL;
-
- /* fop_enum can be used by xlators to see which fops are
- * included as part of compound fop. This will help in checking
- * for compatibility or support without going through the entire
- * fop list packed.
- */
- args->fop_enum = fop;
- args->fop_length = length;
-
- args->enum_list = GF_CALLOC(length, sizeof(*args->enum_list),
- gf_common_mt_int);
-
- if (!args->enum_list)
- goto out;
-
- args->req_list = GF_CALLOC(length, sizeof(*args->req_list),
- gf_mt_default_args_t);
-
- if (!args->req_list)
- goto out;
-
- if (xdata) {
- args->xdata = dict_copy_with_ref(xdata, args->xdata);
- if (!args->xdata)
- goto out;
- }
-
- return args;
-out:
- compound_args_cleanup(args);
- return NULL;
-}
diff --git a/libglusterfs/src/compound-fop-utils.h b/libglusterfs/src/compound-fop-utils.h
deleted file mode 100644
index 23a2f0df3fd..00000000000
--- a/libglusterfs/src/compound-fop-utils.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __COMPOUND_FOP_UTILS_H__
-#define __COMPOUND_FOP_UTILS_H__
-
-#include "defaults.h"
-#include "default-args.h"
-#include "mem-types.h"
-#include "dict.h"
-
-#define COMPOUND_PACK_ARGS(fop, fop_enum, args, counter, params...) \
- do { \
- args->enum_list[counter] = fop_enum; \
- args_##fop##_store(&args->req_list[counter], params); \
- } while (0)
-
-compound_args_t *
-compound_fop_alloc(int length, glusterfs_compound_fop_t fop, dict_t *xdata);
-
-void
-compound_args_cleanup(compound_args_t *args);
-
-void
-compound_args_cbk_cleanup(compound_args_cbk_t *args_cbk);
-
-compound_args_cbk_t *
-compound_args_cbk_alloc(int length, dict_t *xdata);
-#endif /* __COMPOUND_FOP_UTILS_H__ */
diff --git a/libglusterfs/src/ctx.c b/libglusterfs/src/ctx.c
index 7a0e0ad5ea4..3d890b04ec9 100644
--- a/libglusterfs/src/ctx.c
+++ b/libglusterfs/src/ctx.c
@@ -10,8 +10,8 @@
#include <pthread.h>
-#include "globals.h"
-#include "glusterfs.h"
+#include "glusterfs/globals.h"
+#include "glusterfs/glusterfs.h"
#include "timer-wheel.h"
glusterfs_ctx_t *
@@ -37,8 +37,12 @@ glusterfs_ctx_new()
ctx->log.loglevel = DEFAULT_LOG_LEVEL;
-#ifdef RUN_WITH_VALGRIND
- ctx->cmd_args.valgrind = _gf_true;
+#if defined(RUN_WITH_MEMCHECK)
+ ctx->cmd_args.vgtool = _gf_memcheck;
+#elif defined(RUN_WITH_DRD)
+ ctx->cmd_args.vgtool = _gf_drd;
+#else
+ ctx->cmd_args.vgtool = _gf_none;
#endif
/* lock is never destroyed! */
diff --git a/libglusterfs/src/daemon.c b/libglusterfs/src/daemon.c
index f821f8f7ed0..0a3e5438325 100644
--- a/libglusterfs/src/daemon.c
+++ b/libglusterfs/src/daemon.c
@@ -8,10 +8,9 @@
cases as published by the Free Software Foundation.
*/
-#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
-#include "daemon.h"
+#include "glusterfs/daemon.h"
int
os_daemon_return(int nochdir, int noclose)
diff --git a/libglusterfs/src/default-args.c b/libglusterfs/src/default-args.c
index c92f3d46a86..a0ba1cfb299 100644
--- a/libglusterfs/src/default-args.c
+++ b/libglusterfs/src/default-args.c
@@ -13,8 +13,7 @@
#include "config.h"
#endif
-#include "xlator.h"
-#include "defaults.h"
+#include "glusterfs/defaults.h"
int
args_lookup_store(default_args_t *args, loc_t *loc, dict_t *xdata)
@@ -1137,7 +1136,8 @@ args_rchecksum_cbk_store(default_args_cbk_t *args, int32_t op_ret,
args->op_errno = op_errno;
if (op_ret >= 0) {
args->weak_checksum = weak_checksum;
- args->strong_checksum = memdup(strong_checksum, SHA256_DIGEST_LENGTH);
+ args->strong_checksum = gf_memdup(strong_checksum,
+ SHA256_DIGEST_LENGTH);
}
if (xdata)
args->xdata = dict_ref(xdata);
@@ -1541,6 +1541,48 @@ args_namelink_store(default_args_t *args, loc_t *loc, dict_t *xdata)
return 0;
}
+int
+args_copy_file_range_store(default_args_t *args, fd_t *fd_in, off64_t off_in,
+ fd_t *fd_out, off64_t off_out, size_t len,
+ uint32_t flags, dict_t *xdata)
+{
+ if (fd_in)
+ args->fd = fd_ref(fd_in);
+ if (fd_out)
+ args->fd_dst = fd_ref(fd_out);
+ args->size = len;
+ args->off_in = off_in;
+ args->off_out = off_out;
+ args->flags = flags;
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_copy_file_range_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *stbuf,
+ struct iatt *prebuf_dst,
+ struct iatt *postbuf_dst, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (op_ret >= 0) {
+ if (postbuf_dst)
+ args->poststat = *postbuf_dst;
+ if (prebuf_dst)
+ args->prestat = *prebuf_dst;
+ if (stbuf)
+ args->stat = *stbuf;
+ }
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
void
args_cbk_wipe(default_args_cbk_t *args_cbk)
{
diff --git a/libglusterfs/src/defaults-tmpl.c b/libglusterfs/src/defaults-tmpl.c
index bdb30d4872c..3cf707f42aa 100644
--- a/libglusterfs/src/defaults-tmpl.c
+++ b/libglusterfs/src/defaults-tmpl.c
@@ -25,8 +25,8 @@
#include "config.h"
#endif
-#include "xlator.h"
-#include "defaults.h"
+#include "glusterfs/xlator.h"
+#include "glusterfs/defaults.h"
#pragma generate
@@ -84,6 +84,7 @@ struct xlator_fops _default_fops = {
.put = default_put,
.icreate = default_icreate,
.namelink = default_namelink,
+ .copy_file_range = default_copy_file_range,
};
struct xlator_fops *default_fops = &_default_fops;
@@ -126,6 +127,12 @@ default_notify(xlator_t *this, int32_t event, void *data, ...)
GF_UNUSED int ret = 0;
xlator_t *victim = data;
+ glusterfs_graph_t *graph = NULL;
+
+ GF_VALIDATE_OR_GOTO("notify", this, out);
+ graph = this->graph;
+ GF_VALIDATE_OR_GOTO(this->name, graph, out);
+
switch (event) {
case GF_EVENT_PARENT_UP:
case GF_EVENT_PARENT_DOWN: {
@@ -158,6 +165,20 @@ default_notify(xlator_t *this, int32_t event, void *data, ...)
xlator_notify(parent->xlator, event, this, NULL);
parent = parent->next;
}
+
+ if (event == GF_EVENT_CHILD_DOWN &&
+ !(this->ctx && this->ctx->master) && (graph->top == this)) {
+ /* Make sure this is not a daemon with master xlator */
+ pthread_mutex_lock(&graph->mutex);
+ {
+ if (graph->parent_down ==
+ graph_total_client_xlator(graph)) {
+ graph->used = 0;
+ pthread_cond_broadcast(&graph->child_down_cond);
+ }
+ }
+ pthread_mutex_unlock(&graph->mutex);
+ }
} break;
case GF_EVENT_UPCALL: {
xlator_list_t *parent = this->parents;
@@ -204,7 +225,7 @@ default_notify(xlator_t *this, int32_t event, void *data, ...)
* nothing to do with readability.
*/
}
-
+out:
return 0;
}
diff --git a/libglusterfs/src/dict.c b/libglusterfs/src/dict.c
index aed35df191e..1d9be9217a6 100644
--- a/libglusterfs/src/dict.c
+++ b/libglusterfs/src/dict.c
@@ -16,20 +16,14 @@
#include <limits.h>
#include <fnmatch.h>
-#include "glusterfs.h"
-#include "common-utils.h"
-#include "dict.h"
+#include "glusterfs/dict.h"
#define XXH_INLINE_ALL
#include "xxhash.h"
-#include "logging.h"
-#include "compat.h"
-#include "compat-errno.h"
-#include "byte-order.h"
-#include "globals.h"
-#include "statedump.h"
-#include "libglusterfs-messages.h"
-
-#include "glusterfs-fops.h"
+#include "glusterfs/compat.h"
+#include "glusterfs/compat-errno.h"
+#include "glusterfs/byte-order.h"
+#include "glusterfs/statedump.h"
+#include "glusterfs/libglusterfs-messages.h"
struct dict_cmp {
dict_t *dict;
@@ -39,14 +33,14 @@ struct dict_cmp {
#define VALIDATE_DATA_AND_LOG(data, type, key, ret_val) \
do { \
if (!data || !data->data) { \
- gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, \
- LG_MSG_INVALID_ARG, "data is NULL"); \
+ gf_msg_callingfn("dict", GF_LOG_DEBUG, EINVAL, LG_MSG_INVALID_ARG, \
+ "data is NULL"); \
return ret_val; \
} \
/* Not of the asked type, or old version */ \
if ((data->data_type != type) && \
(data->data_type != GF_DATA_TYPE_STR_OLD)) { \
- gf_msg_callingfn("dict", GF_LOG_INFO, EINVAL, LG_MSG_INVALID_ARG, \
+ gf_msg_callingfn("dict", GF_LOG_DEBUG, EINVAL, LG_MSG_INVALID_ARG, \
"key %s, %s type asked, has %s type", key, \
data_type_name[type], \
data_type_name[data->data_type]); \
@@ -63,7 +57,6 @@ get_new_data()
GF_ATOMIC_INIT(data->refcount, 0);
data->is_static = _gf_false;
- LOCK_INIT(&data->lock);
return data;
}
@@ -104,18 +97,14 @@ get_new_dict_full(int size_hint)
}
}
+ dict->free_pair.key = NULL;
+ dict->totkvlen = 0;
LOCK_INIT(&dict->lock);
return dict;
}
dict_t *
-get_new_dict(void)
-{
- return get_new_dict_full(1);
-}
-
-dict_t *
dict_new(void)
{
dict_t *dict = get_new_dict_full(1);
@@ -130,6 +119,7 @@ int32_t
is_data_equal(data_t *one, data_t *two)
{
struct iatt *iatt1, *iatt2;
+ struct mdata_iatt *mdata_iatt1, *mdata_iatt2;
if (!one || !two || !one->data || !two->data) {
gf_msg_callingfn("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
@@ -194,6 +184,24 @@ is_data_equal(data_t *one, data_t *two)
*/
return 1;
}
+ if (one->data_type == GF_DATA_TYPE_MDATA) {
+ if ((one->len < sizeof(struct mdata_iatt)) ||
+ (two->len < sizeof(struct mdata_iatt))) {
+ return 0;
+ }
+ mdata_iatt1 = (struct mdata_iatt *)one->data;
+ mdata_iatt2 = (struct mdata_iatt *)two->data;
+
+ if (mdata_iatt1->ia_atime != mdata_iatt2->ia_atime ||
+ mdata_iatt1->ia_mtime != mdata_iatt2->ia_mtime ||
+ mdata_iatt1->ia_ctime != mdata_iatt2->ia_ctime ||
+ mdata_iatt1->ia_atime_nsec != mdata_iatt2->ia_atime_nsec ||
+ mdata_iatt1->ia_mtime_nsec != mdata_iatt2->ia_mtime_nsec ||
+ mdata_iatt1->ia_ctime_nsec != mdata_iatt2->ia_ctime_nsec) {
+ return 0;
+ }
+ return 1;
+ }
if (one->len != two->len)
return 0;
@@ -286,8 +294,6 @@ void
data_destroy(data_t *data)
{
if (data) {
- LOCK_DESTROY(&data->lock);
-
if (!data->is_static)
GF_FREE(data->data);
@@ -312,13 +318,12 @@ data_copy(data_t *old)
newdata->len = old->len;
if (old->data) {
- newdata->data = memdup(old->data, old->len);
+ newdata->data = gf_memdup(old->data, old->len);
if (!newdata->data)
goto err_out;
}
newdata->data_type = old->data_type;
- LOCK_INIT(&newdata->lock);
return newdata;
err_out:
@@ -332,7 +337,7 @@ err_out:
* checked by callers.
*/
static data_pair_t *
-dict_lookup_common(dict_t *this, char *key, uint32_t hash)
+dict_lookup_common(const dict_t *this, const char *key, const uint32_t hash)
{
int hashval = 0;
data_pair_t *pair;
@@ -379,8 +384,8 @@ dict_lookup(dict_t *this, char *key, data_t **data)
}
static int32_t
-dict_set_lk(dict_t *this, char *key, data_t *value, const uint32_t hash,
- gf_boolean_t replace)
+dict_set_lk(dict_t *this, char *key, const int key_len, data_t *value,
+ const uint32_t hash, gf_boolean_t replace)
{
int hashval = 0;
data_pair_t *pair;
@@ -396,7 +401,7 @@ dict_set_lk(dict_t *this, char *key, data_t *value, const uint32_t hash,
key_free = 1;
key_hash = (uint32_t)XXH64(key, keylen, 0);
} else {
- keylen = strlen(key);
+ keylen = key_len;
key_hash = hash;
}
@@ -406,6 +411,7 @@ dict_set_lk(dict_t *this, char *key, data_t *value, const uint32_t hash,
if (pair) {
data_t *unref_data = pair->value;
pair->value = data_ref(value);
+ this->totkvlen += (value->len - unref_data->len);
data_unref(unref_data);
if (key_free)
GF_FREE(key);
@@ -414,16 +420,15 @@ dict_set_lk(dict_t *this, char *key, data_t *value, const uint32_t hash,
}
}
- if (this->free_pair_in_use) {
+ if (this->free_pair.key) { /* the free_pair is used */
pair = mem_get(THIS->ctx->dict_pair_pool);
if (!pair) {
if (key_free)
GF_FREE(key);
return -1;
}
- } else {
+ } else { /* assign the pair to the free pair */
pair = &this->free_pair;
- this->free_pair_in_use = _gf_true;
}
if (key_free) {
@@ -433,9 +438,7 @@ dict_set_lk(dict_t *this, char *key, data_t *value, const uint32_t hash,
} else {
pair->key = (char *)GF_MALLOC(keylen + 1, gf_common_mt_char);
if (!pair->key) {
- if (pair == &this->free_pair) {
- this->free_pair_in_use = _gf_false;
- } else {
+ if (pair != &this->free_pair) {
mem_put(pair);
}
return -1;
@@ -444,6 +447,7 @@ dict_set_lk(dict_t *this, char *key, data_t *value, const uint32_t hash,
}
pair->key_hash = key_hash;
pair->value = data_ref(value);
+ this->totkvlen += (keylen + 1 + value->len);
/* If the divisor is 1, the modulo is always 0,
* in such case avoid hash calculation.
@@ -493,12 +497,12 @@ dict_setn(dict_t *this, char *key, const int keylen, data_t *value)
}
if (key) {
- key_hash = (int32_t)XXH64(key, keylen, 0);
+ key_hash = (uint32_t)XXH64(key, keylen, 0);
}
LOCK(&this->lock);
- ret = dict_set_lk(this, key, value, key_hash, 1);
+ ret = dict_set_lk(this, key, keylen, value, key_hash, 1);
UNLOCK(&this->lock);
@@ -532,7 +536,7 @@ dict_addn(dict_t *this, char *key, const int keylen, data_t *value)
LOCK(&this->lock);
- ret = dict_set_lk(this, key, value, key_hash, 0);
+ ret = dict_set_lk(this, key, keylen, value, key_hash, 0);
UNLOCK(&this->lock);
@@ -543,7 +547,7 @@ data_t *
dict_get(dict_t *this, char *key)
{
if (!this || !key) {
- gf_msg_callingfn("dict", GF_LOG_INFO, EINVAL, LG_MSG_INVALID_ARG,
+ gf_msg_callingfn("dict", GF_LOG_DEBUG, EINVAL, LG_MSG_INVALID_ARG,
"!this || key=%s", (key) ? key : "()");
return NULL;
}
@@ -558,7 +562,7 @@ dict_getn(dict_t *this, char *key, const int keylen)
uint32_t hash;
if (!this || !key) {
- gf_msg_callingfn("dict", GF_LOG_INFO, EINVAL, LG_MSG_INVALID_ARG,
+ gf_msg_callingfn("dict", GF_LOG_DEBUG, EINVAL, LG_MSG_INVALID_ARG,
"!this || key=%s", (key) ? key : "()");
return NULL;
}
@@ -641,6 +645,7 @@ dict_deln(dict_t *this, char *key, const int keylen)
else
this->members[hashval] = pair->hash_next;
+ this->totkvlen -= pair->value->len;
data_unref(pair->value);
if (pair->prev)
@@ -651,9 +656,10 @@ dict_deln(dict_t *this, char *key, const int keylen)
if (pair->next)
pair->next->prev = pair->prev;
+ this->totkvlen -= (strlen(pair->key) + 1);
GF_FREE(pair->key);
if (pair == &this->free_pair) {
- this->free_pair_in_use = _gf_false;
+ this->free_pair.key = NULL;
} else {
mem_put(pair);
}
@@ -693,16 +699,18 @@ dict_destroy(dict_t *this)
GF_FREE(prev->key);
if (prev != &this->free_pair) {
mem_put(prev);
+ } else {
+ this->free_pair.key = NULL;
}
total_pairs++;
prev = pair;
}
+ this->totkvlen = 0;
if (this->members != &this->members_internal) {
mem_put(this->members);
}
- GF_FREE(this->extra_free);
free(this->extra_stdfree);
/* update 'ctx->stats.dict.details' using max_count */
@@ -734,7 +742,7 @@ dict_unref(dict_t *this)
uint64_t ref = 0;
if (!this) {
- gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ gf_msg_callingfn("dict", GF_LOG_DEBUG, EINVAL, LG_MSG_INVALID_ARG,
"dict is NULL");
return;
}
@@ -749,7 +757,7 @@ dict_t *
dict_ref(dict_t *this)
{
if (!this) {
- gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ gf_msg_callingfn("dict", GF_LOG_DEBUG, EINVAL, LG_MSG_INVALID_ARG,
"dict is NULL");
return NULL;
}
@@ -801,6 +809,7 @@ int_to_data(int64_t value)
data->len = gf_asprintf(&data->data, "%" PRId64, value);
if (-1 == data->len) {
gf_msg_debug("dict", 0, "asprintf failed");
+ data_destroy(data);
return NULL;
}
data->len++; /* account for terminating NULL */
@@ -820,6 +829,7 @@ data_from_int64(int64_t value)
data->len = gf_asprintf(&data->data, "%" PRId64, value);
if (-1 == data->len) {
gf_msg_debug("dict", 0, "asprintf failed");
+ data_destroy(data);
return NULL;
}
data->len++; /* account for terminating NULL */
@@ -839,6 +849,7 @@ data_from_int32(int32_t value)
data->len = gf_asprintf(&data->data, "%" PRId32, value);
if (-1 == data->len) {
gf_msg_debug("dict", 0, "asprintf failed");
+ data_destroy(data);
return NULL;
}
@@ -859,6 +870,7 @@ data_from_int16(int16_t value)
data->len = gf_asprintf(&data->data, "%" PRId16, value);
if (-1 == data->len) {
gf_msg_debug("dict", 0, "asprintf failed");
+ data_destroy(data);
return NULL;
}
@@ -879,6 +891,7 @@ data_from_int8(int8_t value)
data->len = gf_asprintf(&data->data, "%d", value);
if (-1 == data->len) {
gf_msg_debug("dict", 0, "asprintf failed");
+ data_destroy(data);
return NULL;
}
@@ -899,6 +912,7 @@ data_from_uint64(uint64_t value)
data->len = gf_asprintf(&data->data, "%" PRIu64, value);
if (-1 == data->len) {
gf_msg_debug("dict", 0, "asprintf failed");
+ data_destroy(data);
return NULL;
}
@@ -919,6 +933,8 @@ data_from_double(double value)
data->len = gf_asprintf(&data->data, "%f", value);
if (data->len == -1) {
+ gf_msg_debug("dict", 0, "asprintf failed");
+ data_destroy(data);
return NULL;
}
data->len++; /* account for terminating NULL */
@@ -938,6 +954,7 @@ data_from_uint32(uint32_t value)
data->len = gf_asprintf(&data->data, "%" PRIu32, value);
if (-1 == data->len) {
gf_msg_debug("dict", 0, "asprintf failed");
+ data_destroy(data);
return NULL;
}
@@ -957,6 +974,8 @@ data_from_uint16(uint16_t value)
}
data->len = gf_asprintf(&data->data, "%" PRIu16, value);
if (-1 == data->len) {
+ gf_msg_debug("dict", 0, "asprintf failed");
+ data_destroy(data);
return NULL;
}
@@ -1084,6 +1103,7 @@ static char *data_type_name[GF_DATA_TYPE_MAX] = {
[GF_DATA_TYPE_PTR] = "pointer",
[GF_DATA_TYPE_GFUUID] = "gf-uuid",
[GF_DATA_TYPE_IATT] = "iatt",
+ [GF_DATA_TYPE_MDATA] = "mdata",
};
int64_t
@@ -1091,117 +1111,146 @@ data_to_int64(data_t *data)
{
VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, "null", -1);
- return (int64_t)strtoull(data->data, NULL, 0);
+ char *endptr = NULL;
+ int64_t value = 0;
+
+ errno = 0;
+ value = strtoll(data->data, &endptr, 0);
+
+ if (endptr && *endptr != '\0')
+ /* Unrecognized characters at the end of string. */
+ errno = EINVAL;
+ if (errno) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, errno,
+ LG_MSG_DATA_CONVERSION_ERROR,
+ "Error in data conversion: '%s' can't "
+ "be represented as int64_t",
+ data->data);
+ return -1;
+ }
+ return value;
}
+/* Like above but implies signed range check. */
+
+#define DATA_TO_RANGED_SIGNED(endptr, value, data, type, min, max) \
+ do { \
+ errno = 0; \
+ value = strtoll(data->data, &endptr, 0); \
+ if (endptr && *endptr != '\0') \
+ errno = EINVAL; \
+ if (errno || value > max || value < min) { \
+ gf_msg_callingfn("dict", GF_LOG_WARNING, errno, \
+ LG_MSG_DATA_CONVERSION_ERROR, \
+ "Error in data conversion: '%s' can't " \
+ "be represented as " #type, \
+ data->data); \
+ return -1; \
+ } \
+ return (type)value; \
+ } while (0)
+
int32_t
data_to_int32(data_t *data)
{
- VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, "null", -1);
+ char *endptr = NULL;
+ int64_t value = 0;
- return strtoul(data->data, NULL, 0);
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, "null", -1);
+ DATA_TO_RANGED_SIGNED(endptr, value, data, int32_t, INT_MIN, INT_MAX);
}
int16_t
data_to_int16(data_t *data)
{
- VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, "null", -1);
-
- int16_t value = 0;
-
- errno = 0;
- value = strtol(data->data, NULL, 0);
-
- if ((value > SHRT_MAX) || (value < SHRT_MIN)) {
- errno = ERANGE;
- gf_msg_callingfn("dict", GF_LOG_WARNING, errno,
- LG_MSG_DATA_CONVERSION_ERROR,
- "Error in data"
- " conversion: detected overflow");
- return -1;
- }
+ char *endptr = NULL;
+ int64_t value = 0;
- return (int16_t)value;
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, "null", -1);
+ DATA_TO_RANGED_SIGNED(endptr, value, data, int16_t, SHRT_MIN, SHRT_MAX);
}
int8_t
data_to_int8(data_t *data)
{
+ char *endptr = NULL;
+ int64_t value = 0;
+
VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, "null", -1);
+ DATA_TO_RANGED_SIGNED(endptr, value, data, int8_t, CHAR_MIN, CHAR_MAX);
+}
- int8_t value = 0;
+uint64_t
+data_to_uint64(data_t *data)
+{
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, "null", -1);
+
+ char *endptr = NULL;
+ uint64_t value = 0;
errno = 0;
- value = strtol(data->data, NULL, 0);
+ value = strtoull(data->data, &endptr, 0);
- if ((value > SCHAR_MAX) || (value < SCHAR_MIN)) {
- errno = ERANGE;
+ if (endptr && *endptr != '\0')
+ errno = EINVAL;
+ if (errno) {
gf_msg_callingfn("dict", GF_LOG_WARNING, errno,
LG_MSG_DATA_CONVERSION_ERROR,
- "Error in data"
- " conversion: detected overflow");
+ "Error in data conversion: '%s' can't "
+ "be represented as uint64_t",
+ data->data);
return -1;
}
-
- return (int8_t)value;
+ return value;
}
-uint64_t
-data_to_uint64(data_t *data)
-{
- VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, "null", -1);
+/* Like above but implies unsigned range check. */
- return strtoll(data->data, NULL, 0);
-}
+#define DATA_TO_RANGED_UNSIGNED(endptr, value, data, type, max) \
+ do { \
+ errno = 0; \
+ value = strtoull(data->data, &endptr, 0); \
+ if (endptr && *endptr != '\0') \
+ errno = EINVAL; \
+ if (errno || value > max) { \
+ gf_msg_callingfn("dict", GF_LOG_WARNING, errno, \
+ LG_MSG_DATA_CONVERSION_ERROR, \
+ "Error in data conversion: '%s' can't " \
+ "be represented as " #type, \
+ data->data); \
+ return -1; \
+ } \
+ return (type)value; \
+ } while (0)
uint32_t
data_to_uint32(data_t *data)
{
- VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, "null", -1);
+ char *endptr = NULL;
+ uint64_t value = 0;
- return strtol(data->data, NULL, 0);
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, "null", -1);
+ DATA_TO_RANGED_UNSIGNED(endptr, value, data, uint32_t, UINT_MAX);
}
uint16_t
data_to_uint16(data_t *data)
{
- VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, "null", -1);
-
- uint16_t value = 0;
+ char *endptr = NULL;
+ uint64_t value = 0;
- errno = 0;
- value = strtol(data->data, NULL, 0);
-
- if ((USHRT_MAX - value) < 0) {
- errno = ERANGE;
- gf_msg_callingfn("dict", GF_LOG_WARNING, errno,
- LG_MSG_DATA_CONVERSION_ERROR,
- "Error in data conversion: "
- "overflow detected");
- return -1;
- }
-
- return (uint16_t)value;
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, "null", -1);
+ DATA_TO_RANGED_UNSIGNED(endptr, value, data, uint16_t, USHRT_MAX);
}
uint8_t
data_to_uint8(data_t *data)
{
- VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, "null", -1);
-
- errno = 0;
- uint32_t value = strtol(data->data, NULL, 0);
-
- if ((UCHAR_MAX - (uint8_t)value) < 0) {
- errno = ERANGE;
- gf_msg_callingfn("dict", GF_LOG_WARNING, errno,
- LG_MSG_DATA_CONVERSION_ERROR,
- "data "
- "conversion overflow detected");
- return -1;
- }
+ char *endptr = NULL;
+ uint64_t value = 0;
- return (uint8_t)value;
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, "null", -1);
+ DATA_TO_RANGED_UNSIGNED(endptr, value, data, uint8_t, UCHAR_MAX);
}
char *
@@ -1235,8 +1284,8 @@ data_to_iatt(data_t *data, char *key)
* pass more data but are backward compatible (if the initial contents
* of the struct are maintained, of course). */
if (data->len < sizeof(struct iatt)) {
- gf_msg("glusterfs", GF_LOG_ERROR, ENOBUFS, LG_MSG_UNDERSIZED_BUF,
- "data value for '%s' is smaller than expected", key);
+ gf_smsg("glusterfs", GF_LOG_ERROR, ENOBUFS, LG_MSG_UNDERSIZED_BUF,
+ "key=%s", key, NULL);
return NULL;
}
@@ -1253,8 +1302,8 @@ int
dict_remove_foreach_fn(dict_t *d, char *k, data_t *v, void *_tmp)
{
if (!d || !k) {
- gf_msg("glusterfs", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ENTRY,
- "%s is NULL", d ? "key" : "dictionary");
+ gf_smsg("glusterfs", GF_LOG_WARNING, EINVAL, LG_MSG_KEY_OR_VALUE_NULL,
+ "d=%s", d ? "key" : "dictionary", NULL);
return -1;
}
@@ -1446,32 +1495,13 @@ fail:
* -val error, val = errno
*/
-int
-dict_get_with_ref(dict_t *this, char *key, data_t **data)
-{
- if (!this || !key || !data) {
- gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
- "dict OR key (%s) is NULL", key);
- return -EINVAL;
- }
-
- return dict_get_with_refn(this, key, strlen(key), data);
-}
-
-int
+static int
dict_get_with_refn(dict_t *this, char *key, const int keylen, data_t **data)
{
data_pair_t *pair = NULL;
int ret = -ENOENT;
uint32_t hash;
- if (!this || !key || !data) {
- gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
- "dict OR key (%s) is NULL", key);
- ret = -EINVAL;
- goto err;
- }
-
hash = (uint32_t)XXH64(key, keylen, 0);
LOCK(&this->lock);
@@ -1484,10 +1514,22 @@ dict_get_with_refn(dict_t *this, char *key, const int keylen, data_t **data)
}
}
UNLOCK(&this->lock);
-err:
+
return ret;
}
+int
+dict_get_with_ref(dict_t *this, char *key, data_t **data)
+{
+ if (!this || !key || !data) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "dict OR key (%s) is NULL", key);
+ return -EINVAL;
+ }
+
+ return dict_get_with_refn(this, key, strlen(key), data);
+}
+
static int
data_to_ptr_common(data_t *data, void **val)
{
@@ -1661,7 +1703,7 @@ dict_get_int8(dict_t *this, char *key, int8_t *val)
data_t *data = NULL;
int ret = 0;
- if (!this || !key || !val) {
+ if (!val) {
ret = -EINVAL;
goto err;
}
@@ -1707,7 +1749,7 @@ dict_get_int16(dict_t *this, char *key, int16_t *val)
data_t *data = NULL;
int ret = 0;
- if (!this || !key || !val) {
+ if (!val) {
ret = -EINVAL;
goto err;
}
@@ -1779,7 +1821,7 @@ dict_get_int32(dict_t *this, char *key, int32_t *val)
data_t *data = NULL;
int ret = 0;
- if (!this || !key || !val) {
+ if (!val) {
ret = -EINVAL;
goto err;
}
@@ -1844,7 +1886,7 @@ dict_get_int64(dict_t *this, char *key, int64_t *val)
data_t *data = NULL;
int ret = 0;
- if (!this || !key || !val) {
+ if (!val) {
ret = -EINVAL;
goto err;
}
@@ -1889,7 +1931,7 @@ dict_get_uint16(dict_t *this, char *key, uint16_t *val)
data_t *data = NULL;
int ret = 0;
- if (!this || !key || !val) {
+ if (!val) {
ret = -EINVAL;
goto err;
}
@@ -1934,7 +1976,7 @@ dict_get_uint32(dict_t *this, char *key, uint32_t *val)
data_t *data = NULL;
int ret = 0;
- if (!this || !key || !val) {
+ if (!val) {
ret = -EINVAL;
goto err;
}
@@ -1979,7 +2021,7 @@ dict_get_uint64(dict_t *this, char *key, uint64_t *val)
data_t *data = NULL;
int ret = 0;
- if (!this || !key || !val) {
+ if (!val) {
ret = -EINVAL;
goto err;
}
@@ -2075,7 +2117,7 @@ _dict_modify_flag(dict_t *this, char *key, int flag, int op)
*/
GF_ASSERT(flag >= 0 && flag < DICT_MAX_FLAGS);
- hash = (int32_t)XXH64(key, strlen(key), 0);
+ hash = (uint32_t)XXH64(key, strlen(key), 0);
LOCK(&this->lock);
{
pair = dict_lookup_common(this, key, hash);
@@ -2089,8 +2131,8 @@ _dict_modify_flag(dict_t *this, char *key, int flag, int op)
} else {
ptr = GF_CALLOC(1, DICT_MAX_FLAGS / 8, gf_common_mt_char);
if (!ptr) {
- gf_msg("dict", GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY,
- "unable to allocate flag bit array");
+ gf_smsg("dict", GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY,
+ "flag bit array", NULL);
ret = -ENOMEM;
goto err;
}
@@ -2098,8 +2140,8 @@ _dict_modify_flag(dict_t *this, char *key, int flag, int op)
data = data_from_dynptr(ptr, DICT_MAX_FLAGS / 8);
if (!data) {
- gf_msg("dict", GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY,
- "unable to allocate data");
+ gf_smsg("dict", GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY, "data",
+ NULL);
GF_FREE(ptr);
ret = -ENOMEM;
goto err;
@@ -2110,30 +2152,29 @@ _dict_modify_flag(dict_t *this, char *key, int flag, int op)
else
BIT_CLEAR((unsigned char *)(data->data), flag);
- if (this->free_pair_in_use) {
+ if (this->free_pair.key) { /* the free pair is in use */
pair = mem_get0(THIS->ctx->dict_pair_pool);
if (!pair) {
- gf_msg("dict", GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY,
- "unable to allocate dict pair");
+ gf_smsg("dict", GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY,
+ "dict pair", NULL);
ret = -ENOMEM;
goto err;
}
- } else {
+ } else { /* use the free pair */
pair = &this->free_pair;
- this->free_pair_in_use = _gf_true;
}
pair->key = (char *)GF_MALLOC(strlen(key) + 1, gf_common_mt_char);
if (!pair->key) {
- gf_msg("dict", GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY,
- "unable to allocate dict pair");
+ gf_smsg("dict", GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY,
+ "dict pair", NULL);
ret = -ENOMEM;
goto err;
}
strcpy(pair->key, key);
pair->key_hash = hash;
pair->value = data_ref(data);
-
+ this->totkvlen += (strlen(key) + 1 + data->len);
hashval = hash % this->hash_size;
pair->hash_next = this->members[hashval];
this->members[hashval] = pair;
@@ -2158,12 +2199,11 @@ err:
UNLOCK(&this->lock);
if (pair) {
- if (pair->key)
- free(pair->key);
-
- if (pair == &this->free_pair) {
- this->free_pair_in_use = _gf_false;
- } else {
+ if (pair->key) {
+ GF_FREE(pair->key);
+ pair->key = NULL;
+ }
+ if (pair != &this->free_pair) {
mem_put(pair);
}
}
@@ -2171,8 +2211,8 @@ err:
if (data)
data_destroy(data);
- gf_msg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_DICT_SET_FAILED,
- "unable to set key (%s) in dict ", key);
+ gf_smsg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_DICT_SET_FAILED, "key=%s", key,
+ NULL);
return ret;
}
@@ -2204,7 +2244,7 @@ dict_get_double(dict_t *this, char *key, double *val)
data_t *data = NULL;
int ret = 0;
- if (!this || !key || !val) {
+ if (!val) {
ret = -EINVAL;
goto err;
}
@@ -2287,7 +2327,7 @@ dict_get_ptr(dict_t *this, char *key, void **ptr)
data_t *data = NULL;
int ret = 0;
- if (!this || !key || !ptr) {
+ if (!ptr) {
ret = -EINVAL;
goto err;
}
@@ -2317,7 +2357,7 @@ dict_get_ptr_and_len(dict_t *this, char *key, void **ptr, int *len)
data_t *data = NULL;
int ret = 0;
- if (!this || !key || !ptr) {
+ if (!ptr) {
ret = -EINVAL;
goto err;
}
@@ -2375,7 +2415,7 @@ dict_get_str(dict_t *this, char *key, char **str)
data_t *data = NULL;
int ret = -EINVAL;
- if (!this || !key || !str) {
+ if (!str) {
goto err;
}
ret = dict_get_with_ref(this, key, &data);
@@ -2549,7 +2589,7 @@ dict_get_bin(dict_t *this, char *key, void **bin)
data_t *data = NULL;
int ret = -EINVAL;
- if (!this || !key || !bin) {
+ if (!bin) {
goto err;
}
@@ -2652,7 +2692,7 @@ dict_get_gfuuid(dict_t *this, char *key, uuid_t *gfid)
data_t *data = NULL;
int ret = -EINVAL;
- if (!this || !key || !gfid) {
+ if (!gfid) {
goto err;
}
ret = dict_get_with_ref(this, key, &data);
@@ -2672,6 +2712,45 @@ err:
}
int
+dict_set_mdata(dict_t *this, char *key, struct mdata_iatt *mdata,
+ bool is_static)
+{
+ return dict_set_bin_common(this, key, mdata, sizeof(struct mdata_iatt),
+ is_static, GF_DATA_TYPE_MDATA);
+}
+
+int
+dict_get_mdata(dict_t *this, char *key, struct mdata_iatt *mdata)
+{
+ data_t *data = NULL;
+ int ret = -EINVAL;
+
+ if (!mdata) {
+ goto err;
+ }
+ ret = dict_get_with_ref(this, key, &data);
+ if (ret < 0) {
+ goto err;
+ }
+
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_MDATA, key, -EINVAL);
+ if (data->len < sizeof(struct mdata_iatt)) {
+ gf_smsg("glusterfs", GF_LOG_ERROR, ENOBUFS, LG_MSG_UNDERSIZED_BUF,
+ "key=%s", key, NULL);
+ ret = -ENOBUFS;
+ goto err;
+ }
+
+ memcpy(mdata, data->data, min(data->len, sizeof(struct mdata_iatt)));
+
+err:
+ if (data)
+ data_unref(data);
+
+ return ret;
+}
+
+int
dict_set_iatt(dict_t *this, char *key, struct iatt *iatt, bool is_static)
{
return dict_set_bin_common(this, key, iatt, sizeof(struct iatt), is_static,
@@ -2684,7 +2763,7 @@ dict_get_iatt(dict_t *this, char *key, struct iatt *iatt)
data_t *data = NULL;
int ret = -EINVAL;
- if (!this || !key || !iatt) {
+ if (!iatt) {
goto err;
}
ret = dict_get_with_ref(this, key, &data);
@@ -2744,7 +2823,7 @@ dict_get_str_boolean(dict_t *this, char *key, int default_val)
VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, key, -EINVAL);
- ret = gf_string2boolean(data->data, &boo);
+ ret = gf_strn2boolean(data->data, data->len - 1, &boo);
if (ret == -1)
goto err;
@@ -2764,6 +2843,7 @@ dict_rename_key(dict_t *this, char *key, char *replace_key)
int ret = -EINVAL;
uint32_t hash;
uint32_t replacekey_hash;
+ int replacekey_len;
/* replacing a key by itself is a NO-OP */
if (strcmp(key, replace_key) == 0)
@@ -2776,7 +2856,8 @@ dict_rename_key(dict_t *this, char *key, char *replace_key)
}
hash = (uint32_t)XXH64(key, strlen(key), 0);
- replacekey_hash = (uint32_t)XXH64(replace_key, strlen(replace_key), 0);
+ replacekey_len = strlen(replace_key);
+ replacekey_hash = (uint32_t)XXH64(replace_key, replacekey_len, 0);
LOCK(&this->lock);
{
@@ -2785,8 +2866,8 @@ dict_rename_key(dict_t *this, char *key, char *replace_key)
if (!pair)
ret = -ENODATA;
else
- ret = dict_set_lk(this, replace_key, pair->value, replacekey_hash,
- 1);
+ ret = dict_set_lk(this, replace_key, replacekey_len, pair->value,
+ replacekey_hash, 1);
}
UNLOCK(&this->lock);
@@ -2805,10 +2886,6 @@ dict_rename_key(dict_t *this, char *key, char *replace_key)
* 4 4 4 <key len> <value len>
*/
-#define DICT_HDR_LEN 4
-#define DICT_DATA_HDR_KEY_LEN 4
-#define DICT_DATA_HDR_VAL_LEN 4
-
/**
* dict_serialized_length_lk - return the length of serialized dict. This
* procedure has to be called with this->lock held.
@@ -2818,58 +2895,20 @@ dict_rename_key(dict_t *this, char *key, char *replace_key)
* : failure: -errno
*/
-static int
+int
dict_serialized_length_lk(dict_t *this)
{
int ret = -EINVAL;
int count = this->count;
- int len = DICT_HDR_LEN;
- data_pair_t *pair = this->members_list;
+ const int keyhdrlen = DICT_DATA_HDR_KEY_LEN + DICT_DATA_HDR_VAL_LEN;
if (count < 0) {
- gf_msg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_COUNT_LESS_THAN_ZERO,
- "count (%d) < 0!", count);
+ gf_smsg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_COUNT_LESS_THAN_ZERO,
+ "count=%d", count, NULL);
goto out;
}
- while (count) {
- if (!pair) {
- gf_msg("dict", GF_LOG_ERROR, EINVAL,
- LG_MSG_COUNT_LESS_THAN_DATA_PAIRS,
- "less than count data pairs found!");
- goto out;
- }
-
- len += DICT_DATA_HDR_KEY_LEN + DICT_DATA_HDR_VAL_LEN;
-
- if (!pair->key) {
- gf_msg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_NULL_PTR,
- "pair->key is null!");
- goto out;
- }
-
- len += strlen(pair->key) + 1 /* for '\0' */;
-
- if (!pair->value) {
- gf_msg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_NULL_PTR,
- "pair->value is null!");
- goto out;
- }
-
- if (pair->value->len < 0) {
- gf_msg("dict", GF_LOG_ERROR, EINVAL,
- LG_MSG_VALUE_LENGTH_LESS_THAN_ZERO, "value->len (%d) < 0",
- pair->value->len);
- goto out;
- }
-
- len += pair->value->len;
-
- pair = pair->next;
- count--;
- }
-
- ret = len;
+ ret = DICT_HDR_LEN + this->totkvlen + (count * keyhdrlen);
out:
return ret;
}
@@ -2896,14 +2935,13 @@ dict_serialize_lk(dict_t *this, char *buf)
int32_t netword = 0;
if (!buf) {
- gf_msg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
- "buf is null!");
+ gf_smsg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, NULL);
goto out;
}
if (count < 0) {
- gf_msg("dict", GF_LOG_ERROR, 0, LG_MSG_COUNT_LESS_THAN_ZERO,
- "count (%d) < 0!", count);
+ gf_smsg("dict", GF_LOG_ERROR, 0, LG_MSG_COUNT_LESS_THAN_ZERO,
+ "count=%d", count, NULL);
goto out;
}
@@ -2913,14 +2951,13 @@ dict_serialize_lk(dict_t *this, char *buf)
while (count) {
if (!pair) {
- gf_msg("dict", GF_LOG_ERROR, 0, LG_MSG_PAIRS_LESS_THAN_COUNT,
- "less than count data pairs found!");
+ gf_smsg("dict", GF_LOG_ERROR, 0, LG_MSG_PAIRS_LESS_THAN_COUNT,
+ NULL);
goto out;
}
if (!pair->key) {
- gf_msg("dict", GF_LOG_ERROR, 0, LG_MSG_NULL_PTR,
- "pair->key is null!");
+ gf_smsg("dict", GF_LOG_ERROR, 0, LG_MSG_NULL_PTR, NULL);
goto out;
}
@@ -2930,8 +2967,7 @@ dict_serialize_lk(dict_t *this, char *buf)
buf += DICT_DATA_HDR_KEY_LEN;
if (!pair->value) {
- gf_msg("dict", GF_LOG_ERROR, 0, LG_MSG_NULL_PTR,
- "pair->value is null!");
+ gf_smsg("dict", GF_LOG_ERROR, 0, LG_MSG_NULL_PTR, NULL);
goto out;
}
@@ -3079,8 +3115,8 @@ dict_unserialize(char *orig_buf, int32_t size, dict_t **fill)
buf += DICT_HDR_LEN;
if (count < 0) {
- gf_msg("dict", GF_LOG_ERROR, 0, LG_MSG_COUNT_LESS_THAN_ZERO,
- "count (%d) <= 0", count);
+ gf_smsg("dict", GF_LOG_ERROR, 0, LG_MSG_COUNT_LESS_THAN_ZERO,
+ "count=%d", count, NULL);
goto out;
}
@@ -3145,12 +3181,12 @@ dict_unserialize(char *orig_buf, int32_t size, dict_t **fill)
goto out;
}
value->len = vallen;
- value->data = memdup(buf, vallen);
+ value->data = gf_memdup(buf, vallen);
value->data_type = GF_DATA_TYPE_STR_OLD;
value->is_static = _gf_false;
buf += vallen;
- ret = dict_add(*fill, key, value);
+ ret = dict_addn(*fill, key, keylen, value);
if (ret < 0)
goto out;
}
@@ -3236,32 +3272,30 @@ dict_serialize_value_with_delim_lk(dict_t *this, char *buf, int32_t *serz_len,
data_pair_t *pair = this->members_list;
if (!buf) {
- gf_msg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, "buf is null");
+ gf_smsg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, NULL);
goto out;
}
if (count < 0) {
- gf_msg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
- "count (%d) < 0", count);
+ gf_smsg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, "count=%d",
+ count, NULL);
goto out;
}
while (count) {
if (!pair) {
- gf_msg("dict", GF_LOG_ERROR, 0, LG_MSG_PAIRS_LESS_THAN_COUNT,
- "less than count data pairs found");
+ gf_smsg("dict", GF_LOG_ERROR, 0, LG_MSG_PAIRS_LESS_THAN_COUNT,
+ NULL);
goto out;
}
if (!pair->key || !pair->value) {
- gf_msg("dict", GF_LOG_ERROR, 0, LG_MSG_KEY_OR_VALUE_NULL,
- "key or value is null");
+ gf_smsg("dict", GF_LOG_ERROR, 0, LG_MSG_KEY_OR_VALUE_NULL, NULL);
goto out;
}
if (!pair->value->data) {
- gf_msg("dict", GF_LOG_ERROR, 0, LG_MSG_NULL_VALUE_IN_DICT,
- "null value found in dict");
+ gf_smsg("dict", GF_LOG_ERROR, 0, LG_MSG_NULL_VALUE_IN_DICT, NULL);
goto out;
}
@@ -3353,12 +3387,11 @@ dict_dump_to_log(dict_t *dict)
ret = dict_dump_to_str(dict, dump, dump_size, format);
if (ret) {
- gf_msg("dict", GF_LOG_WARNING, 0, LG_MSG_FAILED_TO_LOG_DICT,
- "Failed to log dictionary");
+ gf_smsg("dict", GF_LOG_WARNING, 0, LG_MSG_FAILED_TO_LOG_DICT, NULL);
goto out;
}
- gf_msg("dict", GF_LOG_INFO, 0, LG_MSG_DICT_ERROR, "dict=%p (%s)", dict,
- dump);
+ gf_smsg("dict", GF_LOG_INFO, 0, LG_MSG_DICT_ERROR, "dict=%p", dict,
+ "dump=%s", dump, NULL);
out:
GF_FREE(dump);
@@ -3391,8 +3424,8 @@ dict_dump_to_statedump(dict_t *dict, char *dict_name, char *domain)
ret = dict_dump_to_str(dict, dump, dump_size, format);
if (ret) {
- gf_msg(domain, GF_LOG_WARNING, 0, LG_MSG_FAILED_TO_LOG_DICT,
- "Failed to log dictionary %s", dict_name);
+ gf_smsg(domain, GF_LOG_WARNING, 0, LG_MSG_FAILED_TO_LOG_DICT, "name=%s",
+ dict_name, NULL);
goto out;
}
gf_proc_dump_build_key(key, domain, "%s", dict_name);
diff --git a/libglusterfs/src/event-epoll.c b/libglusterfs/src/event-epoll.c
index 041a7e6c583..fb4fb845b40 100644
--- a/libglusterfs/src/event-epoll.c
+++ b/libglusterfs/src/event-epoll.c
@@ -8,20 +8,14 @@
cases as published by the Free Software Foundation.
*/
-#include <sys/poll.h>
#include <pthread.h>
-#include <unistd.h>
-#include <fcntl.h>
#include <stdlib.h>
#include <errno.h>
-#include <string.h>
-#include "logging.h"
-#include "gf-event.h"
-#include "mem-pool.h"
-#include "common-utils.h"
-#include "syscall.h"
-#include "libglusterfs-messages.h"
+#include "glusterfs/gf-event.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/syscall.h"
+#include "glusterfs/libglusterfs-messages.h"
#ifdef HAVE_SYS_EPOLL_H
#include <sys/epoll.h>
@@ -69,18 +63,30 @@ __event_newtable(struct event_pool *event_pool, int table_idx)
}
static int
+event_slot_ref(struct event_slot_epoll *slot)
+{
+ if (!slot)
+ return -1;
+
+ return GF_ATOMIC_INC(slot->ref);
+}
+
+static int
__event_slot_alloc(struct event_pool *event_pool, int fd,
- char notify_poller_death)
+ char notify_poller_death, struct event_slot_epoll **slot)
{
int i = 0;
+ int j = 0;
int table_idx = -1;
int gen = -1;
struct event_slot_epoll *table = NULL;
- for (i = 0; i < EVENT_EPOLL_TABLES; i++) {
+retry:
+
+ while (i < EVENT_EPOLL_TABLES) {
switch (event_pool->slots_used[i]) {
case EVENT_EPOLL_SLOTS:
- continue;
+ break;
case 0:
if (!event_pool->ereg[i]) {
table = __event_newtable(event_pool, i);
@@ -98,6 +104,7 @@ __event_slot_alloc(struct event_pool *event_pool, int fd,
if (table)
/* break out of the loop */
break;
+ i++;
}
if (!table)
@@ -105,20 +112,20 @@ __event_slot_alloc(struct event_pool *event_pool, int fd,
table_idx = i;
- for (i = 0; i < EVENT_EPOLL_SLOTS; i++) {
- if (table[i].fd == -1) {
+ for (j = 0; j < EVENT_EPOLL_SLOTS; j++) {
+ if (table[j].fd == -1) {
/* wipe everything except bump the generation */
- gen = table[i].gen;
- memset(&table[i], 0, sizeof(table[i]));
- table[i].gen = gen + 1;
+ gen = table[j].gen;
+ memset(&table[j], 0, sizeof(table[j]));
+ table[j].gen = gen + 1;
- LOCK_INIT(&table[i].lock);
- INIT_LIST_HEAD(&table[i].poller_death);
+ LOCK_INIT(&table[j].lock);
+ INIT_LIST_HEAD(&table[j].poller_death);
- table[i].fd = fd;
+ table[j].fd = fd;
if (notify_poller_death) {
- table[i].idx = table_idx * EVENT_EPOLL_SLOTS + i;
- list_add_tail(&table[i].poller_death,
+ table[j].idx = table_idx * EVENT_EPOLL_SLOTS + j;
+ list_add_tail(&table[j].poller_death,
&event_pool->poller_death);
}
@@ -128,18 +135,26 @@ __event_slot_alloc(struct event_pool *event_pool, int fd,
}
}
- return table_idx * EVENT_EPOLL_SLOTS + i;
+ if (j == EVENT_EPOLL_SLOTS) {
+ table = NULL;
+ i++;
+ goto retry;
+ } else {
+ (*slot) = &table[j];
+ event_slot_ref(*slot);
+ return table_idx * EVENT_EPOLL_SLOTS + j;
+ }
}
static int
event_slot_alloc(struct event_pool *event_pool, int fd,
- char notify_poller_death)
+ char notify_poller_death, struct event_slot_epoll **slot)
{
int idx = -1;
pthread_mutex_lock(&event_pool->mutex);
{
- idx = __event_slot_alloc(event_pool, fd, notify_poller_death);
+ idx = __event_slot_alloc(event_pool, fd, notify_poller_death, slot);
}
pthread_mutex_unlock(&event_pool->mutex);
@@ -153,6 +168,7 @@ __event_slot_dealloc(struct event_pool *event_pool, int idx)
int offset = 0;
struct event_slot_epoll *table = NULL;
struct event_slot_epoll *slot = NULL;
+ int fd = -1;
table_idx = idx / EVENT_EPOLL_SLOTS;
offset = idx % EVENT_EPOLL_SLOTS;
@@ -164,11 +180,13 @@ __event_slot_dealloc(struct event_pool *event_pool, int idx)
slot = &table[offset];
slot->gen++;
+ fd = slot->fd;
slot->fd = -1;
slot->handled_error = 0;
slot->in_handler = 0;
list_del_init(&slot->poller_death);
- event_pool->slots_used[table_idx]--;
+ if (fd != -1)
+ event_pool->slots_used[table_idx]--;
return;
}
@@ -185,15 +203,6 @@ event_slot_dealloc(struct event_pool *event_pool, int idx)
return;
}
-static int
-event_slot_ref(struct event_slot_epoll *slot)
-{
- if (!slot)
- return -1;
-
- return GF_ATOMIC_INC(slot->ref);
-}
-
static struct event_slot_epoll *
event_slot_get(struct event_pool *event_pool, int idx)
{
@@ -287,9 +296,8 @@ event_pool_new_epoll(int count, int eventthreadcount)
epfd = epoll_create(count);
if (epfd == -1) {
- gf_msg("epoll", GF_LOG_ERROR, errno, LG_MSG_EPOLL_FD_CREATE_FAILED,
- "epoll fd creation "
- "failed");
+ gf_smsg("epoll", GF_LOG_ERROR, errno, LG_MSG_EPOLL_FD_CREATE_FAILED,
+ NULL);
GF_FREE(event_pool->reg);
GF_FREE(event_pool);
event_pool = NULL;
@@ -323,8 +331,8 @@ __slot_update_events(struct event_slot_epoll *slot, int poll_in, int poll_out)
/* do nothing */
break;
default:
- gf_msg("epoll", GF_LOG_ERROR, 0, LG_MSG_INVALID_POLL_IN,
- "invalid poll_in value %d", poll_in);
+ gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_INVALID_POLL_IN,
+ "value=%d", poll_in, NULL);
break;
}
@@ -339,8 +347,8 @@ __slot_update_events(struct event_slot_epoll *slot, int poll_in, int poll_out)
/* do nothing */
break;
default:
- gf_msg("epoll", GF_LOG_ERROR, 0, LG_MSG_INVALID_POLL_OUT,
- "invalid poll_out value %d", poll_out);
+ gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_INVALID_POLL_OUT,
+ "value=%d", poll_out, NULL);
break;
}
}
@@ -379,17 +387,10 @@ event_register_epoll(struct event_pool *event_pool, int fd,
if (destroy == 1)
goto out;
- idx = event_slot_alloc(event_pool, fd, notify_poller_death);
+ idx = event_slot_alloc(event_pool, fd, notify_poller_death, &slot);
if (idx == -1) {
- gf_msg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND,
- "could not find slot for fd=%d", fd);
- return -1;
- }
-
- slot = event_slot_get(event_pool, idx);
- if (!slot) {
- gf_msg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND,
- "could not find slot for fd=%d idx=%d", fd, idx);
+ gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, "fd=%d", fd,
+ NULL);
return -1;
}
@@ -424,10 +425,8 @@ event_register_epoll(struct event_pool *event_pool, int fd,
UNLOCK(&slot->lock);
if (ret == -1) {
- gf_msg("epoll", GF_LOG_ERROR, errno, LG_MSG_EPOLL_FD_ADD_FAILED,
- "failed to add fd(=%d) to "
- "epoll fd(=%d)",
- fd, event_pool->fd);
+ gf_smsg("epoll", GF_LOG_ERROR, errno, LG_MSG_EPOLL_FD_ADD_FAILED,
+ "fd=%d", fd, "epoll_fd=%d", event_pool->fd, NULL);
event_slot_unref(event_pool, slot, idx);
idx = -1;
}
@@ -456,8 +455,8 @@ event_unregister_epoll_common(struct event_pool *event_pool, int fd, int idx,
slot = event_slot_get(event_pool, idx);
if (!slot) {
- gf_msg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND,
- "could not find slot for fd=%d idx=%d", fd, idx);
+ gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, "fd=%d", fd,
+ "idx=%d", idx, NULL);
return -1;
}
@@ -468,10 +467,8 @@ event_unregister_epoll_common(struct event_pool *event_pool, int fd, int idx,
ret = epoll_ctl(event_pool->fd, EPOLL_CTL_DEL, fd, NULL);
if (ret == -1) {
- gf_msg("epoll", GF_LOG_ERROR, errno, LG_MSG_EPOLL_FD_DEL_FAILED,
- "fail to del "
- "fd(=%d) from epoll fd(=%d)",
- fd, event_pool->fd);
+ gf_smsg("epoll", GF_LOG_ERROR, errno, LG_MSG_EPOLL_FD_DEL_FAILED,
+ "fd=%d", fd, "epoll_fd=%d", event_pool->fd, NULL);
goto unlock;
}
@@ -523,8 +520,8 @@ event_select_on_epoll(struct event_pool *event_pool, int fd, int idx,
slot = event_slot_get(event_pool, idx);
if (!slot) {
- gf_msg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND,
- "could not find slot for fd=%d idx=%d", fd, idx);
+ gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, "fd=%d", fd,
+ "idx=%d", idx, NULL);
return -1;
}
@@ -555,10 +552,8 @@ event_select_on_epoll(struct event_pool *event_pool, int fd, int idx,
ret = epoll_ctl(event_pool->fd, EPOLL_CTL_MOD, fd, &epoll_event);
if (ret == -1) {
- gf_msg("epoll", GF_LOG_ERROR, errno, LG_MSG_EPOLL_FD_MODIFY_FAILED,
- "failed to "
- "modify fd(=%d) events to %d",
- fd, epoll_event.events);
+ gf_smsg("epoll", GF_LOG_ERROR, errno, LG_MSG_EPOLL_FD_MODIFY_FAILED,
+ "fd=%d", fd, "events=%d", epoll_event.events, NULL);
}
}
unlock:
@@ -593,8 +588,8 @@ event_dispatch_epoll_handler(struct event_pool *event_pool,
slot = event_slot_get(event_pool, idx);
if (!slot) {
- gf_msg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND,
- "could not find slot for idx=%d", idx);
+ gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, "idx=%d", idx,
+ NULL);
return -1;
}
@@ -602,20 +597,17 @@ event_dispatch_epoll_handler(struct event_pool *event_pool,
{
fd = slot->fd;
if (fd == -1) {
- gf_msg("epoll", GF_LOG_ERROR, 0, LG_MSG_STALE_FD_FOUND,
- "stale fd found on "
- "idx=%d, gen=%d, events=%d, slot->gen=%d",
- idx, gen, event->events, slot->gen);
+ gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_STALE_FD_FOUND, "idx=%d",
+ idx, "gen=%d", gen, "events=%d", event->events,
+ "slot->gen=%d", slot->gen, NULL);
/* fd got unregistered in another thread */
goto pre_unlock;
}
if (gen != slot->gen) {
- gf_msg("epoll", GF_LOG_ERROR, 0, LG_MSG_GENERATION_MISMATCH,
- "generation "
- "mismatch on idx=%d, gen=%d, slot->gen=%d, "
- "slot->fd=%d",
- idx, gen, slot->gen, slot->fd);
+ gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_GENERATION_MISMATCH,
+ "idx=%d", idx, "gen=%d", gen, "slot->gen=%d", slot->gen,
+ "slot->fd=%d", slot->fd, NULL);
/* slot was re-used and therefore is another fd! */
goto pre_unlock;
}
@@ -623,6 +615,12 @@ event_dispatch_epoll_handler(struct event_pool *event_pool,
handler = slot->handler;
data = slot->data;
+ if (slot->in_handler > 0) {
+ /* Another handler is inprogress, skip this one. */
+ handler = NULL;
+ goto pre_unlock;
+ }
+
if (slot->handled_error) {
handled_error_previously = _gf_true;
} else {
@@ -639,10 +637,9 @@ pre_unlock:
goto out;
if (!handled_error_previously) {
- ret = handler(fd, idx, gen, data,
- (event->events & (EPOLLIN | EPOLLPRI)),
- (event->events & (EPOLLOUT)),
- (event->events & (EPOLLERR | EPOLLHUP)), 0);
+ handler(fd, idx, gen, data, (event->events & (EPOLLIN | EPOLLPRI)),
+ (event->events & (EPOLLOUT)),
+ (event->events & (EPOLLERR | EPOLLHUP)), 0);
}
out:
event_slot_unref(event_pool, slot, idx);
@@ -669,10 +666,8 @@ event_dispatch_epoll_worker(void *data)
GF_VALIDATE_OR_GOTO("event", event_pool, out);
- gf_msg("epoll", GF_LOG_INFO, 0, LG_MSG_STARTED_EPOLL_THREAD,
- "Started"
- " thread with index %d",
- myindex - 1);
+ gf_smsg("epoll", GF_LOG_INFO, 0, LG_MSG_STARTED_EPOLL_THREAD, "index=%d",
+ myindex - 1, NULL);
pthread_mutex_lock(&event_pool->mutex);
{
@@ -736,8 +731,8 @@ event_dispatch_epoll_worker(void *data)
}
pthread_mutex_unlock(&event_pool->mutex);
- gf_msg("epoll", GF_LOG_INFO, 0, LG_MSG_EXITED_EPOLL_THREAD,
- "Exited thread with index %d", myindex);
+ gf_smsg("epoll", GF_LOG_INFO, 0, LG_MSG_EXITED_EPOLL_THREAD,
+ "index=%d", myindex, NULL);
goto out;
}
@@ -755,8 +750,8 @@ event_dispatch_epoll_worker(void *data)
ret = event_dispatch_epoll_handler(event_pool, &event);
if (ret) {
- gf_msg("epoll", GF_LOG_ERROR, 0, LG_MSG_EXITED_EPOLL_THREAD,
- "Failed to dispatch handler");
+ gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_DISPATCH_HANDLER_FAILED,
+ NULL);
}
}
out:
@@ -775,9 +770,6 @@ event_dispatch_epoll(struct event_pool *event_pool)
int pollercount = 0;
int ret = -1;
struct event_thread_data *ev_data = NULL;
- char thread_name[GF_THREAD_NAMEMAX] = {
- 0,
- };
/* Start the configured number of pollers */
pthread_mutex_lock(&event_pool->mutex);
@@ -811,10 +803,8 @@ event_dispatch_epoll(struct event_pool *event_pool)
ev_data->event_pool = event_pool;
ev_data->event_index = i + 1;
- snprintf(thread_name, sizeof(thread_name), "epoll%03hx",
- (i & 0x3ff));
ret = gf_thread_create(&t_id, NULL, event_dispatch_epoll_worker,
- ev_data, thread_name);
+ ev_data, "epoll%03hx", i & 0x3ff);
if (!ret) {
event_pool->pollers[i] = t_id;
@@ -825,9 +815,8 @@ event_dispatch_epoll(struct event_pool *event_pool)
if (i != 0)
pthread_detach(event_pool->pollers[i]);
} else {
- gf_msg("epoll", GF_LOG_WARNING, 0,
- LG_MSG_START_EPOLL_THREAD_FAILED,
- "Failed to start thread for index %d", i);
+ gf_smsg("epoll", GF_LOG_WARNING, 0,
+ LG_MSG_START_EPOLL_THREAD_FAILED, "index=%d", i, NULL);
if (i == 0) {
GF_FREE(ev_data);
break;
@@ -877,9 +866,6 @@ event_reconfigure_threads_epoll(struct event_pool *event_pool, int value)
pthread_t t_id;
int oldthreadcount;
struct event_thread_data *ev_data = NULL;
- char thread_name[GF_THREAD_NAMEMAX] = {
- 0,
- };
pthread_mutex_lock(&event_pool->mutex);
{
@@ -919,17 +905,13 @@ event_reconfigure_threads_epoll(struct event_pool *event_pool, int value)
ev_data->event_pool = event_pool;
ev_data->event_index = i + 1;
- snprintf(thread_name, sizeof(thread_name), "epoll%03hx",
- (i & 0x3ff));
ret = gf_thread_create(&t_id, NULL,
event_dispatch_epoll_worker, ev_data,
- thread_name);
+ "epoll%03hx", i & 0x3ff);
if (ret) {
- gf_msg("epoll", GF_LOG_WARNING, 0,
- LG_MSG_START_EPOLL_THREAD_FAILED,
- "Failed to start thread"
- " for index %d",
- i);
+ gf_smsg("epoll", GF_LOG_WARNING, 0,
+ LG_MSG_START_EPOLL_THREAD_FAILED, "index=%d", i,
+ NULL);
GF_FREE(ev_data);
} else {
pthread_detach(t_id);
@@ -992,8 +974,8 @@ event_handled_epoll(struct event_pool *event_pool, int fd, int idx, int gen)
slot = event_slot_get(event_pool, idx);
if (!slot) {
- gf_msg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND,
- "could not find slot for fd=%d idx=%d", fd, idx);
+ gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, "fd=%d", fd,
+ "idx=%d", idx, NULL);
return -1;
}
@@ -1012,7 +994,7 @@ event_handled_epoll(struct event_pool *event_pool, int fd, int idx, int gen)
" from gen=%d to slot->gen=%d, fd=%d, "
"slot->fd=%d",
idx, gen, slot->gen, fd, slot->fd);
- goto post_unlock;
+ goto unlock;
}
/* This call also picks up the changes made by another
@@ -1027,7 +1009,7 @@ event_handled_epoll(struct event_pool *event_pool, int fd, int idx, int gen)
ret = epoll_ctl(event_pool->fd, EPOLL_CTL_MOD, fd, &epoll_event);
}
}
-post_unlock:
+unlock:
UNLOCK(&slot->lock);
event_slot_unref(event_pool, slot, idx);
diff --git a/libglusterfs/src/event-history.c b/libglusterfs/src/event-history.c
index 4845330f62b..379fed866be 100644
--- a/libglusterfs/src/event-history.c
+++ b/libglusterfs/src/event-history.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "event-history.h"
-#include "libglusterfs-messages.h"
+#include "glusterfs/event-history.h"
+#include "glusterfs/libglusterfs-messages.h"
eh_t *
eh_new(size_t buffer_size, gf_boolean_t use_buffer_once,
diff --git a/libglusterfs/src/event-poll.c b/libglusterfs/src/event-poll.c
index 5bac4291c47..2cba963f096 100644
--- a/libglusterfs/src/event-poll.c
+++ b/libglusterfs/src/event-poll.c
@@ -16,12 +16,12 @@
#include <errno.h>
#include <string.h>
-#include "logging.h"
-#include "gf-event.h"
-#include "mem-pool.h"
-#include "common-utils.h"
-#include "syscall.h"
-#include "libglusterfs-messages.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/gf-event.h"
+#include "glusterfs/mem-pool.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/syscall.h"
+#include "glusterfs/libglusterfs-messages.h"
struct event_slot_poll {
int fd;
@@ -35,7 +35,7 @@ event_register_poll(struct event_pool *event_pool, int fd,
event_handler_t handler, void *data, int poll_in,
int poll_out, char notify_poller_death);
-static int
+static void
__flush_fd(int fd, int idx, int gen, void *data, int poll_in, int poll_out,
int poll_err, char event_thread_died)
{
@@ -43,19 +43,17 @@ __flush_fd(int fd, int idx, int gen, void *data, int poll_in, int poll_out,
int ret = -1;
if (!poll_in)
- return ret;
+ return;
do {
ret = sys_read(fd, buf, 64);
if (ret == -1 && errno != EAGAIN) {
- gf_msg("poll", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED,
- "read on %d returned "
- "error",
- fd);
+ gf_smsg("poll", GF_LOG_ERROR, errno, LG_MSG_READ_FILE_FAILED,
+ "fd=%d", fd, NULL);
}
} while (ret == 64);
- return ret;
+ return;
}
static int
@@ -111,8 +109,7 @@ event_pool_new_poll(int count, int eventthreadcount)
ret = pipe(event_pool->breaker);
if (ret == -1) {
- gf_msg("poll", GF_LOG_ERROR, errno, LG_MSG_PIPE_CREATE_FAILED,
- "pipe creation failed");
+ gf_smsg("poll", GF_LOG_ERROR, errno, LG_MSG_PIPE_CREATE_FAILED, NULL);
GF_FREE(event_pool->reg);
GF_FREE(event_pool);
return NULL;
@@ -120,8 +117,7 @@ event_pool_new_poll(int count, int eventthreadcount)
ret = fcntl(event_pool->breaker[0], F_SETFL, O_NONBLOCK);
if (ret == -1) {
- gf_msg("poll", GF_LOG_ERROR, errno, LG_MSG_SET_PIPE_FAILED,
- "could not set pipe to non blocking mode");
+ gf_smsg("poll", GF_LOG_ERROR, errno, LG_MSG_SET_PIPE_FAILED, NULL);
sys_close(event_pool->breaker[0]);
sys_close(event_pool->breaker[1]);
event_pool->breaker[0] = event_pool->breaker[1] = -1;
@@ -133,8 +129,7 @@ event_pool_new_poll(int count, int eventthreadcount)
ret = fcntl(event_pool->breaker[1], F_SETFL, O_NONBLOCK);
if (ret == -1) {
- gf_msg("poll", GF_LOG_ERROR, errno, LG_MSG_SET_PIPE_FAILED,
- "could not set pipe to non blocking mode");
+ gf_smsg("poll", GF_LOG_ERROR, errno, LG_MSG_SET_PIPE_FAILED, NULL);
sys_close(event_pool->breaker[0]);
sys_close(event_pool->breaker[1]);
@@ -148,8 +143,7 @@ event_pool_new_poll(int count, int eventthreadcount)
ret = event_register_poll(event_pool, event_pool->breaker[0], __flush_fd,
NULL, 1, 0, 0);
if (ret == -1) {
- gf_msg("poll", GF_LOG_ERROR, 0, LG_MSG_REGISTER_PIPE_FAILED,
- "could not register pipe fd with poll event loop");
+ gf_smsg("poll", GF_LOG_ERROR, 0, LG_MSG_REGISTER_PIPE_FAILED, NULL);
sys_close(event_pool->breaker[0]);
sys_close(event_pool->breaker[1]);
event_pool->breaker[0] = event_pool->breaker[1] = -1;
@@ -160,11 +154,8 @@ event_pool_new_poll(int count, int eventthreadcount)
}
if (eventthreadcount > 1) {
- gf_msg("poll", GF_LOG_INFO, 0, LG_MSG_POLL_IGNORE_MULTIPLE_THREADS,
- "Currently poll "
- "does not use multiple event processing threads, "
- "thread count (%d) ignored",
- eventthreadcount);
+ gf_smsg("poll", GF_LOG_INFO, 0, LG_MSG_POLL_IGNORE_MULTIPLE_THREADS,
+ "count=%d", eventthreadcount, NULL);
}
/* although, eventhreadcount for poll implementation is always
@@ -215,8 +206,8 @@ event_register_poll(struct event_pool *event_pool, int fd,
/* do nothing */
break;
default:
- gf_msg("poll", GF_LOG_ERROR, 0, LG_MSG_INVALID_POLL_IN,
- "invalid poll_in value %d", poll_in);
+ gf_smsg("poll", GF_LOG_ERROR, 0, LG_MSG_INVALID_POLL_IN,
+ "value=%d", poll_in, NULL);
break;
}
@@ -231,8 +222,8 @@ event_register_poll(struct event_pool *event_pool, int fd,
/* do nothing */
break;
default:
- gf_msg("poll", GF_LOG_ERROR, 0, LG_MSG_INVALID_POLL_OUT,
- "invalid poll_out value %d", poll_out);
+ gf_smsg("poll", GF_LOG_ERROR, 0, LG_MSG_INVALID_POLL_OUT,
+ "value=%d", poll_out, NULL);
break;
}
@@ -257,8 +248,8 @@ event_unregister_poll(struct event_pool *event_pool, int fd, int idx_hint)
idx = __event_getindex(event_pool, fd, idx_hint);
if (idx == -1) {
- gf_msg("poll", GF_LOG_ERROR, 0, LG_MSG_INDEX_NOT_FOUND,
- "index not found for fd=%d (idx_hint=%d)", fd, idx_hint);
+ gf_smsg("poll", GF_LOG_ERROR, 0, LG_MSG_INDEX_NOT_FOUND, "fd=%d",
+ fd, "idx_hint=%d", idx_hint, NULL);
errno = ENOENT;
goto unlock;
}
@@ -298,8 +289,8 @@ event_select_on_poll(struct event_pool *event_pool, int fd, int idx_hint,
idx = __event_getindex(event_pool, fd, idx_hint);
if (idx == -1) {
- gf_msg("poll", GF_LOG_ERROR, 0, LG_MSG_INDEX_NOT_FOUND,
- "index not found for fd=%d (idx_hint=%d)", fd, idx_hint);
+ gf_smsg("poll", GF_LOG_ERROR, 0, LG_MSG_INDEX_NOT_FOUND, "fd=%d",
+ fd, "idx_hint=%d", idx_hint, NULL);
errno = ENOENT;
goto unlock;
}
@@ -361,10 +352,8 @@ event_dispatch_poll_handler(struct event_pool *event_pool, struct pollfd *ufds,
idx = __event_getindex(event_pool, ufds[i].fd, i);
if (idx == -1) {
- gf_msg("poll", GF_LOG_ERROR, 0, LG_MSG_INDEX_NOT_FOUND,
- "index not found for "
- "fd=%d (idx_hint=%d)",
- ufds[i].fd, i);
+ gf_smsg("poll", GF_LOG_ERROR, 0, LG_MSG_INDEX_NOT_FOUND, "fd=%d",
+ ufds[i].fd, "idx_hint=%d", i, NULL);
goto unlock;
}
@@ -375,10 +364,10 @@ unlock:
pthread_mutex_unlock(&event_pool->mutex);
if (handler)
- ret = handler(ufds[i].fd, idx, 0, data,
- (ufds[i].revents & (POLLIN | POLLPRI)),
- (ufds[i].revents & (POLLOUT)),
- (ufds[i].revents & (POLLERR | POLLHUP | POLLNVAL)), 0);
+ handler(ufds[i].fd, idx, 0, data,
+ (ufds[i].revents & (POLLIN | POLLPRI)),
+ (ufds[i].revents & (POLLOUT)),
+ (ufds[i].revents & (POLLERR | POLLHUP | POLLNVAL)), 0);
return ret;
}
diff --git a/libglusterfs/src/event.c b/libglusterfs/src/event.c
index ddba9810b0b..402c253ca25 100644
--- a/libglusterfs/src/event.c
+++ b/libglusterfs/src/event.c
@@ -16,15 +16,14 @@
#include <errno.h>
#include <string.h>
-#include "logging.h"
-#include "gf-event.h"
-#include "mem-pool.h"
-#include "common-utils.h"
-#include "libglusterfs-messages.h"
-#include "syscall.h"
+#include "glusterfs/gf-event.h"
+#include "glusterfs/timespec.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/libglusterfs-messages.h"
+#include "glusterfs/syscall.h"
struct event_pool *
-event_pool_new(int count, int eventthreadcount)
+gf_event_pool_new(int count, int eventthreadcount)
{
struct event_pool *event_pool = NULL;
extern struct event_ops event_ops_poll;
@@ -53,8 +52,9 @@ event_pool_new(int count, int eventthreadcount)
}
int
-event_register(struct event_pool *event_pool, int fd, event_handler_t handler,
- void *data, int poll_in, int poll_out, char notify_poller_death)
+gf_event_register(struct event_pool *event_pool, int fd,
+ event_handler_t handler, void *data, int poll_in,
+ int poll_out, char notify_poller_death)
{
int ret = -1;
@@ -67,7 +67,7 @@ out:
}
int
-event_unregister(struct event_pool *event_pool, int fd, int idx)
+gf_event_unregister(struct event_pool *event_pool, int fd, int idx)
{
int ret = -1;
@@ -80,7 +80,7 @@ out:
}
int
-event_unregister_close(struct event_pool *event_pool, int fd, int idx)
+gf_event_unregister_close(struct event_pool *event_pool, int fd, int idx)
{
int ret = -1;
@@ -93,8 +93,8 @@ out:
}
int
-event_select_on(struct event_pool *event_pool, int fd, int idx_hint,
- int poll_in, int poll_out)
+gf_event_select_on(struct event_pool *event_pool, int fd, int idx_hint,
+ int poll_in, int poll_out)
{
int ret = -1;
@@ -107,7 +107,7 @@ out:
}
int
-event_dispatch(struct event_pool *event_pool)
+gf_event_dispatch(struct event_pool *event_pool)
{
int ret = -1;
@@ -122,7 +122,7 @@ out:
}
int
-event_reconfigure_threads(struct event_pool *event_pool, int value)
+gf_event_reconfigure_threads(struct event_pool *event_pool, int value)
{
int ret = -1;
@@ -136,7 +136,7 @@ out:
}
int
-event_pool_destroy(struct event_pool *event_pool)
+gf_event_pool_destroy(struct event_pool *event_pool)
{
int ret = -1;
int destroy = 0, activethreadcount = 0;
@@ -159,12 +159,12 @@ out:
return ret;
}
-int
+void
poller_destroy_handler(int fd, int idx, int gen, void *data, int poll_out,
int poll_in, int poll_err, char event_thread_exit)
{
struct event_destroy_data *destroy = NULL;
- int readfd = -1, ret = -1;
+ int readfd = -1;
char buf = '\0';
destroy = data;
@@ -176,15 +176,14 @@ poller_destroy_handler(int fd, int idx, int gen, void *data, int poll_out,
while (sys_read(readfd, &buf, 1) > 0) {
}
- ret = 0;
out:
- event_handled(destroy->pool, fd, idx, gen);
+ gf_event_handled(destroy->pool, fd, idx, gen);
- return ret;
+ return;
}
/* This function destroys all the poller threads.
- * Note: to be called before event_pool_destroy is called.
+ * Note: to be called before gf_event_pool_destroy is called.
* The order in which cleaning is performed:
* - Register a pipe fd(this is for waking threads in poll()/epoll_wait())
* - Set the destroy mode, which this no new event registration will succeed
@@ -195,7 +194,7 @@ out:
* threads are destroyed)
*/
int
-event_dispatch_destroy(struct event_pool *event_pool)
+gf_event_dispatch_destroy(struct event_pool *event_pool)
{
int ret = -1, threadcount = 0;
int fd[2] = {-1};
@@ -233,8 +232,8 @@ event_dispatch_destroy(struct event_pool *event_pool)
/* From the main thread register an event on the pipe fd[0],
*/
- idx = event_register(event_pool, fd[0], poller_destroy_handler, &data, 1, 0,
- 0);
+ idx = gf_event_register(event_pool, fd[0], poller_destroy_handler, &data, 1,
+ 0, 0);
if (idx < 0)
goto out;
@@ -248,7 +247,7 @@ event_dispatch_destroy(struct event_pool *event_pool)
}
pthread_mutex_unlock(&event_pool->mutex);
- ret = event_reconfigure_threads(event_pool, 0);
+ ret = gf_event_reconfigure_threads(event_pool, 0);
if (ret < 0)
goto out;
@@ -268,7 +267,7 @@ event_dispatch_destroy(struct event_pool *event_pool)
if (sys_write(fd[1], "dummy", 6) == -1) {
break;
}
- clock_gettime(CLOCK_REALTIME, &sleep_till);
+ timespec_now_realtime(&sleep_till);
sleep_till.tv_sec += 1;
ret = pthread_cond_timedwait(&event_pool->cond, &event_pool->mutex,
&sleep_till);
@@ -283,7 +282,7 @@ event_dispatch_destroy(struct event_pool *event_pool)
}
pthread_mutex_unlock(&event_pool->mutex);
- ret = event_unregister(event_pool, fd[0], idx);
+ ret = gf_event_unregister(event_pool, fd[0], idx);
out:
if (fd[0] != -1)
@@ -295,7 +294,7 @@ out:
}
int
-event_handled(struct event_pool *event_pool, int fd, int idx, int gen)
+gf_event_handled(struct event_pool *event_pool, int fd, int idx, int gen)
{
int ret = 0;
diff --git a/libglusterfs/src/events.c b/libglusterfs/src/events.c
index b1fa057b81e..33157549897 100644
--- a/libglusterfs/src/events.c
+++ b/libglusterfs/src/events.c
@@ -19,11 +19,11 @@
#include <netinet/in.h>
#include <netdb.h>
-#include "syscall.h"
-#include "mem-pool.h"
-#include "glusterfs.h"
-#include "globals.h"
-#include "events.h"
+#include "glusterfs/syscall.h"
+#include "glusterfs/mem-pool.h"
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/globals.h"
+#include "glusterfs/events.h"
#define EVENT_HOST "127.0.0.1"
#define EVENT_PORT 24009
@@ -34,56 +34,66 @@ _gf_event(eventtypes_t event, const char *fmt, ...)
int ret = 0;
int sock = -1;
char *eventstr = NULL;
- struct sockaddr_in server;
va_list arguments;
char *msg = NULL;
glusterfs_ctx_t *ctx = NULL;
char *host = NULL;
struct addrinfo hints;
struct addrinfo *result = NULL;
+ struct addrinfo *iter_result_ptr = NULL;
+ xlator_t *this = THIS;
+ char *volfile_server_transport = NULL;
/* Global context */
- ctx = THIS->ctx;
+ ctx = this->ctx;
if (event < 0 || event >= EVENT_LAST) {
ret = EVENT_ERROR_INVALID_INPUTS;
goto out;
}
- /* Initialize UDP socket */
- sock = socket(AF_INET, SOCK_DGRAM, 0);
- if (sock < 0) {
- ret = EVENT_ERROR_SOCKET;
- goto out;
+ if (ctx) {
+ volfile_server_transport = ctx->cmd_args.volfile_server_transport;
+ }
+ if (!volfile_server_transport) {
+ volfile_server_transport = "tcp";
+ }
+
+ /* host = NULL returns localhost */
+ if (ctx && ctx->cmd_args.volfile_server &&
+ (strcmp(volfile_server_transport, "unix"))) {
+ /* If it is client code then volfile_server is set
+ use that information to push the events. */
+ host = ctx->cmd_args.volfile_server;
}
memset(&hints, 0, sizeof(hints));
hints.ai_family = AF_UNSPEC;
+ hints.ai_socktype = SOCK_DGRAM;
+ hints.ai_flags = AI_ADDRCONFIG;
- /* Get Host name to send message */
- if (ctx && ctx->cmd_args.volfile_server) {
- /* If it is client code then volfile_server is set
- use that information to push the events. */
- if ((getaddrinfo(ctx->cmd_args.volfile_server, NULL, &hints,
- &result)) != 0) {
- ret = EVENT_ERROR_RESOLVE;
- goto out;
- }
+ if ((getaddrinfo(host, TOSTRING(EVENT_PORT), &hints, &result)) != 0) {
+ ret = EVENT_ERROR_RESOLVE;
+ goto out;
+ }
- if (get_ip_from_addrinfo(result, &host) == NULL) {
- ret = EVENT_ERROR_RESOLVE;
- goto out;
+ // iterate over the result and break when socket creation is success.
+ for (iter_result_ptr = result; iter_result_ptr != NULL;
+ iter_result_ptr = iter_result_ptr->ai_next) {
+ sock = socket(iter_result_ptr->ai_family, iter_result_ptr->ai_socktype,
+ iter_result_ptr->ai_protocol);
+ if (sock != -1) {
+ break;
}
- } else {
- /* Localhost, Use the defined IP for localhost */
- host = gf_strdup(EVENT_HOST);
}
-
- /* Socket Configurations */
- server.sin_family = AF_INET;
- server.sin_port = htons(EVENT_PORT);
- server.sin_addr.s_addr = inet_addr(host);
- memset(&server.sin_zero, '\0', sizeof(server.sin_zero));
+ /*
+ * If none of the addrinfo structures lead to a successful socket
+ * creation, socket creation has failed.
+ */
+ if (sock < 0) {
+ ret = EVENT_ERROR_SOCKET;
+ goto out;
+ }
va_start(arguments, fmt);
ret = gf_vasprintf(&msg, fmt, arguments);
@@ -94,16 +104,16 @@ _gf_event(eventtypes_t event, const char *fmt, ...)
goto out;
}
- ret = gf_asprintf(&eventstr, "%u %d %s", (unsigned)time(NULL), event, msg);
-
+ ret = gf_asprintf(&eventstr, "%u %d %s", (unsigned)gf_time(), event, msg);
+ GF_FREE(msg);
if (ret <= 0) {
ret = EVENT_ERROR_MSG_FORMAT;
goto out;
}
/* Send Message */
- if (sendto(sock, eventstr, strlen(eventstr), 0, (struct sockaddr *)&server,
- sizeof(server)) <= 0) {
+ if (sendto(sock, eventstr, strlen(eventstr), 0, result->ai_addr,
+ result->ai_addrlen) <= 0) {
ret = EVENT_ERROR_SEND;
goto out;
}
@@ -115,17 +125,10 @@ out:
sys_close(sock);
}
- /* Allocated by gf_vasprintf */
- if (msg)
- GF_FREE(msg);
-
/* Allocated by gf_asprintf */
if (eventstr)
GF_FREE(eventstr);
- if (host)
- GF_FREE(host);
-
if (result)
freeaddrinfo(result);
diff --git a/libglusterfs/src/fd-lk.c b/libglusterfs/src/fd-lk.c
index a6680661be6..c2d34f81c9c 100644
--- a/libglusterfs/src/fd-lk.c
+++ b/libglusterfs/src/fd-lk.c
@@ -8,9 +8,9 @@
cases as published by the Free Software Foundation.
*/
-#include "fd-lk.h"
-#include "common-utils.h"
-#include "libglusterfs-messages.h"
+#include "glusterfs/fd-lk.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/libglusterfs-messages.h"
int32_t
_fd_lk_delete_lock(fd_lk_ctx_node_t *lock)
diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c
index 25edcbdd387..62606e91164 100644
--- a/libglusterfs/src/fd.c
+++ b/libglusterfs/src/fd.c
@@ -8,11 +8,12 @@
cases as published by the Free Software Foundation.
*/
-#include "fd.h"
-#include "glusterfs.h"
-#include "dict.h"
-#include "statedump.h"
-#include "libglusterfs-messages.h"
+#include "glusterfs/fd.h"
+#include <errno.h> // for EINVAL, errno, ENOMEM
+#include <inttypes.h> // for PRIu64
+#include <stdint.h> // for UINT32_MAX
+#include <string.h> // for NULL, memcpy, memset, size_t
+#include "glusterfs/statedump.h"
static int
gf_fd_fdtable_expand(fdtable_t *fdtable, uint32_t nr);
@@ -501,6 +502,32 @@ out:
}
void
+fd_close(fd_t *fd)
+{
+ xlator_t *xl, *old_THIS;
+
+ old_THIS = THIS;
+
+ for (xl = fd->inode->table->xl->graph->first; xl != NULL; xl = xl->next) {
+ if (!xl->call_cleanup) {
+ THIS = xl;
+
+ if (IA_ISDIR(fd->inode->ia_type)) {
+ if (xl->cbks->fdclosedir != NULL) {
+ xl->cbks->fdclosedir(xl, fd);
+ }
+ } else {
+ if (xl->cbks->fdclose != NULL) {
+ xl->cbks->fdclose(xl, fd);
+ }
+ }
+ }
+ }
+
+ THIS = old_THIS;
+}
+
+void
fd_unref(fd_t *fd)
{
int32_t refcount = 0;
@@ -532,7 +559,7 @@ fd_unref(fd_t *fd)
return;
}
-fd_t *
+static fd_t *
__fd_bind(fd_t *fd)
{
list_del_init(&fd->inode_list);
@@ -562,9 +589,9 @@ fd_bind(fd_t *fd)
}
static fd_t *
-__fd_create(inode_t *inode, uint64_t pid)
+fd_allocate(inode_t *inode, uint64_t pid)
{
- fd_t *fd = NULL;
+ fd_t *fd;
if (inode == NULL) {
gf_msg_callingfn("fd", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
@@ -573,64 +600,67 @@ __fd_create(inode_t *inode, uint64_t pid)
}
fd = mem_get0(inode->table->fd_mem_pool);
- if (!fd)
- goto out;
+ if (fd == NULL) {
+ return NULL;
+ }
fd->xl_count = inode->table->xl->graph->xl_count + 1;
fd->_ctx = GF_CALLOC(1, (sizeof(struct _fd_ctx) * fd->xl_count),
gf_common_mt_fd_ctx);
- if (!fd->_ctx)
- goto free_fd;
+ if (fd->_ctx == NULL) {
+ goto failed;
+ }
fd->lk_ctx = fd_lk_ctx_create();
- if (!fd->lk_ctx)
- goto free_fd_ctx;
-
- fd->inode = inode_ref(inode);
- fd->pid = pid;
- INIT_LIST_HEAD(&fd->inode_list);
-
- LOCK_INIT(&fd->lock);
-out:
- return fd;
+ if (fd->lk_ctx != NULL) {
+ /* We need to take a reference from the inode, but we cannot do it
+ * here because this function can be called with the inode lock taken
+ * and inode_ref() takes the inode's table lock. This is the reverse
+ * of the logical lock acquisition order and can cause a deadlock. So
+ * we simply assign the inode here and we delefate the inode reference
+ * responsibility to the caller (when this function succeeds and the
+ * inode lock is released). This is safe because the caller must hold
+ * a reference of the inode to use it, so it's guaranteed that the
+ * number of references won't reach 0 before the caller finishes.
+ *
+ * TODO: minimize use of locks in favor of atomic operations to avoid
+ * these dependencies. */
+ fd->inode = inode;
+ fd->pid = pid;
+ INIT_LIST_HEAD(&fd->inode_list);
+ LOCK_INIT(&fd->lock);
+ GF_ATOMIC_INIT(fd->refcount, 1);
+ return fd;
+ }
-free_fd_ctx:
GF_FREE(fd->_ctx);
-free_fd:
+
+failed:
mem_put(fd);
return NULL;
}
fd_t *
-fd_create(inode_t *inode, pid_t pid)
+fd_create_uint64(inode_t *inode, uint64_t pid)
{
- fd_t *fd = NULL;
-
- fd = __fd_create(inode, (uint64_t)pid);
- if (!fd)
- goto out;
+ fd_t *fd;
- fd = fd_ref(fd);
+ fd = fd_allocate(inode, pid);
+ if (fd != NULL) {
+ /* fd_allocate() doesn't get a reference from the inode. We need to
+ * take it here in case of success. */
+ inode_ref(inode);
+ }
-out:
return fd;
}
fd_t *
-fd_create_uint64(inode_t *inode, uint64_t pid)
+fd_create(inode_t *inode, pid_t pid)
{
- fd_t *fd = NULL;
-
- fd = __fd_create(inode, pid);
- if (!fd)
- goto out;
-
- fd = fd_ref(fd);
-
-out:
- return fd;
+ return fd_create_uint64(inode, (uint64_t)pid);
}
static fd_t *
@@ -719,10 +749,13 @@ __fd_lookup_anonymous(inode_t *inode, int32_t flags)
return fd;
}
-static fd_t *
-__fd_anonymous(inode_t *inode, int32_t flags)
+fd_t *
+fd_anonymous_with_flags(inode_t *inode, int32_t flags)
{
fd_t *fd = NULL;
+ bool ref = false;
+
+ LOCK(&inode->lock);
fd = __fd_lookup_anonymous(inode, flags);
@@ -730,54 +763,33 @@ __fd_anonymous(inode_t *inode, int32_t flags)
__fd_lookup_anonymous(), so no need of one more fd_ref().
if (!fd); then both create and bind won't bump up the ref
count, so we have to call fd_ref() after bind. */
- if (!fd) {
- fd = __fd_create(inode, 0);
-
- if (!fd)
- return NULL;
-
- fd->anonymous = _gf_true;
- fd->flags = GF_ANON_FD_FLAGS | flags;
+ if (fd == NULL) {
+ fd = fd_allocate(inode, 0);
+ if (fd != NULL) {
+ fd->anonymous = _gf_true;
+ fd->flags = GF_ANON_FD_FLAGS | (flags & O_DIRECT);
- __fd_bind(fd);
+ __fd_bind(fd);
- __fd_ref(fd);
+ ref = true;
+ }
}
- return fd;
-}
-
-fd_t *
-fd_anonymous(inode_t *inode)
-{
- fd_t *fd = NULL;
+ UNLOCK(&inode->lock);
- LOCK(&inode->lock);
- {
- fd = __fd_anonymous(inode, GF_ANON_FD_FLAGS);
+ if (ref) {
+ /* fd_allocate() doesn't get a reference from the inode. We need to
+ * take it here in case of success. */
+ inode_ref(inode);
}
- UNLOCK(&inode->lock);
return fd;
}
fd_t *
-fd_anonymous_with_flags(inode_t *inode, int32_t flags)
+fd_anonymous(inode_t *inode)
{
- fd_t *fd = NULL;
-
- LOCK(&inode->lock);
- {
- if (flags & O_DIRECT)
- flags = GF_ANON_FD_FLAGS | O_DIRECT;
- else
- flags = GF_ANON_FD_FLAGS;
-
- fd = __fd_anonymous(inode, flags);
- }
- UNLOCK(&inode->lock);
-
- return fd;
+ return fd_anonymous_with_flags(inode, 0);
}
fd_t *
diff --git a/libglusterfs/src/generator.py b/libglusterfs/src/generator.py
index c17d450502d..5b7aa4764a0 100755
--- a/libglusterfs/src/generator.py
+++ b/libglusterfs/src/generator.py
@@ -599,6 +599,19 @@ ops['namelink'] = (
('cbk-arg', 'xdata', 'dict_t *'),
)
+ops['copy_file_range'] = (
+ ('fop-arg', 'fd_in', 'fd_t *'),
+ ('fop-arg', 'off_in', 'off64_t '),
+ ('fop-arg', 'fd_out', 'fd_t *'),
+ ('fop-arg', 'off_out', 'off64_t '),
+ ('fop-arg', 'len', 'size_t'),
+ ('fop-arg', 'flags', 'uint32_t'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'stbuf', 'struct iatt *'),
+ ('cbk-arg', 'prebuf_dst', 'struct iatt *'),
+ ('cbk-arg', 'postbuf_dst', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
#####################################################################
xlator_cbks['forget'] = (
('fn-arg', 'this', 'xlator_t *'),
diff --git a/libglusterfs/src/gf-dirent.c b/libglusterfs/src/gf-dirent.c
index c06e959aeba..a809efc97ef 100644
--- a/libglusterfs/src/gf-dirent.c
+++ b/libglusterfs/src/gf-dirent.c
@@ -11,9 +11,8 @@
#include <stdio.h>
#include <string.h>
#include <stdint.h>
-#include "compat.h"
-#include "xlator.h"
-#include "syncop.h"
+#include "glusterfs/compat.h"
+#include "glusterfs/syncop.h"
#define ONE 1ULL
#define PRESENT_D_OFF_BITS 63
@@ -277,7 +276,7 @@ gf_fill_iatt_for_dirent(gf_dirent_t *entry, inode_t *parent, xlator_t *subvol)
gf_uuid_copy(loc.pargfid, parent->gfid);
loc.name = entry->d_name;
loc.parent = inode_ref(parent);
- ret = inode_path(loc.inode, entry->d_name, &path);
+ ret = inode_path(loc.parent, entry->d_name, &path);
loc.path = path;
if (ret < 0)
goto out;
diff --git a/libglusterfs/src/gfdb/Makefile.am b/libglusterfs/src/gfdb/Makefile.am
deleted file mode 100644
index 3931e694c24..00000000000
--- a/libglusterfs/src/gfdb/Makefile.am
+++ /dev/null
@@ -1,37 +0,0 @@
-libgfdb_la_CFLAGS = -Wall $(GF_CFLAGS) $(GF_DARWIN_LIBGLUSTERFS_CFLAGS) \
- $(SQLITE_CFLAGS) -DDATADIR=\"$(localstatedir)\"
-
-libgfdb_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 -fpic \
- -I$(top_srcdir)/libglusterfs/src \
- -I$(top_srcdir)/rpc/xdr/src \
- -I$(top_builddir)/rpc/xdr/src \
- -DDATADIR=\"$(localstatedir)\"
-
-libgfdb_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
- $(SQLITE_LIBS) $(UUID_LIBS)
-
-libgfdb_la_LDFLAGS = $(GF_LDFLAGS) -version-info $(LIBGLUSTERFS_LT_VERSION)
-
-libgfdbdir = $(includedir)/glusterfs/gfdb
-
-if BUILD_GFDB
- lib_LTLIBRARIES = libgfdb.la
-endif
-
-CONTRIB_BUILDDIR = $(top_builddir)/contrib
-
-libgfdb_la_SOURCES = gfdb_data_store.c gfdb_data_store_helper.c \
- gfdb_sqlite3_helper.c gfdb_sqlite3.c
-
-noinst_HEADERS = gfdb_data_store.h gfdb_data_store_types.h \
- gfdb_sqlite3_helper.h gfdb_sqlite3.h gfdb_mem-types.h \
- gfdb_data_store_helper.h
-
-libgfdb_HEADERS = gfdb_data_store.h gfdb_data_store_types.h \
- gfdb_data_store_helper.h gfdb_sqlite3.h gfdb_mem-types.h \
- gfdb_sqlite3_helper.h
-
-CLEANFILES =
-
-$(top_builddir)/libglusterfs/src/libglusterfs.la:
- $(MAKE) -C $(top_builddir)/libglusterfs/src/ all
diff --git a/libglusterfs/src/gfdb/gfdb_data_store.c b/libglusterfs/src/gfdb/gfdb_data_store.c
deleted file mode 100644
index ab56a51d462..00000000000
--- a/libglusterfs/src/gfdb/gfdb_data_store.c
+++ /dev/null
@@ -1,802 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include "gfdb_sqlite3.h"
-#include "gfdb_data_store.h"
-#include "list.h"
-#include "libglusterfs-messages.h"
-
-/******************************************************************************
- *
- * Database Connection utils/internals
- *
- * ****************************************************************************/
-
-/* GFDB Connection Node:
- * ~~~~~~~~~~~~~~~~~~~~
- * Represents the connection to the database while using libgfdb
- * The connection node is not thread safe as far as fini_db is concerned.
- * You can use a single connection node
- * to do multithreaded db operations like insert/delete/find of records.
- * But you need to wait for all the operating threads to complete i.e
- * pthread_join() and then do fini_db() to kill the connection node.
- * gfdb_conn_node_t is an opaque structure.
- * */
-struct gfdb_conn_node_t {
- gfdb_connection_t gfdb_connection;
- struct list_head conn_list;
-};
-
-/*
- * db_conn_list is the circular linked list which
- * will have all the database connections for the process
- *
- * */
-static gfdb_conn_node_t *db_conn_list;
-
-/*
- * db_conn_mutex is the mutex for db_conn_list
- *
- * */
-static pthread_mutex_t db_conn_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-/*Checks the sanity of the connection node*/
-#define CHECK_CONN_NODE(_conn_node) \
- do { \
- GF_ASSERT(_conn_node); \
- GF_ASSERT(_conn_node->gfdb_connection.gf_db_connection); \
- } while (0)
-
-/* Checks the sanity of the connection node and goto */
-#define CHECK_CONN_NODE_GOTO(_conn_node, label) \
- do { \
- if (!_conn_node) { \
- goto label; \
- }; \
- if (!_conn_node->gfdb_connection.gf_db_connection) { \
- goto label; \
- }; \
- } while (0)
-
-/*Check if the conn node is first in the list*/
-#define IS_FIRST_NODE(db_conn_list, _conn_node) \
- ((_conn_node == db_conn_list) ? _gf_true : _gf_false)
-
-/*Check if the conn node is the only node in the list*/
-#define IS_THE_ONLY_NODE(_conn_node) \
- ((_conn_node->conn_list.next == _conn_node->conn_list.prev) ? _gf_true \
- : _gf_false)
-
-/*Internal Function: Adds connection node to the end of
- * the db connection list.*/
-static int
-add_connection_node(gfdb_conn_node_t *_conn_node)
-{
- int ret = -1;
-
- GF_ASSERT(_conn_node);
-
- /*Lock the list*/
- ret = pthread_mutex_lock(&db_conn_mutex);
- if (ret) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, ret, LG_MSG_LOCK_LIST_FAILED,
- "Failed lock db connection "
- "list %s",
- strerror(ret));
- ret = -1;
- goto out;
- }
-
- if (db_conn_list == NULL) {
- db_conn_list = _conn_node;
- } else {
- list_add_tail(&_conn_node->conn_list, &db_conn_list->conn_list);
- }
-
- /*unlock the list*/
- ret = pthread_mutex_unlock(&db_conn_mutex);
- if (ret) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, ret, LG_MSG_UNLOCK_LIST_FAILED,
- "Failed unlock db "
- "connection list %s",
- strerror(ret));
- ret = -1;
- goto out;
- /*TODO What if the unlock fails.
- * Will it lead to deadlock?
- * Most of the gluster code
- * no check for unlock or destroy of mutex!*/
- }
- ret = 0;
-out:
- return ret;
-}
-
-/*Internal Function:
- * Delete connection node from the list*/
-static int
-delete_conn_node(gfdb_conn_node_t *_conn_node)
-{
- int ret = -1;
-
- GF_ASSERT(_conn_node);
-
- /*Lock of the list*/
- ret = pthread_mutex_lock(&db_conn_mutex);
- if (ret) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, ret, LG_MSG_LOCK_LIST_FAILED,
- "Failed lock on db connection"
- " list %s",
- strerror(ret));
- goto out;
- }
-
- /*Remove the connection object from list*/
- if (IS_THE_ONLY_NODE(_conn_node)) {
- db_conn_list = NULL;
- GF_FREE(_conn_node);
- } else {
- if (IS_FIRST_NODE(db_conn_list, _conn_node)) {
- db_conn_list = list_entry(db_conn_list->conn_list.next,
- gfdb_conn_node_t, conn_list);
- }
- list_del(&_conn_node->conn_list);
- GF_FREE(_conn_node);
- }
-
- /*Release the list lock*/
- ret = pthread_mutex_unlock(&db_conn_mutex);
- if (ret) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_WARNING, ret, LG_MSG_UNLOCK_LIST_FAILED,
- "Failed unlock on db "
- "connection list %s",
- strerror(ret));
- /*TODO What if the unlock fails.
- * Will it lead to deadlock?
- * Most of the gluster code
- * no check for unlock or destroy of mutex!*/
- ret = -1;
- goto out;
- }
- ret = 0;
-out:
- return ret;
-}
-
-/*Internal function: Used initialize/map db operation of
- * specified type of db plugin*/
-static int
-init_db_operations(gfdb_db_type_t gfdb_db_type,
- gfdb_db_operations_t *gfdb_db_operations)
-{
- int ret = -1;
-
- GF_ASSERT(gfdb_db_operations);
-
- /*Clear the gfdb_db_operations*/
- gfdb_db_operations = memset(gfdb_db_operations, 0,
- sizeof(*gfdb_db_operations));
- switch (gfdb_db_type) {
- case GFDB_SQLITE3:
- gf_sqlite3_fill_db_operations(gfdb_db_operations);
- ret = 0;
- break;
- case GFDB_HYPERDEX:
- case GFDB_HASH_FILE_STORE:
- case GFDB_ROCKS_DB:
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_UNSUPPORTED_PLUGIN,
- "Plugin not supported");
- break;
- case GFDB_INVALID_DB:
- case GFDB_DB_END:
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_INVALID_DB_TYPE,
- "Invalid DB Type");
- break;
- }
- return ret;
-}
-
-/******************************************************************************
- *
- * LIBGFDB API Functions
- *
- * ****************************************************************************/
-
-/*Libgfdb API Function: Used to initialize a db connection
- * (Constructor function for db connection object)
- * Arguments:
- * args : Dictionary containing database specific parameters
- * eg: For sqlite3, pagesize, cachesize, db name, db path
- etc
- * gfdb_db_type : Type of data base used i.e sqlite or hyperdex etc
- * Returns : if successful return the GFDB Connection node to the caller or
- * NULL in case of failure*/
-gfdb_conn_node_t *
-init_db(dict_t *args, gfdb_db_type_t gfdb_db_type)
-{
- int ret = -1;
- gfdb_conn_node_t *_conn_node = NULL;
- gfdb_db_operations_t *db_operations_t = NULL;
-
- /*Create data base connection object*/
- _conn_node = GF_CALLOC(1, sizeof(gfdb_conn_node_t), gf_mt_db_conn_node_t);
- if (!_conn_node) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY,
- "Failed mem alloc for "
- "gfdb_conn_node_t");
- goto alloc_failed;
- }
-
- /*Init the list component of db connection object*/
- INIT_LIST_HEAD(&_conn_node->conn_list);
-
- /*Add created connection node to the list*/
- ret = add_connection_node(_conn_node);
- if (ret) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_ADD_TO_LIST_FAILED,
- "Failed to add connection "
- "node to list");
- goto _conn_failed;
- }
-
- db_operations_t = &_conn_node->gfdb_connection.gfdb_db_operations;
-
- /*init the db ops object of db connection object*/
- ret = init_db_operations(gfdb_db_type, db_operations_t);
- if (ret) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_INIT_DB_FAILED,
- "Failed initializing database "
- "operation failed.");
- goto init_db_failed;
- }
-
- /*Calling the init_db_op of the respected db type*/
- GF_ASSERT(db_operations_t->init_db_op);
- ret = db_operations_t->init_db_op(
- args, &_conn_node->gfdb_connection.gf_db_connection);
- if (ret) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_INIT_DB_FAILED,
- "Failed initializing database");
- goto init_db_failed;
- }
- _conn_node->gfdb_connection.gfdb_db_type = gfdb_db_type;
-
- return _conn_node;
-
- /*****Error Handling********/
- /* If init_db_operations or init_db of plugin failed delete
- * conn node from the list.
- * connection node will be free by delete_conn_node*/
-init_db_failed:
- ret = delete_conn_node(_conn_node);
- if (ret) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_DELETE_FROM_LIST_FAILED,
- "Failed deleting "
- "connection node from list");
- }
- return NULL;
- /*if adding to the list failed free connection node*/
-_conn_failed:
- GF_FREE(_conn_node);
- /*if allocation failed*/
-alloc_failed:
- return NULL;
- /*****Error Handling********/
-}
-
-/*Libgfdb API Function: Used to terminate/de-initialize db connection
- * (Destructor function for db connection object)
- * Arguments:
- * _conn_node : GFDB Connection node
- * Returns : if successful return 0 or
- * -ve value in case of failure*/
-int
-fini_db(gfdb_conn_node_t *_conn_node)
-{
- int ret = -1;
- gfdb_db_operations_t *db_operations_t = NULL;
-
- CHECK_CONN_NODE_GOTO(_conn_node, empty);
-
- db_operations_t = &_conn_node->gfdb_connection.gfdb_db_operations;
-
- GF_ASSERT(db_operations_t->fini_db_op);
-
- ret = db_operations_t->fini_db_op(
- &_conn_node->gfdb_connection.gf_db_connection);
- if (ret) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_CLOSE_CONNECTION_FAILED,
- "Failed close the db "
- "connection");
- goto out;
- }
-
- ret = delete_conn_node(_conn_node);
- if (ret) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_DELETE_FROM_LIST_FAILED,
- "Failed deleting "
- "connection node from list");
- }
-empty:
- ret = 0;
-out:
- return ret;
-}
-
-/*Libgfdb API Function: Used to insert/update records in the database
- * NOTE: In current gfdb_sqlite plugin we use that
- * same function to delete the record. Set the
- * gfdb_fop_path to GFDB_FOP_UNDEL to delete the
- * link of inode from GF_FLINK_TB and
- * GFDB_FOP_UNDEL_ALL to delete all the records from
- * GF_FLINK_TB and GF_FILE_TB.
- * TODO: Should separate this function into the
- * delete_record function
- * Refer CTR Xlator features/changetimerecorder for usage
- * Arguments:
- * _conn_node : GFDB Connection node
- * gfdb_db_record : Record to be inserted/updated
- * Returns : if successful return 0 or
- * -ve value in case of failure*/
-int
-insert_record(gfdb_conn_node_t *_conn_node, gfdb_db_record_t *gfdb_db_record)
-{
- int ret = 0;
- gfdb_db_operations_t *db_operations_t = NULL;
- void *gf_db_connection = NULL;
-
- CHECK_CONN_NODE(_conn_node);
-
- db_operations_t = &_conn_node->gfdb_connection.gfdb_db_operations;
- gf_db_connection = _conn_node->gfdb_connection.gf_db_connection;
-
- if (db_operations_t->insert_record_op) {
- ret = db_operations_t->insert_record_op(gf_db_connection,
- gfdb_db_record);
- if (ret) {
- gf_msg(GFDB_DATA_STORE,
- _gfdb_log_level(GF_LOG_ERROR, gfdb_db_record->ignore_errors),
- 0, LG_MSG_INSERT_OR_UPDATE_FAILED,
- "Insert/Update"
- " operation failed");
- }
- }
-
- return ret;
-}
-
-/*Libgfdb API Function: Used to delete record from the database
- * NOTE: In the current gfdb_sqlite3 plugin
- * implementation this function is dummy.
- * Use the insert_record function.
- * Refer CTR Xlator features/changetimerecorder for usage
- * Arguments:
- * _conn_node : GFDB Connection node
- * gfdb_db_record : Record to be deleted
- * Returns : if successful return 0 or
- * -ve value in case of failure*/
-int
-delete_record(gfdb_conn_node_t *_conn_node, gfdb_db_record_t *gfdb_db_record)
-{
- int ret = 0;
- gfdb_db_operations_t *db_operations_t = NULL;
- void *gf_db_connection = NULL;
-
- CHECK_CONN_NODE(_conn_node);
-
- db_operations_t = &_conn_node->gfdb_connection.gfdb_db_operations;
- gf_db_connection = _conn_node->gfdb_connection.gf_db_connection;
-
- if (db_operations_t->delete_record_op) {
- ret = db_operations_t->delete_record_op(gf_db_connection,
- gfdb_db_record);
- if (ret) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_DELETE_FAILED,
- "Delete operation "
- "failed");
- }
- }
-
- return ret;
-}
-
-/*Libgfdb API Function: Compact the database.
- *
- * Arguments:
- * _conn_node : GFDB Connection node
- * _compact_active : Is compaction currently on?
- * _compact_mode_switched : Was the compaction switch flipped?
- * Returns : if successful return 0 or
- * -ve value in case of failure*/
-int
-compact_db(gfdb_conn_node_t *_conn_node, gf_boolean_t _compact_active,
- gf_boolean_t _compact_mode_switched)
-{
- int ret = 0;
- gfdb_db_operations_t *db_operations_t = NULL;
- void *gf_db_connection = NULL;
-
- CHECK_CONN_NODE(_conn_node);
-
- db_operations_t = &_conn_node->gfdb_connection.gfdb_db_operations;
- gf_db_connection = _conn_node->gfdb_connection.gf_db_connection;
-
- if (db_operations_t->compact_db_op) {
- ret = db_operations_t->compact_db_op(gf_db_connection, _compact_active,
- _compact_mode_switched);
- if (ret) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_COMPACT_FAILED,
- "Compaction operation "
- "failed");
- }
- }
-
- return ret;
-}
-
-/*Libgfdb API Function: Query all the records from the database
- * Arguments:
- * _conn_node : GFDB Connection node
- * query_callback : Call back function that will be called
- * for every record found
- * _query_cbk_args : Custom argument passed for the call back
- * function query_callback
- * query_limit : number to limit number of rows returned by the query
- * Returns : if successful return 0 or
- * -ve value in case of failure*/
-int
-find_all(gfdb_conn_node_t *_conn_node, gf_query_callback_t query_callback,
- void *_query_cbk_args, int query_limit)
-{
- int ret = 0;
- gfdb_db_operations_t *db_operations_t = NULL;
- void *gf_db_connection = NULL;
-
- CHECK_CONN_NODE(_conn_node);
-
- db_operations_t = &_conn_node->gfdb_connection.gfdb_db_operations;
- gf_db_connection = _conn_node->gfdb_connection.gf_db_connection;
-
- if (db_operations_t->find_all_op) {
- ret = db_operations_t->find_all_op(gf_db_connection, query_callback,
- _query_cbk_args, query_limit);
- if (ret) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_FIND_OP_FAILED,
- "Find all operation "
- "failed");
- }
- }
-
- return ret;
-}
-
-/*Libgfdb API Function: Query records/files that have not changed/accessed
- * from a time in past to current time
- * Arguments:
- * _conn_node : GFDB Connection node
- * query_callback : Call back function that will be called
- * for every record found
- * _query_cbk_args : Custom argument passed for the call back
- * function query_callback
- * for_time : Time from where the file/s are not
- * changed/accessed
- * Returns : if successful return 0 or
- * -ve value in case of failure*/
-int
-find_unchanged_for_time(gfdb_conn_node_t *_conn_node,
- gf_query_callback_t query_callback,
- void *_query_cbk_args, gfdb_time_t *for_time)
-{
- int ret = 0;
- gfdb_db_operations_t *db_operations_t = NULL;
- void *gf_db_connection = NULL;
-
- CHECK_CONN_NODE(_conn_node);
-
- db_operations_t = &_conn_node->gfdb_connection.gfdb_db_operations;
- gf_db_connection = _conn_node->gfdb_connection.gf_db_connection;
-
- if (db_operations_t->find_unchanged_for_time_op) {
- ret = db_operations_t->find_unchanged_for_time_op(
- gf_db_connection, query_callback, _query_cbk_args, for_time);
- if (ret) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_FIND_OP_FAILED,
- "Find unchanged "
- "operation failed");
- }
- }
-
- return ret;
-}
-
-/*Libgfdb API Function: Query records/files that have changed/accessed from a
- * time in past to current time
- * Arguments:
- * _conn_node : GFDB Connection node
- * query_callback : Call back function that will be called
- * for every record found
- * _query_cbk_args : Custom argument passed for the call back
- * function query_callback
- * for_time : Time from where the file/s are
- * changed/accessed
- * Returns : if successful return 0 or
- * -ve value in case of failure*/
-int
-find_recently_changed_files(gfdb_conn_node_t *_conn_node,
- gf_query_callback_t query_callback,
- void *_query_cbk_args, gfdb_time_t *from_time)
-{
- int ret = 0;
- gfdb_db_operations_t *db_operations_t = NULL;
- void *gf_db_connection = NULL;
-
- CHECK_CONN_NODE(_conn_node);
-
- db_operations_t = &_conn_node->gfdb_connection.gfdb_db_operations;
- gf_db_connection = _conn_node->gfdb_connection.gf_db_connection;
-
- if (db_operations_t->find_recently_changed_files_op) {
- ret = db_operations_t->find_recently_changed_files_op(
- gf_db_connection, query_callback, _query_cbk_args, from_time);
- if (ret) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_FIND_OP_FAILED,
- "Find changed operation failed");
- }
- }
-
- return ret;
-}
-
-/*Libgfdb API Function: Query records/files that have not changed/accessed
- * from a time in past to current time, with
- * a desired frequency
- * Arguments:
- * _conn_node : GFDB Connection node
- * query_callback : Call back function that will be called
- * for every record found
- * _query_cbk_args : Custom argument passed for the call back
- * function query_callback
- * for_time : Time from where the file/s are not
- * changed/accessed
- * write_freq_thresold : Desired Write Frequency lower limit
- * read_freq_thresold : Desired Read Frequency lower limit
- * _clear_counters : If true, Clears all the frequency counters of
- * all files.
- * Returns : if successful return 0 or
- * -ve value in case of failure*/
-int
-find_unchanged_for_time_freq(gfdb_conn_node_t *_conn_node,
- gf_query_callback_t query_callback,
- void *_query_cbk_args, gfdb_time_t *for_time,
- int write_freq_thresold, int read_freq_thresold,
- gf_boolean_t _clear_counters)
-{
- int ret = 0;
- gfdb_db_operations_t *db_operations_t = NULL;
- void *gf_db_connection = NULL;
-
- CHECK_CONN_NODE(_conn_node);
-
- db_operations_t = &_conn_node->gfdb_connection.gfdb_db_operations;
- gf_db_connection = _conn_node->gfdb_connection.gf_db_connection;
-
- if (db_operations_t->find_unchanged_for_time_freq_op) {
- ret = db_operations_t->find_unchanged_for_time_freq_op(
- gf_db_connection, query_callback, _query_cbk_args, for_time,
- write_freq_thresold, read_freq_thresold, _clear_counters);
- if (ret) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_FIND_OP_FAILED,
- "Find unchanged with freq operation failed");
- }
- }
-
- return ret;
-}
-
-/*Libgfdb API Function: Query records/files that have changed/accessed from a
- * time in past to current time, with
- * a desired frequency
- * Arguments:
- * _conn_node : GFDB Connection node
- * query_callback : Call back function that will be called
- * for every record found
- * _query_cbk_args : Custom argument passed for the call back
- * function query_callback
- * for_time : Time from where the file/s are
- * changed/accessed
- * write_freq_thresold : Desired Write Frequency lower limit
- * read_freq_thresold : Desired Read Frequency lower limit
- * _clear_counters : If true, Clears all the frequency counters of
- * all files.
- * Returns : if successful return 0 or
- * -ve value in case of failure*/
-int
-find_recently_changed_files_freq(gfdb_conn_node_t *_conn_node,
- gf_query_callback_t query_callback,
- void *_query_cbk_args, gfdb_time_t *from_time,
- int write_freq_thresold,
- int read_freq_thresold,
- gf_boolean_t _clear_counters)
-{
- int ret = 0;
- gfdb_db_operations_t *db_operations_t = NULL;
- void *gf_db_connection = NULL;
-
- CHECK_CONN_NODE(_conn_node);
-
- db_operations_t = &_conn_node->gfdb_connection.gfdb_db_operations;
- gf_db_connection = _conn_node->gfdb_connection.gf_db_connection;
-
- if (db_operations_t->find_recently_changed_files_freq_op) {
- ret = db_operations_t->find_recently_changed_files_freq_op(
- gf_db_connection, query_callback, _query_cbk_args, from_time,
- write_freq_thresold, read_freq_thresold, _clear_counters);
- if (ret) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_FIND_OP_FAILED,
- "Find changed with freq operation failed");
- }
- }
-
- return ret;
-}
-
-/*Libgfdb API Function: Clear the heat for all the files
- *
- * Arguments:
- * conn_node : GFDB Connection node
- *
- * Returns : if successful return 0 or
- * -ve value in case of failure
- **/
-
-int
-clear_files_heat(gfdb_conn_node_t *conn_node)
-{
- int ret = 0;
- gfdb_db_operations_t *db_operations = NULL;
- void *gf_db_connection = NULL;
-
- CHECK_CONN_NODE(conn_node);
-
- db_operations = &conn_node->gfdb_connection.gfdb_db_operations;
- gf_db_connection = conn_node->gfdb_connection.gf_db_connection;
-
- if (db_operations->clear_files_heat_op) {
- ret = db_operations->clear_files_heat_op(gf_db_connection);
- if (ret) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0,
- LG_MSG_INSERT_OR_UPDATE_FAILED,
- "Clear files heat operation failed");
- }
- }
-
- return ret;
-}
-
-/* Libgfdb API Function: Function to extract version of the db
- * Input:
- * gfdb_conn_node_t *conn_node : GFDB Connection node
- * char **version : the version is extracted as a string and will be stored in
- * this variable. The freeing of the memory should be done by
- * the caller.
- * Return:
- * On success return the length of the version string that is
- * extracted.
- * On failure return -1
- * */
-int
-get_db_version(gfdb_conn_node_t *conn_node, char **version)
-{
- int ret = 0;
- gfdb_db_operations_t *db_operations = NULL;
- void *gf_db_connection = NULL;
-
- CHECK_CONN_NODE(conn_node);
-
- db_operations = &conn_node->gfdb_connection.gfdb_db_operations;
- gf_db_connection = conn_node->gfdb_connection.gf_db_connection;
-
- if (db_operations->get_db_version) {
- ret = db_operations->get_db_version(gf_db_connection, version);
- if (ret < 0) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_FIND_OP_FAILED,
- "Get version failed");
- }
- }
-
- return ret;
-}
-
-int
-get_db_params(gfdb_conn_node_t *conn_node, char *param_key, char **param_value)
-{
- int ret = -1;
- gfdb_db_operations_t *db_operations = NULL;
- void *gf_db_connection = NULL;
-
- CHECK_CONN_NODE(conn_node);
-
- db_operations = &conn_node->gfdb_connection.gfdb_db_operations;
- gf_db_connection = conn_node->gfdb_connection.gf_db_connection;
-
- if (db_operations->get_db_params) {
- ret = db_operations->get_db_params(gf_db_connection, param_key,
- param_value);
- if (ret < 0) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_FIND_OP_FAILED,
- "Get setting failed");
- }
- }
-
- return ret;
-}
-
-int
-set_db_params(gfdb_conn_node_t *conn_node, char *param_key, char *param_value)
-{
- int ret = -1;
- gfdb_db_operations_t *db_operations = NULL;
- void *gf_db_connection = NULL;
-
- CHECK_CONN_NODE(conn_node);
-
- db_operations = &conn_node->gfdb_connection.gfdb_db_operations;
- gf_db_connection = conn_node->gfdb_connection.gf_db_connection;
-
- if (db_operations->set_db_params) {
- ret = db_operations->set_db_params(gf_db_connection, param_key,
- param_value);
- if (ret < 0) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0,
- LG_MSG_INSERT_OR_UPDATE_FAILED,
- "Failed to set database setting");
- }
- }
-
- return ret;
-}
-
-static const char *
-get_db_path_key()
-{
- return GFDB_SQL_PARAM_DBPATH;
-}
-
-void
-get_gfdb_methods(gfdb_methods_t *methods)
-{
- methods->init_db = init_db;
- methods->fini_db = fini_db;
- methods->find_all = find_all;
- methods->find_unchanged_for_time = find_unchanged_for_time;
- methods->find_recently_changed_files = find_recently_changed_files;
- methods->find_unchanged_for_time_freq = find_unchanged_for_time_freq;
- methods
- ->find_recently_changed_files_freq = find_recently_changed_files_freq;
- methods->clear_files_heat = clear_files_heat;
- methods->get_db_version = get_db_version;
- methods->get_db_params = get_db_params;
- methods->set_db_params = set_db_params;
- methods->get_db_path_key = get_db_path_key;
-
- /* Query Record related functions */
- methods->gfdb_query_record_new = gfdb_query_record_new;
- methods->gfdb_query_record_free = gfdb_query_record_free;
- methods->gfdb_add_link_to_query_record = gfdb_add_link_to_query_record;
- methods->gfdb_write_query_record = gfdb_write_query_record;
- methods->gfdb_read_query_record = gfdb_read_query_record;
-
- /* Link info related functions */
- methods->gfdb_link_info_new = gfdb_link_info_new;
- methods->gfdb_link_info_free = gfdb_link_info_free;
-
- /* Compaction related functions */
- methods->compact_db = compact_db;
-}
diff --git a/libglusterfs/src/gfdb/gfdb_data_store.h b/libglusterfs/src/gfdb/gfdb_data_store.h
deleted file mode 100644
index 59f7bd01ab3..00000000000
--- a/libglusterfs/src/gfdb/gfdb_data_store.h
+++ /dev/null
@@ -1,331 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-#ifndef __GFDB_DATA_STORE_H
-#define __GFDB_DATA_STORE_H
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "common-utils.h"
-#include <time.h>
-#include <sys/time.h>
-
-#include "gfdb_data_store_types.h"
-
-/* GFDB Connection Node:
- * ~~~~~~~~~~~~~~~~~~~~
- * Represents the connection to the database while using libgfdb
- * The connection node is not thread safe as far as fini_db is concerned.
- * You can use a single connection node
- * to do multithreaded db operations like insert/delete/find of records.
- * But you need to wait for all the operating threads to complete i.e
- * pthread_join() and then do fini_db() to kill the connection node.
- * gfdb_conn_node_t is an opaque structure.
- * */
-typedef struct gfdb_conn_node_t gfdb_conn_node_t;
-
-/*Libgfdb API Function: Used to initialize db connection
- * Arguments:
- * args : Dictionary containing database specific parameters
- * eg: For sqlite3, pagesize, cachesize, db name, db path
- etc
- * gfdb_db_type : Type of data base used i.e sqlite or hyperdex etc
- * Returns : if successful return the GFDB Connection Node to the caller or
- * NULL value in case of failure*/
-gfdb_conn_node_t *
-init_db(dict_t *arg, gfdb_db_type_t db_type);
-
-typedef gfdb_conn_node_t *(*init_db_t)(dict_t *args,
- gfdb_db_type_t gfdb_db_type);
-
-/*Libgfdb API Function: Used to terminate/de-initialize db connection
- * (Destructor function for db connection object)
- * Arguments:
- * _conn_node : DB Connection Index of the DB Connection
- * Returns : if successful return 0 or
- * -ve value in case of failure*/
-int
-fini_db(gfdb_conn_node_t *);
-
-typedef int (*fini_db_t)(gfdb_conn_node_t *_conn_node);
-
-/*Libgfdb API Function: Used to insert/updated records in the database
- * NOTE: In current gfdb_sqlite plugin we use that
- * same function to delete the record. Set the
- * gfdb_fop_path to GFDB_FOP_UNDEL to delete the
- * link of inode from GF_FLINK_TB and
- * GFDB_FOP_UNDEL_ALL to delete all the records from
- * GF_FLINK_TB and GF_FILE_TB.
- * TODO: Should separate this function into the
- * delete_record function
- * Refer CTR Xlator features/changetimerecorder for usage
- * Arguments:
- * _conn_node : GFDB Connection node
- * gfdb_db_record : Record to be inserted/updated
- * Returns : if successful return 0 or
- * -ve value in case of failure*/
-int
-insert_record(gfdb_conn_node_t *, gfdb_db_record_t *gfdb_db_record);
-
-/*Libgfdb API Function: Used to delete record from the database
- * NOTE: In the current gfdb_sqlite3 plugin
- * implementation this function is dummy.
- * Use the insert_record function.
- * Refer CTR Xlator features/changetimerecorder for usage
- * Arguments:
- * _conn_node : GFDB Connection node
- * gfdb_db_record : Record to be deleted
- * Returns : if successful return 0 or
- * -ve value in case of failure*/
-int
-delete_record(gfdb_conn_node_t *, gfdb_db_record_t *gfdb_db_record);
-
-/*Libgfdb API Function: Query all the records from the database
- * Arguments:
- * _conn_node : GFDB Connection node
- * query_callback : Call back function that will be called
- * for every record found
- * _query_cbk_args : Custom argument passed for the call back
- * function query_callback
- * query_limit : 0 - umlimited,
- * any positive value - adds the LIMIT clause
- * to the SQL query
- *
- * Returns : if successful return 0 or
- * -ve value in case of failure*/
-int
-find_all(gfdb_conn_node_t *, gf_query_callback_t query_callback,
- void *_query_cbk_args, int query_limit);
-
-typedef int (*find_all_t)(gfdb_conn_node_t *,
- gf_query_callback_t query_callback,
- void *_query_cbk_args, int query_limit);
-
-/*Libgfdb API Function: Query records/files that have not changed/accessed
- * from a time in past to current time
- * Arguments:
- * _conn_node : GFDB Connection node
- * query_callback : Call back function that will be called
- * for every record found
- * _query_cbk_args : Custom argument passed for the call back
- * function query_callback
- * for_time : Time from where the file/s are not
- * changed/accessed
- * Returns : if successful return 0 or
- * -ve value in case of failure*/
-int
-find_unchanged_for_time(gfdb_conn_node_t *, gf_query_callback_t query_callback,
- void *_query_cbk_args, gfdb_time_t *for_time);
-
-typedef int (*find_unchanged_for_time_t)(gfdb_conn_node_t *_conn_node,
- gf_query_callback_t query_callback,
- void *_query_cbk_args,
- gfdb_time_t *for_time);
-
-/*Libgfdb API Function: Query records/files that have changed/accessed from a
- * time in past to current time
- * Arguments:
- * _conn_node : GFDB Connection node
- * query_callback : Call back function that will be called
- * for every record found
- * _query_cbk_args : Custom argument passed for the call back
- * function query_callback
- * for_time : Time from where the file/s are
- * changed/accessed
- * Returns : if successful return 0 or
- * -ve value in case of failure*/
-int
-find_recently_changed_files(gfdb_conn_node_t *_conn,
- gf_query_callback_t query_callback,
- void *_query_cbk_args, gfdb_time_t *from_time);
-
-typedef int (*find_recently_changed_files_t)(gfdb_conn_node_t *_conn_node,
- gf_query_callback_t query_callback,
- void *_query_cbk_args,
- gfdb_time_t *from_time);
-
-/*Libgfdb API Function: Query records/files that have not changed/accessed
- * from a time in past to current time, with
- * a desired frequency
- * Arguments:
- * _conn_node : GFDB Connection node
- * query_callback : Call back function that will be called
- * for every record found
- * _query_cbk_args : Custom argument passed for the call back
- * function query_callback
- * for_time : Time from where the file/s are not
- * changed/accessed
- * write_freq_thresold : Desired Write Frequency lower limit
- * read_freq_thresold : Desired Read Frequency lower limit
- * _clear_counters : If true, Clears all the frequency counters of
- * all files.
- * Returns : if successful return 0 or
- * -ve value in case of failure*/
-int
-find_unchanged_for_time_freq(gfdb_conn_node_t *_conn,
- gf_query_callback_t query_callback,
- void *_query_cbk_args, gfdb_time_t *for_time,
- int write_freq_thresold, int read_freq_thresold,
- gf_boolean_t _clear_counters);
-
-typedef int (*find_unchanged_for_time_freq_t)(
- gfdb_conn_node_t *_conn_node, gf_query_callback_t query_callback,
- void *_query_cbk_args, gfdb_time_t *for_time, int write_freq_thresold,
- int read_freq_thresold, gf_boolean_t _clear_counters);
-
-/*Libgfdb API Function: Query records/files that have changed/accessed from a
- * time in past to current time, with
- * a desired frequency
- * Arguments:
- * _conn_node : GFDB Connection node
- * query_callback : Call back function that will be called
- * for every record found
- * _query_cbk_args : Custom argument passed for the call back
- * function query_callback
- * for_time : Time from where the file/s are
- * changed/accessed
- * write_freq_thresold : Desired Write Frequency lower limit
- * read_freq_thresold : Desired Read Frequency lower limit
- * _clear_counters : If true, Clears all the frequency counters of
- * all files.
- * Returns : if successful return 0 or
- * -ve value in case of failure*/
-int
-find_recently_changed_files_freq(gfdb_conn_node_t *_conn,
- gf_query_callback_t query_callback,
- void *_query_cbk_args, gfdb_time_t *from_time,
- int write_freq_thresold,
- int read_freq_thresold,
- gf_boolean_t _clear_counters);
-
-typedef int (*find_recently_changed_files_freq_t)(
- gfdb_conn_node_t *_conn_node, gf_query_callback_t query_callback,
- void *_query_cbk_args, gfdb_time_t *from_time, int write_freq_thresold,
- int read_freq_thresold, gf_boolean_t _clear_counters);
-
-typedef const char *(*get_db_path_key_t)();
-
-/*Libgfdb API Function: Clear the heat for all the files
- *
- * Arguments:
- * _conn_node : GFDB Connection node
- *
- * Returns : if successful return 0 or
- * -ve value in case of failure
- **/
-int
-clear_files_heat(gfdb_conn_node_t *_conn_node);
-
-typedef int (*clear_files_heat_t)(gfdb_conn_node_t *_conn_node);
-
-/* Libgfdb API Function: Function to extract version of the db
- * Arguments:
- * gfdb_conn_node_t *_conn_node : GFDB Connection node
- * char **version : the version is extracted as a string
- * and will be stored in this variable.
- * The freeing of the memory should be done by the caller.
- * Return:
- * On success return the length of the version string that is
- * extracted.
- * On failure return -1
- * */
-int
-get_db_version(gfdb_conn_node_t *_conn_node, char **version);
-
-typedef int (*get_db_version_t)(gfdb_conn_node_t *_conn_node, char **version);
-
-/* Libgfdb API Function: Function to extract param from the db
- * Arguments:
- * gfdb_conn_node_t *_conn_node : GFDB Connection node
- * char *param_key : param to be extracted
- * char **param_value : the value of the param that is
- * extracted. This function will allocate memory
- * to pragma_value. The caller should free the memory.
- * Return:
- * On success return the length of the param value that is
- * extracted.
- * On failure return -1
- * */
-int
-get_db_params(gfdb_conn_node_t *_conn_node, char *param_key,
- char **param_value);
-
-typedef int (*get_db_params_t)(gfdb_conn_node_t *db_conn, char *param_key,
- char **param_value);
-
-/* Libgfdb API Function: Function to set db params
- * Arguments:
- * gfdb_conn_node_t *_conn_node : GFDB Connection node
- * char *param_key : param to be set
- * char *param_value : param value
- * Return:
- * On success return 0
- * On failure return -1
- * */
-int
-set_db_params(gfdb_conn_node_t *_conn_node, char *param_key, char *param_value);
-
-typedef int (*set_db_params_t)(gfdb_conn_node_t *db_conn, char *param_key,
- char *param_value);
-
-/*Libgfdb API Function: Compact the database.
- *
- * Arguments:
- * _conn_node : GFDB Connection node
- * _compact_active : Is compaction currently on?
- * _compact_mode_switched : Was the compaction switch flipped?
- * Returns : if successful return 0 or
- * -ve value in case of failure*/
-int
-compact_db(gfdb_conn_node_t *_conn_node, gf_boolean_t _compact_active,
- gf_boolean_t _compact_mode_switched);
-
-typedef int (*compact_db_t)(gfdb_conn_node_t *db_conn,
- gf_boolean_t compact_active,
- gf_boolean_t compact_mode_switched);
-
-typedef struct gfdb_methods_s {
- init_db_t init_db;
- fini_db_t fini_db;
- find_all_t find_all;
- find_unchanged_for_time_t find_unchanged_for_time;
- find_recently_changed_files_t find_recently_changed_files;
- find_unchanged_for_time_freq_t find_unchanged_for_time_freq;
- find_recently_changed_files_freq_t find_recently_changed_files_freq;
- clear_files_heat_t clear_files_heat;
- get_db_version_t get_db_version;
- get_db_params_t get_db_params;
- set_db_params_t set_db_params;
- /* Do not expose dbpath directly. Expose it via an */
- /* access function: get_db_path_key(). */
- char *dbpath;
- get_db_path_key_t get_db_path_key;
-
- /* Query Record related functions */
- gfdb_query_record_new_t gfdb_query_record_new;
- gfdb_query_record_free_t gfdb_query_record_free;
- gfdb_add_link_to_query_record_t gfdb_add_link_to_query_record;
- gfdb_write_query_record_t gfdb_write_query_record;
- gfdb_read_query_record_t gfdb_read_query_record;
-
- /* Link info related functions */
- gfdb_link_info_new_t gfdb_link_info_new;
- gfdb_link_info_free_t gfdb_link_info_free;
-
- /* Compaction related functions */
- compact_db_t compact_db;
-} gfdb_methods_t;
-
-void
-get_gfdb_methods(gfdb_methods_t *methods);
-
-typedef void (*get_gfdb_methods_t)(gfdb_methods_t *methods);
-
-#endif
diff --git a/libglusterfs/src/gfdb/gfdb_data_store_helper.c b/libglusterfs/src/gfdb/gfdb_data_store_helper.c
deleted file mode 100644
index b1f79bc8dad..00000000000
--- a/libglusterfs/src/gfdb/gfdb_data_store_helper.c
+++ /dev/null
@@ -1,588 +0,0 @@
-
-#include "gfdb_data_store_helper.h"
-#include "syscall.h"
-
-/******************************************************************************
- *
- * Query record related functions
- *
- * ****************************************************************************/
-
-/*Create a single link info structure*/
-gfdb_link_info_t *
-gfdb_link_info_new()
-{
- gfdb_link_info_t *link_info = NULL;
-
- link_info = GF_CALLOC(1, sizeof(gfdb_link_info_t), gf_mt_gfdb_link_info_t);
- if (!link_info) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY,
- "Memory allocation failed for "
- "link_info ");
- goto out;
- }
-
- INIT_LIST_HEAD(&link_info->list);
-
-out:
-
- return link_info;
-}
-
-/*Destroy a link info structure*/
-void
-gfdb_link_info_free(gfdb_link_info_t *link_info)
-{
- GF_FREE(link_info);
-}
-
-/*Function to create the query_record*/
-gfdb_query_record_t *
-gfdb_query_record_new()
-{
- int ret = -1;
- gfdb_query_record_t *query_record = NULL;
-
- query_record = GF_CALLOC(1, sizeof(gfdb_query_record_t),
- gf_mt_gfdb_query_record_t);
- if (!query_record) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY,
- "Memory allocation failed for "
- "query_record ");
- goto out;
- }
-
- INIT_LIST_HEAD(&query_record->link_list);
-
- ret = 0;
-out:
- if (ret == -1) {
- GF_FREE(query_record);
- }
- return query_record;
-}
-
-/*Function to delete a single linkinfo from list*/
-static void
-gfdb_delete_linkinfo_from_list(gfdb_link_info_t **link_info)
-{
- GF_VALIDATE_OR_GOTO(GFDB_DATA_STORE, link_info, out);
- GF_VALIDATE_OR_GOTO(GFDB_DATA_STORE, *link_info, out);
-
- /*Remove hard link from list*/
- list_del(&(*link_info)->list);
- gfdb_link_info_free(*link_info);
- link_info = NULL;
-out:
- return;
-}
-
-/*Function to destroy link_info list*/
-void
-gfdb_free_link_info_list(gfdb_query_record_t *query_record)
-{
- gfdb_link_info_t *link_info = NULL;
- gfdb_link_info_t *temp = NULL;
-
- GF_VALIDATE_OR_GOTO(GFDB_DATA_STORE, query_record, out);
-
- list_for_each_entry_safe(link_info, temp, &query_record->link_list, list)
- {
- gfdb_delete_linkinfo_from_list(&link_info);
- link_info = NULL;
- }
-
-out:
- return;
-}
-
-/* Function to add linkinfo to the query record */
-int
-gfdb_add_link_to_query_record(gfdb_query_record_t *query_record, uuid_t pgfid,
- char *base_name)
-{
- int ret = -1;
- gfdb_link_info_t *link_info = NULL;
- int base_name_len = 0;
-
- GF_VALIDATE_OR_GOTO(GFDB_DATA_STORE, query_record, out);
- GF_VALIDATE_OR_GOTO(GFDB_DATA_STORE, pgfid, out);
- GF_VALIDATE_OR_GOTO(GFDB_DATA_STORE, base_name, out);
-
- link_info = gfdb_link_info_new();
- if (!link_info) {
- goto out;
- }
-
- gf_uuid_copy(link_info->pargfid, pgfid);
- base_name_len = strlen(base_name);
- memcpy(link_info->file_name, base_name, base_name_len);
- link_info->file_name[base_name_len] = '\0';
-
- list_add_tail(&link_info->list, &query_record->link_list);
-
- query_record->link_count++;
-
- ret = 0;
-out:
- if (ret) {
- gfdb_link_info_free(link_info);
- link_info = NULL;
- }
- return ret;
-}
-
-/*Function to destroy query record*/
-void
-gfdb_query_record_free(gfdb_query_record_t *query_record)
-{
- if (query_record) {
- gfdb_free_link_info_list(query_record);
- GF_FREE(query_record);
- }
-}
-
-/******************************************************************************
- SERIALIZATION/DE-SERIALIZATION OF QUERY RECORD
-*******************************************************************************/
-/******************************************************************************
- The on disk format of query record is as follows,
-
-+---------------------------------------------------------------------------+
-| Length of serialized query record | Serialized Query Record |
-+---------------------------------------------------------------------------+
- 4 bytes Length of serialized query record
- |
- |
- -------------------------------------------------|
- |
- |
- V
- Serialized Query Record Format:
- +---------------------------------------------------------------------------+
- | GFID | Link count | <LINK INFO> |..... | FOOTER |
- +---------------------------------------------------------------------------+
- 16 B 4 B Link Length 4 B
- | |
- | |
- -----------------------------| |
- | |
- | |
- V |
- Each <Link Info> will be serialized as |
- +-----------------------------------------------+ |
- | PGID | BASE_NAME_LENGTH | BASE_NAME | |
- +-----------------------------------------------+ |
- 16 B 4 B BASE_NAME_LENGTH |
- |
- |
- ------------------------------------------------------------------------|
- |
- |
- V
- FOOTER is a magic number 0xBAADF00D indicating the end of the record.
- This also serves as a serialized schema validator.
- * ****************************************************************************/
-
-#define GFDB_QUERY_RECORD_FOOTER 0xBAADF00D
-#define UUID_LEN 16
-
-/*Function to get the potential length of the serialized buffer*/
-static int32_t
-gfdb_query_record_serialized_length(gfdb_query_record_t *query_record)
-{
- int32_t len = -1;
- gfdb_link_info_t *link_info = NULL;
-
- GF_VALIDATE_OR_GOTO(GFDB_DATA_STORE, query_record, out);
-
- /* Length of GFID */
- len = UUID_LEN;
-
- /* length of number of links*/
- len += sizeof(int32_t);
-
- list_for_each_entry(link_info, &query_record->link_list, list)
- {
- /* length of PFID */
- len += UUID_LEN;
-
- /* Add size of base name length*/
- len += sizeof(int32_t);
-
- /* Length of base_name */
- len += strlen(link_info->file_name);
- }
-
- /* length of footer */
- len += sizeof(int32_t);
-out:
- return len;
-}
-
-/* Function for serializing query record.
- *
- * Query Record Serialization Format
- * +---------------------------------------------------------------------------+
- * | GFID | Link count | <LINK INFO> |..... | FOOTER |
- * +---------------------------------------------------------------------------+
- * 16 B 4 B Link Length 4 B
- *
- *
- * Each <Link Info> will be serialized as
- * +-----------------------------------------------+
- * | PGID | BASE_NAME_LENGTH | BASE_NAME |
- * +-----------------------------------------------+
- * 16 B 4 B BASE_NAME_LENGTH
- *
- *
- * FOOTER is a magic number 0xBAADF00D indicating the end of the record.
- * This also serves as a serialized schema validator.
- *
- * The function will allocate memory to the serialized buffer,
- * the caller needs to free it.
- * Returns the length of the serialized buffer on success
- * or -1 on failure.
- *
- * */
-static int
-gfdb_query_record_serialize(gfdb_query_record_t *query_record, char **in_buffer)
-{
- gfdb_link_info_t *link_info = NULL;
- int count = -1;
- int base_name_len = 0;
- int buffer_length = 0;
- int footer = GFDB_QUERY_RECORD_FOOTER;
- char *buffer = NULL;
- char *ret_buffer = NULL;
-
- GF_VALIDATE_OR_GOTO(GFDB_DATA_STORE, query_record, out);
- GF_VALIDATE_OR_GOTO(GFDB_DATA_STORE, (query_record->link_count > 0), out);
- GF_VALIDATE_OR_GOTO(GFDB_DATA_STORE, in_buffer, out);
-
- /* Calculate the total length of the serialized buffer */
- buffer_length = gfdb_query_record_serialized_length(query_record);
- if (buffer_length <= 0) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_DB_ERROR,
- "Failed to calculate the length of "
- "serialized buffer");
- goto out;
- }
-
- /* Allocate memory to the serialized buffer */
- ret_buffer = GF_CALLOC(1, buffer_length, gf_common_mt_char);
- if (!ret_buffer) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_DB_ERROR,
- "Memory allocation failed for "
- "serialized buffer.");
- goto out;
- }
-
- buffer = ret_buffer;
-
- count = 0;
-
- /* Copying the GFID */
- memcpy(buffer, query_record->gfid, UUID_LEN);
- buffer += UUID_LEN;
- count += UUID_LEN;
-
- /* Copying the number of links */
- memcpy(buffer, &query_record->link_count, sizeof(int32_t));
- buffer += sizeof(int32_t);
- count += sizeof(int32_t);
-
- list_for_each_entry(link_info, &query_record->link_list, list)
- {
- /* Copying the PFID */
- memcpy(buffer, link_info->pargfid, UUID_LEN);
- buffer += UUID_LEN;
- count += UUID_LEN;
-
- /* Copying base name length*/
- base_name_len = strlen(link_info->file_name);
- memcpy(buffer, &base_name_len, sizeof(int32_t));
- buffer += sizeof(int32_t);
- count += sizeof(int32_t);
-
- /* Length of base_name */
- memcpy(buffer, link_info->file_name, base_name_len);
- buffer += base_name_len;
- count += base_name_len;
- }
-
- /* Copying the Footer of the record */
- memcpy(buffer, &footer, sizeof(int32_t));
- count += sizeof(int32_t);
-
-out:
- if (count < 0) {
- GF_FREE(ret_buffer);
- ret_buffer = NULL;
- }
- *in_buffer = ret_buffer;
- return count;
-}
-
-static gf_boolean_t
-is_serialized_buffer_valid(char *in_buffer, int buffer_length)
-{
- gf_boolean_t ret = _gf_false;
- int footer = 0;
-
- /* Read the footer */
- in_buffer += (buffer_length - sizeof(int32_t));
- memcpy(&footer, in_buffer, sizeof(int32_t));
-
- /*
- * if the footer is not GFDB_QUERY_RECORD_FOOTER
- * then the serialized record is invalid
- *
- * */
- if (footer != GFDB_QUERY_RECORD_FOOTER) {
- goto out;
- }
-
- ret = _gf_true;
-out:
- return ret;
-}
-
-static int
-gfdb_query_record_deserialize(char *in_buffer, int buffer_length,
- gfdb_query_record_t **query_record)
-{
- int ret = -1;
- char *buffer = NULL;
- int i = 0;
- gfdb_link_info_t *link_info = NULL;
- int count = 0;
- int base_name_len = 0;
- gfdb_query_record_t *ret_qrecord = NULL;
-
- GF_VALIDATE_OR_GOTO(GFDB_DATA_STORE, in_buffer, out);
- GF_VALIDATE_OR_GOTO(GFDB_DATA_STORE, query_record, out);
- GF_VALIDATE_OR_GOTO(GFDB_DATA_STORE, (buffer_length > 0), out);
-
- if (!is_serialized_buffer_valid(in_buffer, buffer_length)) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_DB_ERROR,
- "Invalid serialized query record");
- goto out;
- }
-
- buffer = in_buffer;
-
- ret_qrecord = gfdb_query_record_new();
- if (!ret_qrecord) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_DB_ERROR,
- "Failed to allocate space to "
- "gfdb_query_record_t");
- goto out;
- }
-
- /* READ GFID */
- memcpy((ret_qrecord)->gfid, buffer, UUID_LEN);
- buffer += UUID_LEN;
- count += UUID_LEN;
-
- /* Read the number of link */
- memcpy(&(ret_qrecord->link_count), buffer, sizeof(int32_t));
- buffer += sizeof(int32_t);
- count += sizeof(int32_t);
-
- /* Read all the links */
- for (i = 0; i < ret_qrecord->link_count; i++) {
- if (count >= buffer_length) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_DB_ERROR,
- "Invalid serialized "
- "query record");
- ret = -1;
- goto out;
- }
-
- link_info = gfdb_link_info_new();
- if (!link_info) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_DB_ERROR,
- "Failed to create link_info");
- goto out;
- }
-
- /* READ PGFID */
- memcpy(link_info->pargfid, buffer, UUID_LEN);
- buffer += UUID_LEN;
- count += UUID_LEN;
-
- /* Read base name length */
- memcpy(&base_name_len, buffer, sizeof(int32_t));
- buffer += sizeof(int32_t);
- count += sizeof(int32_t);
-
- /* READ basename */
- memcpy(link_info->file_name, buffer, base_name_len);
- buffer += base_name_len;
- count += base_name_len;
- link_info->file_name[base_name_len] = '\0';
-
- /* Add link_info to the list */
- list_add_tail(&link_info->list, &(ret_qrecord->link_list));
-
- /* Resetting link_info */
- link_info = NULL;
- }
-
- ret = 0;
-out:
- if (ret) {
- gfdb_query_record_free(ret_qrecord);
- ret_qrecord = NULL;
- }
- *query_record = ret_qrecord;
- return ret;
-}
-
-/* Function to write query record to file
- *
- * Disk format
- * +---------------------------------------------------------------------------+
- * | Length of serialized query record | Serialized Query Record |
- * +---------------------------------------------------------------------------+
- * 4 bytes Length of serialized query record
- *
- * Please refer gfdb_query_record_serialize () for format of
- * Serialized Query Record
- *
- * */
-int
-gfdb_write_query_record(int fd, gfdb_query_record_t *query_record)
-{
- int ret = -1;
- int buffer_len = 0;
- char *buffer = NULL;
- int write_len = 0;
- char *write_buffer = NULL;
-
- GF_VALIDATE_OR_GOTO(GFDB_DATA_STORE, (fd >= 0), out);
- GF_VALIDATE_OR_GOTO(GFDB_DATA_STORE, query_record, out);
-
- buffer_len = gfdb_query_record_serialize(query_record, &buffer);
- if (buffer_len < 0) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_DB_ERROR,
- "Failed to serialize query record");
- goto out;
- }
-
- /* Serialize the buffer length and write to file */
- ret = write(fd, &buffer_len, sizeof(int32_t));
- if (ret < 0) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_DB_ERROR,
- "Failed to write buffer length"
- " to file");
- goto out;
- }
-
- /* Write the serialized query record to file */
- write_len = buffer_len;
- write_buffer = buffer;
- while ((ret = write(fd, write_buffer, write_len)) < write_len) {
- if (ret < 0) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, errno, LG_MSG_DB_ERROR,
- "Failed to write serialized "
- "query record to file");
- goto out;
- }
-
- write_buffer += ret;
- write_len -= ret;
- }
-
- ret = 0;
-out:
- GF_FREE(buffer);
- return ret;
-}
-
-/* Function to read query record from file.
- * Allocates memory to query record and
- * returns length of serialized query record when successful
- * Return -1 when failed.
- * Return 0 when reached EOF.
- * */
-int
-gfdb_read_query_record(int fd, gfdb_query_record_t **query_record)
-{
- int ret = -1;
- int buffer_len = 0;
- int read_len = 0;
- char *buffer = NULL;
- char *read_buffer = NULL;
-
- GF_VALIDATE_OR_GOTO(GFDB_DATA_STORE, (fd >= 0), out);
- GF_VALIDATE_OR_GOTO(GFDB_DATA_STORE, query_record, out);
-
- /* Read serialized query record length from the file*/
- ret = sys_read(fd, &buffer_len, sizeof(int32_t));
- if (ret < 0) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_DB_ERROR,
- "Failed reading buffer length"
- " from file");
- goto out;
- }
- /* EOF */
- else if (ret == 0) {
- ret = 0;
- goto out;
- }
-
- /* Assumed sane range is 1B - 10MB */
- if ((buffer_len <= 0) || (buffer_len > (10 * 1024 * 1024))) {
- ret = -1;
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_DB_ERROR,
- "buffer length range is out of bound %d", buffer_len);
- goto out;
- }
-
- /* Allocating memory to the serialization buffer */
- buffer = GF_CALLOC(1, buffer_len, gf_common_mt_char);
- if (!buffer) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_DB_ERROR,
- "Failed to allocate space to "
- "serialized buffer");
- goto out;
- }
-
- /* Read the serialized query record from file */
- read_len = buffer_len;
- read_buffer = buffer;
- while ((ret = sys_read(fd, read_buffer, read_len)) < read_len) {
- /*Any error */
- if (ret < 0) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, errno, LG_MSG_DB_ERROR,
- "Failed to read serialized "
- "query record from file");
- goto out;
- }
- /* EOF */
- else if (ret == 0) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_DB_ERROR,
- "Invalid query record or "
- "corrupted query file");
- ret = -1;
- goto out;
- }
-
- read_buffer += ret;
- read_len -= ret;
- }
-
- ret = gfdb_query_record_deserialize(buffer, buffer_len, query_record);
- if (ret) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, LG_MSG_DB_ERROR,
- "Failed to de-serialize query record");
- goto out;
- }
-
- ret = buffer_len;
-out:
- GF_FREE(buffer);
- return ret;
-}
diff --git a/libglusterfs/src/gfdb/gfdb_data_store_helper.h b/libglusterfs/src/gfdb/gfdb_data_store_helper.h
deleted file mode 100644
index 7b4b0ae6aa1..00000000000
--- a/libglusterfs/src/gfdb/gfdb_data_store_helper.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-#ifndef __GFDB_DATA_STORE_HELPER_H
-#define __GFDB_DATA_STORE_HELPER_H
-
-#include <time.h>
-#include <sys/time.h>
-#include <string.h>
-#include <fcntl.h>
-
-#include "common-utils.h"
-#include "compat-uuid.h"
-#include "gfdb_mem-types.h"
-#include "dict.h"
-#include "byte-order.h"
-#include "libglusterfs-messages.h"
-
-#define GFDB_DATA_STORE "gfdbdatastore"
-
-/*******************************************************************************
- *
- * Query related data structure and functions
- *
- * ****************************************************************************/
-
-#ifdef NAME_MAX
-#define GF_NAME_MAX NAME_MAX
-#else
-#define GF_NAME_MAX 255
-#endif
-
-/*Structure to hold the link information*/
-typedef struct gfdb_link_info {
- uuid_t pargfid;
- char file_name[GF_NAME_MAX];
- struct list_head list;
-} gfdb_link_info_t;
-
-/*Structure used for querying purpose*/
-typedef struct gfdb_query_record {
- uuid_t gfid;
- /*This is the hardlink list*/
- struct list_head link_list;
- int link_count;
-} gfdb_query_record_t;
-
-/*Create a single link info structure*/
-gfdb_link_info_t *
-gfdb_link_info_new();
-typedef gfdb_link_info_t *(*gfdb_link_info_new_t)();
-
-/*Destroy a link info structure*/
-void
-gfdb_link_info_free(gfdb_link_info_t *gfdb_link_info);
-typedef void (*gfdb_link_info_free_t)(gfdb_link_info_t *gfdb_link_info);
-
-/* Function to create the query_record */
-gfdb_query_record_t *
-gfdb_query_record_new();
-typedef gfdb_query_record_t *(*gfdb_query_record_new_t)();
-
-/* Function to add linkinfo to query record */
-int
-gfdb_add_link_to_query_record(gfdb_query_record_t *gfdb_query_record,
- uuid_t pgfid, char *base_name);
-typedef int (*gfdb_add_link_to_query_record_t)(gfdb_query_record_t *, uuid_t,
- char *);
-
-/*Function to destroy query record*/
-void
-gfdb_query_record_free(gfdb_query_record_t *gfdb_query_record);
-typedef void (*gfdb_query_record_free_t)(gfdb_query_record_t *);
-
-/* Function to write query record to file */
-int
-gfdb_write_query_record(int fd, gfdb_query_record_t *gfdb_query_record);
-typedef int (*gfdb_write_query_record_t)(int, gfdb_query_record_t *);
-
-/* Function to read query record from file.
- * Allocates memory to query record and return 0 when successful
- * Return -1 when failed.
- * Return 0 when EOF.
- * */
-int
-gfdb_read_query_record(int fd, gfdb_query_record_t **gfdb_query_record);
-typedef int (*gfdb_read_query_record_t)(int, gfdb_query_record_t **);
-
-#endif \ No newline at end of file
diff --git a/libglusterfs/src/gfdb/gfdb_data_store_types.h b/libglusterfs/src/gfdb/gfdb_data_store_types.h
deleted file mode 100644
index 5ee050d4fab..00000000000
--- a/libglusterfs/src/gfdb/gfdb_data_store_types.h
+++ /dev/null
@@ -1,532 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-#ifndef __GFDB_DATA_STORE_TYPE_H
-#define __GFDB_DATA_STORE_TYPE_H
-
-#include "gfdb_data_store_helper.h"
-
-/*
- * Helps in dynamically choosing log level
- * */
-static inline gf_loglevel_t
-_gfdb_log_level(gf_loglevel_t given_level, gf_boolean_t ignore_level)
-{
- return (ignore_level) ? GF_LOG_DEBUG : given_level;
-}
-
-typedef enum gf_db_operation {
- GFDB_INVALID_DB_OP = -1,
- /* Query DB OPS : All the Query DB_OP should be added */
- /* in between START and END */
- GFDB_QUERY_DB_OP_START, /* Start of Query DB_OP */
- GFDB_QUERY_DB_OP,
- GF_FTABLE_EXISTS_DB_OP,
- GFDB_QUERY_DB_OP_END, /* End of Query DB_OP */
- /* Non-Query DB OPS */
- GFDB_DB_CREATE_DB_OP,
- GFDB_GFID_EXIST_DB_OP,
- GFDB_W_INSERT_DB_OP,
- GFDB_WU_INSERT_DB_OP,
- GFDB_W_UPDATE_DB_OP,
- GFDB_WU_UPDATE_DB_OP,
- GFDB_W_DELETE_DB_OP,
- GFDB_UW_DELETE_DB_OP,
- GFDB_WFC_UPDATE_DB_OP,
- GFDB_RFC_UPDATE_DB_OP,
- GFDB_DB_COMPACT_DB_OP /* Added for VACUUM/manual compaction support */
-} gf_db_operation_t;
-
-#define GF_COL_MAX_NUM 2
-#define GF_COL_ALL " * "
-
-/* Column/fields names used in the DB.
- * If any new field is added should be updated here*/
-#define GF_COL_GF_ID "GF_ID"
-#define GF_COL_GF_PID "GF_PID"
-#define GF_COL_FILE_NAME "FNAME"
-#define GF_COL_WSEC "W_SEC"
-#define GF_COL_WMSEC "W_MSEC"
-#define GF_COL_UWSEC "UW_SEC"
-#define GF_COL_UWMSEC "UW_MSEC"
-#define GF_COL_WSEC_READ "W_READ_SEC"
-#define GF_COL_WMSEC_READ "W_READ_MSEC"
-#define GF_COL_UWSEC_READ "UW_READ_SEC"
-#define GF_COL_UWMSEC_READ "UW_READ_MSEC"
-#define GF_COL_WDEL_FLAG "W_DEL_FLAG"
-#define GF_COL_WRITE_FREQ_CNTR "WRITE_FREQ_CNTR"
-#define GF_COL_READ_FREQ_CNTR "READ_FREQ_CNTR"
-#define GF_COL_LINK_UPDATE "LINK_UPDATE"
-
-/***********************Time related********************************/
-/*1 sec = 1000000 microsec*/
-#define GFDB_MICROSEC 1000000
-
-/*All the gfdb times are represented using this structure*/
-typedef struct timeval gfdb_time_t;
-
-/*Convert time into seconds*/
-static inline uint64_t
-gfdb_time_2_usec(gfdb_time_t *gfdb_time)
-{
- GF_ASSERT(gfdb_time);
- return ((uint64_t)gfdb_time->tv_sec * GFDB_MICROSEC) + gfdb_time->tv_usec;
-}
-
-/******************************************************************************
- *
- * Insert/Update Record related data structures/functions
- *
- * ****************************************************************************/
-
-/*Indicated a generic synchronous write to the db
- * This may or may not be implemented*/
-typedef enum gfdb_sync_type {
- GFDB_INVALID_SYNC = -1,
- GFDB_DB_ASYNC,
- GFDB_DB_SYNC
-} gfdb_sync_type_t;
-
-/*Strings related to the abvove sync type*/
-#define GFDB_STR_DB_ASYNC "async"
-#define GFDB_STR_DB_SYNC "sync"
-
-/*To convert sync type from string to gfdb_sync_type_t*/
-static inline int
-gf_string2gfdbdbsync(char *sync_option)
-{
- int ret = -1;
-
- if (!sync_option)
- goto out;
- if (strcmp(sync_option, GFDB_STR_DB_ASYNC) == 0) {
- ret = GFDB_DB_ASYNC;
- } else if (strcmp(sync_option, GFDB_STR_DB_SYNC) == 0) {
- ret = GFDB_DB_SYNC;
- }
-out:
- return ret;
-}
-
-/*Indicated different types of db*/
-typedef enum gfdb_db_type {
- GFDB_INVALID_DB = -1,
- GFDB_HASH_FILE_STORE,
- GFDB_ROCKS_DB,
- GFDB_SQLITE3,
- GFDB_HYPERDEX,
- GFDB_DB_END /*Add DB type Entries above this only*/
-} gfdb_db_type_t;
-
-/*String related to the db types*/
-#define GFDB_STR_HASH_FILE_STORE "hashfile"
-#define GFDB_STR_ROCKS_DB "rocksdb"
-#define GFDB_STR_SQLITE3 "sqlite3"
-#define GFDB_STR_HYPERDEX "hyperdex"
-
-/*Convert db type in string to gfdb_db_type_t*/
-static inline int
-gf_string2gfdbdbtype(char *db_option)
-{
- int ret = -1;
-
- if (!db_option)
- goto out;
- if (strcmp(db_option, GFDB_STR_HASH_FILE_STORE) == 0) {
- ret = GFDB_HASH_FILE_STORE;
- } else if (strcmp(db_option, GFDB_STR_ROCKS_DB) == 0) {
- ret = GFDB_ROCKS_DB;
- } else if (strcmp(db_option, GFDB_STR_SQLITE3) == 0) {
- ret = GFDB_SQLITE3;
- } else if (strcmp(db_option, GFDB_STR_HYPERDEX) == 0) {
- ret = GFDB_HYPERDEX;
- }
-out:
- return ret;
-}
-
-/*Tells the path of the fop*/
-typedef enum gfdb_fop_path {
- GFDB_FOP_INVALID = -1,
- /*Filler value for zero*/
- GFDB_FOP_PATH_ZERO = 0,
- /*have wind path below this*/
- GFDB_FOP_WIND = 1,
- GFDB_FOP_WDEL = 2,
- /*have unwind path below this*/
- GFDB_FOP_UNWIND = 4,
- /*Delete unwind path*/
- GFDB_FOP_UNDEL = 8,
- GFDB_FOP_UNDEL_ALL = 16
-} gfdb_fop_path_t;
-/*Strings related to the above fop path*/
-#define GFDB_STR_FOP_INVALID "INVALID"
-#define GFDB_STR_FOP_WIND "ENTRY"
-#define GFDB_STR_FOP_UNWIND "EXIT"
-#define GFDB_STR_FOP_WDEL "WDEL"
-#define GFDB_STR_FOP_UNDEL "UNDEL"
-
-static inline gf_boolean_t
-iswindpath(gfdb_fop_path_t gfdb_fop_path)
-{
- return ((gfdb_fop_path == GFDB_FOP_WIND) ||
- (gfdb_fop_path == GFDB_FOP_WDEL))
- ? _gf_true
- : _gf_false;
-}
-
-static inline gf_boolean_t
-isunwindpath(gfdb_fop_path_t gfdb_fop_path)
-{
- return (gfdb_fop_path >= GFDB_FOP_UNWIND) ? _gf_true : _gf_false;
-}
-
-/*Tell what type of fop it was
- * Like whether a dentry fop or a inode fop
- * Read fop or a write fop etc*/
-typedef enum gfdb_fop_type {
- GFDB_FOP_INVALID_OP = -1,
- /*Filler value for zero*/
- GFDB_FOP_TYPE_ZERO = 0,
- GFDB_FOP_DENTRY_OP = 1,
- GFDB_FOP_DENTRY_CREATE_OP = 2,
- GFDB_FOP_INODE_OP = 4,
- GFDB_FOP_WRITE_OP = 8,
- GFDB_FOP_READ_OP = 16
-} gfdb_fop_type_t;
-
-#define GFDB_FOP_INODE_WRITE (GFDB_FOP_INODE_OP | GFDB_FOP_WRITE_OP)
-
-#define GFDB_FOP_DENTRY_WRITE (GFDB_FOP_DENTRY_OP | GFDB_FOP_WRITE_OP)
-
-#define GFDB_FOP_CREATE_WRITE (GFDB_FOP_DENTRY_CREATE_OP | GFDB_FOP_WRITE_OP)
-
-#define GFDB_FOP_INODE_READ (GFDB_FOP_INODE_OP | GFDB_FOP_READ_OP)
-
-static inline gf_boolean_t
-isreadfop(gfdb_fop_type_t fop_type)
-{
- return (fop_type & GFDB_FOP_READ_OP) ? _gf_true : _gf_false;
-}
-
-static inline gf_boolean_t
-isdentryfop(gfdb_fop_type_t fop_type)
-{
- return ((fop_type & GFDB_FOP_DENTRY_OP) ||
- (fop_type & GFDB_FOP_DENTRY_CREATE_OP))
- ? _gf_true
- : _gf_false;
-}
-
-static inline gf_boolean_t
-isdentrycreatefop(gfdb_fop_type_t fop_type)
-{
- return (fop_type & GFDB_FOP_DENTRY_CREATE_OP) ? _gf_true : _gf_false;
-}
-
-/*The structure that is used to send insert/update the databases
- * using insert_db api*/
-typedef struct gfdb_db_record {
- /* GFID */
- uuid_t gfid;
- /* Used during a rename refer ctr_rename() in changetimerecorder
- * xlator*/
- uuid_t old_gfid;
- /* Parent GFID */
- uuid_t pargfid;
- uuid_t old_pargfid;
- /* File names */
- char file_name[GF_NAME_MAX + 1];
- char old_file_name[GF_NAME_MAX + 1];
- /* FOP type and FOP path*/
- gfdb_fop_type_t gfdb_fop_type;
- gfdb_fop_path_t gfdb_fop_path;
- /*Time of change or access*/
- gfdb_time_t gfdb_wind_change_time;
- gfdb_time_t gfdb_unwind_change_time;
- /* For crash consistency while inserting/updating hard links */
- gf_boolean_t islinkupdate;
- /* For link consistency we do a double update i.e mark the link
- * during the wind and during the unwind we update/delete the link.
- * This has a performance hit. We give a choice here whether we need
- * link consistency to be spoton or not using link_consistency flag.
- * This will have only one link update */
- gf_boolean_t link_consistency;
- /* For dentry fops we can choose to ignore recording of unwind time */
- /* For inode fops "record_exit" volume option does the trick, */
- /* but for dentry fops we update the LINK_UPDATE, so an extra */
- /* flag is provided to ignore the recording of the unwind time. */
- gf_boolean_t do_record_uwind_time;
- /* Global flag to record or not record counters */
- gf_boolean_t do_record_counters;
- /* Global flag to Record/Not Record wind or wind time.
- * This flag will overrule do_record_uwind_time*/
- gf_boolean_t do_record_times;
- /* Ignoring errors while inserting.
- * */
- gf_boolean_t ignore_errors;
-} gfdb_db_record_t;
-
-/*******************************************************************************
- *
- * Signatures for the plugin functions
- * i.e Any plugin should implementment
- * these functions to integrate with
- * libgfdb.
- *
- * ****************************************************************************/
-
-/*Call back function for querying the database*/
-typedef int (*gf_query_callback_t)(gfdb_query_record_t *, void *);
-
-/* Used to initialize db connection
- * Arguments:
- * args : Dictionary containing database specific parameters
- * db_conn : pointer to plugin specific data base connection
- * that will be created. If the call is successful
- * db_conn will contain the plugin specific connection
- * If call is unsuccessful will have NULL.
- * Returns : if successful return 0 or
- * -ve value in case of failure*/
-typedef int (*gfdb_init_db_t)(dict_t *args, void **db_conn);
-
-/* Used to terminate/de-initialize db connection
- * (Destructor function for db connection object)
- * Arguments:
- * db_conn : plugin specific data base connection
- * Returns : if successful return 0 or
- * -ve value in case of failure*/
-typedef int (*gfdb_fini_db_t)(void **db_conn);
-
-/*Used to insert/updated records in the database
- * Arguments:
- * db_conn : plugin specific data base connection
- * gfdb_db_record : Record to be inserted/updated
- * Returns : if successful return 0 or
- * -ve value in case of failure*/
-typedef int (*gfdb_insert_record_t)(void *db_conn, gfdb_db_record_t *db_record);
-
-/*Used to delete record from the database
- * Arguments:
- * db_conn : plugin specific data base connection
- * gfdb_db_record : Record to be deleted
- * Returns : if successful return 0 or
- * -ve value in case of failure*/
-typedef int (*gfdb_delete_record_t)(void *db_conn, gfdb_db_record_t *db_record);
-
-/*Used to compact the database
- * Arguments:
- * db_conn : GFDB Connection node
- * compact_active : Is compaction currently on?
- * compact_mode_switched : Was the compaction switch flipped?
- * Returns : if successful return 0 or
- * -ve value in case of failure*/
-typedef int (*gfdb_compact_db_t)(void *db_conn, gf_boolean_t compact_active,
- gf_boolean_t compact_mode_switched);
-
-/* Query all the records from the database
- * Arguments:
- * db_conn : plugin specific data base connection
- * query_callback : Call back function that will be called
- * for every record found
- * _query_cbk_args : Custom argument passed for the call back
- * function query_callback
- * query_limit : 0 - list all files
- * positive value - add the LIMIT clause to
- * the SQL query to limit the number of records
- * returned
- *
- * Returns : if successful return 0 or
- * -ve value in case of failure*/
-typedef int (*gfdb_find_all_t)(void *db_conn,
- gf_query_callback_t query_callback,
- void *_cbk_args, int query_limit);
-
-/* Query records/files that have not changed/accessed
- * from a time in past to current time
- * Arguments:
- * db_conn : plugin specific data base connection
- * query_callback : Call back function that will be called
- * for every record found
- * _cbk_args : Custom argument passed for the call back
- * function query_callback
- * for_time : Time from where the file/s are not
- * changed/accessed
- * Returns : if successful return 0 or
- * -ve value in case of failure*/
-typedef int (*gfdb_find_unchanged_for_time_t)(
- void *db_conn, gf_query_callback_t query_callback, void *_cbk_args,
- gfdb_time_t *_time);
-
-/* Query records/files that have changed/accessed from a
- * time in past to current time
- * Arguments:
- * db_conn : plugin specific data base connection
- * query_callback : Call back function that will be called
- * for every record found
- * _cbk_args : Custom argument passed for the call back
- * function query_callback
- * _time : Time from where the file/s are
- * changed/accessed
- * Returns : if successful return 0 or
- * -ve value in case of failure*/
-typedef int (*gfdb_find_recently_changed_files_t)(
- void *db_conn, gf_query_callback_t query_callback, void *_cbk_args,
- gfdb_time_t *_time);
-
-/* Query records/files that have not changed/accessed
- * from a time in past to current time, with
- * a desired frequency
- *
- * Arguments:
- * db_conn : plugin specific data base connection
- * query_callback : Call back function that will be called
- * for every record found
- * _cbk_args : Custom argument passed for the call back
- * function query_callback
- * _time : Time from where the file/s are not
- * changed/accessed
- * _write_freq : Desired Write Frequency lower limit
- * _read_freq : Desired Read Frequency lower limit
- * _clear_counters : If true, Clears all the frequency counters of
- * all files.
- * Returns : if successful return 0 or
- * -ve value in case of failure*/
-typedef int (*gfdb_find_unchanged_for_time_freq_t)(
- void *db_conn, gf_query_callback_t query_callback, void *_cbk_args,
- gfdb_time_t *_time, int _write_freq, int _read_freq,
- gf_boolean_t _clear_counters);
-
-/* Query records/files that have changed/accessed from a
- * time in past to current time, with a desired frequency
- * Arguments:
- * db_conn : plugin specific data base connection
- * query_callback : Call back function that will be called
- * for every record found
- * _cbk_args : Custom argument passed for the call back
- * function query_callback
- * _time : Time from where the file/s are
- * changed/accessed
- * _write_freq : Desired Write Frequency lower limit
- * _read_freq : Desired Read Frequency lower limit
- * _clear_counters : If true, Clears all the frequency counters of
- * all files.
- * Returns : if successful return 0 or
- * -ve value in case of failure*/
-typedef int (*gfdb_find_recently_changed_files_freq_t)(
- void *db_conn, gf_query_callback_t query_callback, void *_cbk_args,
- gfdb_time_t *_time, int _write_freq, int _read_freq,
- gf_boolean_t _clear_counters);
-
-typedef int (*gfdb_clear_files_heat_t)(void *db_conn);
-
-typedef int (*gfdb_get_db_version_t)(void *db_conn, char **version);
-
-typedef int (*gfdb_get_db_params_t)(void *db_conn, char *param_key,
- char **param_value);
-
-typedef int (*gfdb_set_db_params_t)(void *db_conn, char *param_key,
- char *param_value);
-
-/*Data structure holding all the above plugin function pointers*/
-typedef struct gfdb_db_operations {
- gfdb_init_db_t init_db_op;
- gfdb_fini_db_t fini_db_op;
- gfdb_insert_record_t insert_record_op;
- gfdb_delete_record_t delete_record_op;
- gfdb_compact_db_t compact_db_op;
- gfdb_find_all_t find_all_op;
- gfdb_find_unchanged_for_time_t find_unchanged_for_time_op;
- gfdb_find_recently_changed_files_t find_recently_changed_files_op;
- gfdb_find_unchanged_for_time_freq_t find_unchanged_for_time_freq_op;
- gfdb_find_recently_changed_files_freq_t find_recently_changed_files_freq_op;
- gfdb_clear_files_heat_t clear_files_heat_op;
- gfdb_get_db_version_t get_db_version;
- gfdb_get_db_params_t get_db_params;
- gfdb_set_db_params_t set_db_params;
-} gfdb_db_operations_t;
-
-/*******************************************************************************
- *
- * Database connection object: This objected is maitained by libgfdb for each
- * database connection created.
- * gf_db_connection : DB connection specific to the plugin
- * gfdb_db_operations : Contains all the libgfdb API implementation
- * from the plugin.
- * gfdb_db_type : Type of database
- *
- * ****************************************************************************/
-
-typedef struct gfdb_connection {
- void *gf_db_connection;
- gfdb_db_operations_t gfdb_db_operations;
- gfdb_db_type_t gfdb_db_type;
-} gfdb_connection_t;
-
-/*******************************************************************************
- *
- * Macros for get and set db options
- *
- * ****************************************************************************/
-
-/*Set param_key : str_value into param_dict*/
-#define SET_DB_PARAM_TO_DICT(comp_name, params_dict, param_key, str_value, \
- ret, error) \
- do { \
- data_t *data = NULL; \
- data = str_to_data(str_value); \
- if (!data) \
- goto error; \
- ret = dict_add(params_dict, param_key, data); \
- if (ret) { \
- gf_msg(comp_name, GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED, \
- "Failed setting %s " \
- "to params dictionary", \
- param_key); \
- data_destroy(data); \
- goto error; \
- }; \
- } while (0)
-
-/*get str_value of param_key from param_dict*/
-#define GET_DB_PARAM_FROM_DICT(comp_name, params_dict, param_key, str_value, \
- error) \
- do { \
- data_t *data = NULL; \
- data = dict_get(params_dict, param_key); \
- if (!data) { \
- gf_msg(comp_name, GF_LOG_ERROR, 0, LG_MSG_GET_PARAM_FAILED, \
- "Failed to retrieve " \
- "%s from params", \
- param_key); \
- goto error; \
- } else { \
- str_value = data->data; \
- }; \
- } while (0)
-
-/*get str_value of param_key from param_dict. if param_key is not present
- * set _default_v to str_value */
-#define GET_DB_PARAM_FROM_DICT_DEFAULT(comp_name, params_dict, param_key, \
- str_value, _default_v) \
- do { \
- data_t *data = NULL; \
- data = dict_get(params_dict, param_key); \
- if (!data) { \
- str_value = _default_v; \
- gf_msg(comp_name, GF_LOG_TRACE, 0, LG_MSG_GET_PARAM_FAILED, \
- "Failed to retrieve " \
- "%s from params.Assigning default value: %s", \
- param_key, _default_v); \
- } else { \
- str_value = data->data; \
- }; \
- } while (0)
-
-#endif
diff --git a/libglusterfs/src/gfdb/gfdb_mem-types.h b/libglusterfs/src/gfdb/gfdb_mem-types.h
deleted file mode 100644
index 0ea543b7ce1..00000000000
--- a/libglusterfs/src/gfdb/gfdb_mem-types.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- Copyright (c) 2008-2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __GFDB_MEM_TYPES_H__
-#define __GFDB_MEM_TYPES_H__
-
-#include "mem-types.h"
-
-enum gfdb_mem_types_ { gfdb_mtstart = gf_common_mt_end + 1, gfdb_mt_end };
-#endif
diff --git a/libglusterfs/src/gfdb/gfdb_sqlite3.c b/libglusterfs/src/gfdb/gfdb_sqlite3.c
deleted file mode 100644
index 63957278e8a..00000000000
--- a/libglusterfs/src/gfdb/gfdb_sqlite3.c
+++ /dev/null
@@ -1,1542 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include "gfdb_sqlite3.h"
-#include "gfdb_sqlite3_helper.h"
-#include "libglusterfs-messages.h"
-#include "syscall.h"
-
-/******************************************************************************
- *
- * Util functions
- *
- * ***************************************************************************/
-gf_sql_connection_t *
-gf_sql_connection_init()
-{
- gf_sql_connection_t *gf_sql_conn = NULL;
-
- gf_sql_conn = GF_CALLOC(1, sizeof(gf_sql_connection_t),
- gf_mt_sql_connection_t);
- if (gf_sql_conn == NULL) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY,
- "Error allocating memory to "
- "gf_sql_connection_t ");
- }
-
- return gf_sql_conn;
-}
-
-void
-gf_sql_connection_fini(gf_sql_connection_t **sql_connection)
-{
- if (!sql_connection)
- return;
- GF_FREE(*sql_connection);
- *sql_connection = NULL;
-}
-
-const char *
-gf_sql_jm2str(gf_sql_journal_mode_t jm)
-{
- switch (jm) {
- case gf_sql_jm_delete:
- return GF_SQL_JM_DELETE;
- case gf_sql_jm_truncate:
- return GF_SQL_JM_TRUNCATE;
- case gf_sql_jm_persist:
- return GF_SQL_JM_PERSIST;
- case gf_sql_jm_memory:
- return GF_SQL_JM_MEMORY;
- case gf_sql_jm_wal:
- return GF_SQL_JM_WAL;
- case gf_sql_jm_off:
- return GF_SQL_JM_OFF;
- case gf_sql_jm_invalid:
- break;
- }
- return NULL;
-}
-
-gf_sql_journal_mode_t
-gf_sql_str2jm(const char *jm_str)
-{
- if (!jm_str) {
- return gf_sql_jm_invalid;
- } else if (strcmp(jm_str, GF_SQL_JM_DELETE) == 0) {
- return gf_sql_jm_delete;
- } else if (strcmp(jm_str, GF_SQL_JM_TRUNCATE) == 0) {
- return gf_sql_jm_truncate;
- } else if (strcmp(jm_str, GF_SQL_JM_PERSIST) == 0) {
- return gf_sql_jm_persist;
- } else if (strcmp(jm_str, GF_SQL_JM_MEMORY) == 0) {
- return gf_sql_jm_memory;
- } else if (strcmp(jm_str, GF_SQL_JM_WAL) == 0) {
- return gf_sql_jm_wal;
- } else if (strcmp(jm_str, GF_SQL_JM_OFF) == 0) {
- return gf_sql_jm_off;
- }
- return gf_sql_jm_invalid;
-}
-
-const char *
-gf_sql_av_t2str(gf_sql_auto_vacuum_t sql_av)
-{
- switch (sql_av) {
- case gf_sql_av_none:
- return GF_SQL_AV_NONE;
- case gf_sql_av_full:
- return GF_SQL_AV_FULL;
- case gf_sql_av_incr:
- return GF_SQL_AV_INCR;
- case gf_sql_av_invalid:
- break;
- }
- return NULL;
-}
-
-gf_sql_auto_vacuum_t
-gf_sql_str2av_t(const char *av_str)
-{
- if (!av_str) {
- return gf_sql_av_invalid;
- } else if (strcmp(av_str, GF_SQL_AV_NONE) == 0) {
- return gf_sql_av_none;
- } else if (strcmp(av_str, GF_SQL_AV_FULL) == 0) {
- return gf_sql_av_full;
- } else if (strcmp(av_str, GF_SQL_AV_INCR) == 0) {
- return gf_sql_av_incr;
- }
- return gf_sql_av_invalid;
-}
-
-const char *
-gf_sync_t2str(gf_sql_sync_t sql_sync)
-{
- switch (sql_sync) {
- case gf_sql_sync_off:
- return GF_SQL_SYNC_OFF;
- case gf_sql_sync_normal:
- return GF_SQL_SYNC_NORMAL;
- case gf_sql_sync_full:
- return GF_SQL_SYNC_FULL;
- case gf_sql_sync_invalid:
- break;
- }
- return NULL;
-}
-
-gf_sql_sync_t
-gf_sql_str2sync_t(const char *sync_str)
-{
- if (!sync_str) {
- return gf_sql_sync_invalid;
- } else if (strcmp(sync_str, GF_SQL_SYNC_OFF) == 0) {
- return gf_sql_sync_off;
- } else if (strcmp(sync_str, GF_SQL_SYNC_NORMAL) == 0) {
- return gf_sql_sync_normal;
- } else if (strcmp(sync_str, GF_SQL_SYNC_FULL) == 0) {
- return gf_sql_sync_full;
- }
- return gf_sql_sync_invalid;
-}
-
-/*TODO replace GF_CALLOC by mem_pool or iobuff if required for performance */
-static char *
-sql_stmt_init()
-{
- char *sql_stmt = NULL;
-
- sql_stmt = GF_CALLOC(GF_STMT_SIZE_MAX, sizeof(char), gf_common_mt_char);
-
- if (!sql_stmt) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY,
- "Error allocating memory to SQL "
- "Statement ");
- goto out;
- }
-out:
- return sql_stmt;
-}
-
-/*TODO replace GF_FREE by mem_pool or iobuff if required for performance */
-static void
-sql_stmt_fini(char **sql_stmt)
-{
- GF_FREE(*sql_stmt);
-}
-
-/******************************************************************************
- * DB Essential functions used by
- * > gf_open_sqlite3_conn ()
- * > gf_close_sqlite3_conn ()
- * ***************************************************************************/
-static sqlite3 *
-gf_open_sqlite3_conn(char *sqlite3_db_path, int flags)
-{
- sqlite3 *sqlite3_db_conn = NULL;
- int ret = -1;
-
- GF_ASSERT(sqlite3_db_path);
-
- /*Creates DB if not created*/
- ret = sqlite3_open_v2(sqlite3_db_path, &sqlite3_db_conn, flags, NULL);
- if (ret) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_DB_ERROR,
- "FATAL: Could open %s : %s", sqlite3_db_path,
- sqlite3_errmsg(sqlite3_db_conn));
- }
- return sqlite3_db_conn;
-}
-
-static int
-gf_close_sqlite3_conn(sqlite3 *sqlite3_db_conn)
-{
- int ret = 0;
-
- GF_ASSERT(sqlite3_db_conn);
-
- if (sqlite3_db_conn) {
- ret = sqlite3_close(sqlite3_db_conn);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_CONNECTION_ERROR,
- "FATAL: sqlite3 close"
- " connection failed %s",
- sqlite3_errmsg(sqlite3_db_conn));
- ret = -1;
- goto out;
- }
- }
- ret = 0;
-out:
- return ret;
-}
-
-/******************************************************************************
- *
- * Database init / fini / create table
- *
- * ***************************************************************************/
-
-/*Function to fill db operations*/
-void
-gf_sqlite3_fill_db_operations(gfdb_db_operations_t *gfdb_db_ops)
-{
- GF_ASSERT(gfdb_db_ops);
-
- gfdb_db_ops->init_db_op = gf_sqlite3_init;
- gfdb_db_ops->fini_db_op = gf_sqlite3_fini;
-
- gfdb_db_ops->insert_record_op = gf_sqlite3_insert;
- gfdb_db_ops->delete_record_op = gf_sqlite3_delete;
- gfdb_db_ops->compact_db_op = gf_sqlite3_vacuum;
-
- gfdb_db_ops->find_all_op = gf_sqlite3_find_all;
- gfdb_db_ops
- ->find_unchanged_for_time_op = gf_sqlite3_find_unchanged_for_time;
- gfdb_db_ops->find_recently_changed_files_op =
- gf_sqlite3_find_recently_changed_files;
- gfdb_db_ops->find_unchanged_for_time_freq_op =
- gf_sqlite3_find_unchanged_for_time_freq;
- gfdb_db_ops->find_recently_changed_files_freq_op =
- gf_sqlite3_find_recently_changed_files_freq;
-
- gfdb_db_ops->clear_files_heat_op = gf_sqlite3_clear_files_heat;
-
- gfdb_db_ops->get_db_version = gf_sqlite3_version;
-
- gfdb_db_ops->get_db_params = gf_sqlite3_pragma;
-
- gfdb_db_ops->set_db_params = gf_sqlite3_set_pragma;
-}
-
-static int
-create_filetable(sqlite3 *sqlite3_db_conn)
-{
- int ret = -1;
- char *sql_stmt = NULL;
- char *sql_strerror = NULL;
-
- GF_ASSERT(sqlite3_db_conn);
-
- sql_stmt = sql_stmt_init();
- if (!sql_stmt) {
- ret = ENOMEM;
- goto out;
- }
-
- GF_CREATE_STMT(sql_stmt);
-
- ret = sqlite3_exec(sqlite3_db_conn, sql_stmt, NULL, NULL, &sql_strerror);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_EXEC_FAILED,
- "Failed executing: %s : %s", sql_stmt, sql_strerror);
- sqlite3_free(sql_strerror);
- ret = -1;
- goto out;
- }
-
- ret = 0;
-out:
- sql_stmt_fini(&sql_stmt);
- return ret;
-}
-
-static int
-apply_sql_params_db(gf_sql_connection_t *sql_conn, dict_t *param_dict)
-{
- int ret = -1;
- char *temp_str = NULL;
- char sqlite3_config_str[GF_NAME_MAX] = "";
-
- GF_ASSERT(sql_conn);
- GF_ASSERT(param_dict);
-
- /*Extract sql page_size from param_dict,
- * if not specified default value will be GF_SQL_DEFAULT_PAGE_SIZE*/
- temp_str = NULL;
- GET_DB_PARAM_FROM_DICT_DEFAULT(GFDB_STR_SQLITE3, param_dict,
- GFDB_SQL_PARAM_PAGE_SIZE, temp_str,
- GF_SQL_DEFAULT_PAGE_SIZE);
- sql_conn->page_size = atoi(temp_str);
- /*Apply page_size on the sqlite db*/
- GF_SQLITE3_SET_PRAGMA(sqlite3_config_str, "page_size", "%zd",
- sql_conn->page_size, ret, out);
-
- /*Extract sql cache size from param_dict,
- * if not specified default value will be
- * GF_SQL_DEFAULT_CACHE_SIZE pages*/
- temp_str = NULL;
- GET_DB_PARAM_FROM_DICT_DEFAULT(GFDB_STR_SQLITE3, param_dict,
- GFDB_SQL_PARAM_CACHE_SIZE, temp_str,
- GF_SQL_DEFAULT_CACHE_SIZE);
- sql_conn->cache_size = atoi(temp_str);
- /*Apply cache size on the sqlite db*/
- GF_SQLITE3_SET_PRAGMA(sqlite3_config_str, "cache_size", "%zd",
- sql_conn->cache_size, ret, out);
-
- /*Extract sql journal mode from param_dict,
- * if not specified default value will be
- * GF_SQL_DEFAULT_JOURNAL_MODE i.e "wal"*/
- temp_str = NULL;
- GET_DB_PARAM_FROM_DICT_DEFAULT(GFDB_STR_SQLITE3, param_dict,
- GFDB_SQL_PARAM_JOURNAL_MODE, temp_str,
- GF_SQL_DEFAULT_JOURNAL_MODE);
- sql_conn->journal_mode = gf_sql_str2jm(temp_str);
- /*Apply journal mode to the sqlite db*/
- GF_SQLITE3_SET_PRAGMA(sqlite3_config_str, "journal_mode", "%s", temp_str,
- ret, out);
-
- /*Only when the journal mode is WAL, wal_autocheckpoint makes sense*/
- if (sql_conn->journal_mode == gf_sql_jm_wal) {
- /*Extract sql wal auto check point from param_dict
- * if not specified default value will be
- * GF_SQL_DEFAULT_WAL_AUTOCHECKPOINT pages*/
- temp_str = NULL;
- GET_DB_PARAM_FROM_DICT_DEFAULT(GFDB_STR_SQLITE3, param_dict,
- GFDB_SQL_PARAM_WAL_AUTOCHECK, temp_str,
- GF_SQL_DEFAULT_WAL_AUTOCHECKPOINT);
- sql_conn->wal_autocheckpoint = atoi(temp_str);
- /*Apply wal auto check point to the sqlite db*/
- GF_SQLITE3_SET_PRAGMA(sqlite3_config_str, "wal_autocheckpoint", "%zd",
- sql_conn->wal_autocheckpoint, ret, out);
- }
-
- /*Extract sql synchronous from param_dict
- * if not specified default value will be GF_SQL_DEFAULT_SYNC*/
- temp_str = NULL;
- GET_DB_PARAM_FROM_DICT_DEFAULT(GFDB_STR_SQLITE3, param_dict,
- GFDB_SQL_PARAM_SYNC, temp_str,
- GF_SQL_DEFAULT_SYNC);
- sql_conn->synchronous = gf_sql_str2sync_t(temp_str);
- /*Apply synchronous to the sqlite db*/
- GF_SQLITE3_SET_PRAGMA(sqlite3_config_str, "synchronous", "%d",
- sql_conn->synchronous, ret, out);
-
- /*Extract sql auto_vacuum from param_dict
- * if not specified default value will be GF_SQL_DEFAULT_AUTO_VACUUM*/
- temp_str = NULL;
- GET_DB_PARAM_FROM_DICT_DEFAULT(GFDB_STR_SQLITE3, param_dict,
- GFDB_SQL_PARAM_AUTO_VACUUM, temp_str,
- GF_SQL_DEFAULT_AUTO_VACUUM);
- sql_conn->auto_vacuum = gf_sql_str2av_t(temp_str);
- /*Apply auto_vacuum to the sqlite db*/
- GF_SQLITE3_SET_PRAGMA(sqlite3_config_str, "auto_vacuum", "%d",
- sql_conn->auto_vacuum, ret, out);
-
- ret = 0;
-out:
- return ret;
-}
-
-int
-gf_sqlite3_init(dict_t *args, void **db_conn)
-{
- int ret = -1;
- gf_sql_connection_t *sql_conn = NULL;
- struct stat stbuf = {
- 0,
- };
- gf_boolean_t is_dbfile_exist = _gf_false;
- char *temp_str = NULL;
-
- GF_ASSERT(args);
- GF_ASSERT(db_conn);
-
- if (*db_conn != NULL) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_CONNECTION_ERROR,
- "DB Connection is not "
- "empty!");
- return 0;
- }
-
- if (!sqlite3_threadsafe()) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_NOT_MULTITHREAD_MODE,
- "sqlite3 is not in multithreaded mode");
- goto out;
- }
-
- sql_conn = gf_sql_connection_init();
- if (!sql_conn) {
- goto out;
- }
-
- /*Extract sql db path from args*/
- temp_str = NULL;
- GET_DB_PARAM_FROM_DICT(GFDB_STR_SQLITE3, args, GFDB_SQL_PARAM_DBPATH,
- temp_str, out);
- strncpy(sql_conn->sqlite3_db_path, temp_str, PATH_MAX - 1);
- sql_conn->sqlite3_db_path[PATH_MAX - 1] = 0;
-
- is_dbfile_exist = (sys_stat(sql_conn->sqlite3_db_path, &stbuf) == 0)
- ? _gf_true
- : _gf_false;
-
- /*Creates DB if not created*/
- sql_conn->sqlite3_db_conn = gf_open_sqlite3_conn(
- sql_conn->sqlite3_db_path, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE);
- if (!sql_conn->sqlite3_db_conn) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_CONNECTION_ERROR,
- "Failed creating db connection");
- goto out;
- }
-
- /* If the file exist we skip the config part
- * and creation of the schema */
- if (is_dbfile_exist)
- goto db_exists;
-
- /*Apply sqlite3 params to database*/
- ret = apply_sql_params_db(sql_conn, args);
- if (ret) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
- "Failed applying sql params"
- " to %s",
- sql_conn->sqlite3_db_path);
- goto out;
- }
-
- /*Create the schema if NOT present*/
- ret = create_filetable(sql_conn->sqlite3_db_conn);
- if (ret) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_CREATE_FAILED,
- "Failed Creating %s Table", GF_FILE_TABLE);
- goto out;
- }
-
-db_exists:
- ret = 0;
-out:
- if (ret) {
- gf_sqlite3_fini((void **)&sql_conn);
- }
-
- *db_conn = sql_conn;
-
- return ret;
-}
-
-int
-gf_sqlite3_fini(void **db_conn)
-{
- int ret = -1;
- gf_sql_connection_t *sql_conn = NULL;
-
- GF_ASSERT(db_conn);
- sql_conn = *db_conn;
-
- if (sql_conn) {
- if (sql_conn->sqlite3_db_conn) {
- ret = gf_close_sqlite3_conn(sql_conn->sqlite3_db_conn);
- if (ret) {
- /*Logging of error done in
- * gf_close_sqlite3_conn()*/
- goto out;
- }
- sql_conn->sqlite3_db_conn = NULL;
- }
- gf_sql_connection_fini(&sql_conn);
- }
- *db_conn = sql_conn;
- ret = 0;
-out:
- return ret;
-}
-
-/******************************************************************************
- *
- * INSERT/UPDATE/DELETE Operations
- *
- *
- * ***************************************************************************/
-
-int
-gf_sqlite3_insert(void *db_conn, gfdb_db_record_t *gfdb_db_record)
-{
- int ret = -1;
- gf_sql_connection_t *sql_conn = db_conn;
-
- CHECK_SQL_CONN(sql_conn, out);
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, gfdb_db_record, out);
-
- switch (gfdb_db_record->gfdb_fop_path) {
- case GFDB_FOP_WIND:
- ret = gf_sql_insert_wind(sql_conn, gfdb_db_record);
- if (ret) {
- gf_msg(GFDB_STR_SQLITE3,
- _gfdb_log_level(GF_LOG_ERROR,
- gfdb_db_record->ignore_errors),
- 0, LG_MSG_INSERT_FAILED, "Failed wind insert");
- goto out;
- }
- break;
- case GFDB_FOP_UNWIND:
- ret = gf_sql_insert_unwind(sql_conn, gfdb_db_record);
- if (ret) {
- gf_msg(GFDB_STR_SQLITE3,
- _gfdb_log_level(GF_LOG_ERROR,
- gfdb_db_record->ignore_errors),
- 0, LG_MSG_INSERT_FAILED, "Failed unwind insert");
- goto out;
- }
- break;
-
- case GFDB_FOP_WDEL:
- ret = gf_sql_update_delete_wind(sql_conn, gfdb_db_record);
- if (ret) {
- gf_msg(GFDB_STR_SQLITE3,
- _gfdb_log_level(GF_LOG_ERROR,
- gfdb_db_record->ignore_errors),
- 0, LG_MSG_UPDATE_FAILED,
- "Failed updating delete "
- "during wind");
- goto out;
- }
- break;
- case GFDB_FOP_UNDEL:
- case GFDB_FOP_UNDEL_ALL:
- ret = gf_sql_delete_unwind(sql_conn, gfdb_db_record);
- if (ret) {
- gf_msg(GFDB_STR_SQLITE3,
- _gfdb_log_level(GF_LOG_ERROR,
- gfdb_db_record->ignore_errors),
- 0, LG_MSG_DELETE_FAILED, "Failed deleting");
- goto out;
- }
- break;
- case GFDB_FOP_INVALID:
- default:
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_INVALID_FOP,
- "Cannot record to DB: Invalid FOP");
- goto out;
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-int
-gf_sqlite3_delete(void *db_conn, gfdb_db_record_t *gfdb_db_record)
-{
- int ret = -1;
- gf_sql_connection_t *sql_conn = db_conn;
-
- CHECK_SQL_CONN(sql_conn, out);
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, gfdb_db_record, out);
-
- ret = 0;
-out:
- return ret;
-}
-
-/******************************************************************************
- *
- * SELECT QUERY FUNCTIONS
- *
- *
- * ***************************************************************************/
-
-static int
-gf_get_basic_query_stmt(char **out_stmt)
-{
- int ret = -1;
- ret = gf_asprintf(out_stmt,
- "select GF_FILE_TB.GF_ID,"
- "GF_FLINK_TB.GF_PID ,"
- "GF_FLINK_TB.FNAME "
- "from GF_FLINK_TB, GF_FILE_TB "
- "where "
- "GF_FILE_TB.GF_ID = GF_FLINK_TB.GF_ID ");
- if (ret <= 0) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_QUERY_FAILED,
- "Failed to create base query statement");
- *out_stmt = NULL;
- }
- return ret;
-}
-
-/*
- * Find All files recorded in the DB
- * Input:
- * query_callback : query callback function to handle
- * result records from the query
- * */
-int
-gf_sqlite3_find_all(void *db_conn, gf_query_callback_t query_callback,
- void *query_cbk_args, int query_limit)
-{
- int ret = -1;
- char *query_str = NULL;
- gf_sql_connection_t *sql_conn = db_conn;
- sqlite3_stmt *prep_stmt = NULL;
- char *limit_query = NULL;
- char *query = NULL;
-
- CHECK_SQL_CONN(sql_conn, out);
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, query_callback, out);
-
- ret = gf_get_basic_query_stmt(&query_str);
- if (ret <= 0) {
- goto out;
- }
-
- query = query_str;
-
- if (query_limit > 0) {
- ret = gf_asprintf(&limit_query, "%s LIMIT %d", query, query_limit);
- if (ret < 0) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_QUERY_FAILED,
- "Failed creating limit query statement");
- limit_query = NULL;
- goto out;
- }
-
- query = limit_query;
- }
-
- ret = sqlite3_prepare(sql_conn->sqlite3_db_conn, query, -1, &prep_stmt, 0);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED,
- "Failed to prepare statement %s: %s", query,
- sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- ret = gf_sql_query_function(prep_stmt, query_callback, query_cbk_args);
- if (ret) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_QUERY_FAILED,
- "Failed Query %s", query);
- goto out;
- }
-
- ret = 0;
-out:
- sqlite3_finalize(prep_stmt);
- GF_FREE(query_str);
-
- if (limit_query)
- GF_FREE(limit_query);
-
- return ret;
-}
-
-/*
- * Find recently changed files from the DB
- * Input:
- * query_callback : query callback function to handle
- * result records from the query
- * from_time : Time to define what is recent
- * */
-int
-gf_sqlite3_find_recently_changed_files(void *db_conn,
- gf_query_callback_t query_callback,
- void *query_cbk_args,
- gfdb_time_t *from_time)
-{
- int ret = -1;
- char *query_str = NULL;
- gf_sql_connection_t *sql_conn = db_conn;
- sqlite3_stmt *prep_stmt = NULL;
- uint64_t from_time_usec = 0;
- char *base_query_str = NULL;
-
- CHECK_SQL_CONN(sql_conn, out);
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, query_callback, out);
-
- ret = gf_get_basic_query_stmt(&base_query_str);
- if (ret <= 0) {
- goto out;
- }
-
- ret = gf_asprintf(
- &query_str,
- "%s AND"
- /*First condition: For writes*/
- "( ((" GF_COL_TB_WSEC " * " TOSTRING(
- GFDB_MICROSEC) " + " GF_COL_TB_WMSEC
- ") >= ? )"
- " OR "
- /*Second condition: For reads*/
- "((" GF_COL_TB_RWSEC " * " TOSTRING(
- GFDB_MICROSEC) " + " GF_COL_TB_RWMSEC
- ") >= ?) )"
- /* Order by write wind time in a
- * descending order i.e most hot
- * files w.r.t to write */
- " ORDER BY GF_FILE_TB.W_SEC DESC",
- base_query_str);
-
- if (ret < 0) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_QUERY_FAILED,
- "Failed creating query statement");
- query_str = NULL;
- goto out;
- }
-
- from_time_usec = gfdb_time_2_usec(from_time);
-
- ret = sqlite3_prepare(sql_conn->sqlite3_db_conn, query_str, -1, &prep_stmt,
- 0);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED,
- "Failed to prepare statement %s :"
- " %s",
- query_str, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Bind write wind time*/
- ret = sqlite3_bind_int64(prep_stmt, 1, from_time_usec);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED,
- "Failed to bind from_time_usec "
- "%" PRIu64 " : %s",
- from_time_usec, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Bind read wind time*/
- ret = sqlite3_bind_int64(prep_stmt, 2, from_time_usec);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED,
- "Failed to bind from_time_usec "
- "%" PRIu64 " : %s ",
- from_time_usec, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Execute the query*/
- ret = gf_sql_query_function(prep_stmt, query_callback, query_cbk_args);
- if (ret) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_QUERY_FAILED,
- "Failed Query %s", query_str);
- goto out;
- }
-
- ret = 0;
-out:
- sqlite3_finalize(prep_stmt);
- GF_FREE(base_query_str);
- GF_FREE(query_str);
- return ret;
-}
-
-/*
- * Find unchanged files from a specified time from the DB
- * Input:
- * query_callback : query callback function to handle
- * result records from the query
- * for_time : Time from where the file/s are not changed
- * */
-int
-gf_sqlite3_find_unchanged_for_time(void *db_conn,
- gf_query_callback_t query_callback,
- void *query_cbk_args, gfdb_time_t *for_time)
-{
- int ret = -1;
- char *query_str = NULL;
- gf_sql_connection_t *sql_conn = db_conn;
- sqlite3_stmt *prep_stmt = NULL;
- uint64_t for_time_usec = 0;
- char *base_query_str = NULL;
-
- CHECK_SQL_CONN(sql_conn, out);
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, query_callback, out);
-
- ret = gf_get_basic_query_stmt(&base_query_str);
- if (ret <= 0) {
- goto out;
- }
-
- ret = gf_asprintf(
- &query_str,
- "%s AND "
- /*First condition: For writes*/
- "( ((" GF_COL_TB_WSEC " * " TOSTRING(
- GFDB_MICROSEC) " + " GF_COL_TB_WMSEC
- ") <= ? )"
- " AND "
- /*Second condition: For reads*/
- "((" GF_COL_TB_RWSEC " * " TOSTRING(
- GFDB_MICROSEC) " + " GF_COL_TB_RWMSEC
- ") <= ?) )"
- /* Order by write wind time in a
- * ascending order i.e most cold
- * files w.r.t to write */
- " ORDER BY GF_FILE_TB.W_SEC ASC",
- base_query_str);
-
- if (ret < 0) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_QUERY_FAILED,
- "Failed to create query statement");
- query_str = NULL;
- goto out;
- }
-
- for_time_usec = gfdb_time_2_usec(for_time);
-
- ret = sqlite3_prepare(sql_conn->sqlite3_db_conn, query_str, -1, &prep_stmt,
- 0);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED,
- "Failed to prepare statement %s :"
- " %s",
- query_str, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Bind write wind time*/
- ret = sqlite3_bind_int64(prep_stmt, 1, for_time_usec);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED,
- "Failed to bind for_time_usec "
- "%" PRIu64 " : %s",
- for_time_usec, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Bind read wind time*/
- ret = sqlite3_bind_int64(prep_stmt, 2, for_time_usec);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED,
- "Failed to bind for_time_usec "
- "%" PRIu64 " : %s",
- for_time_usec, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Execute the query*/
- ret = gf_sql_query_function(prep_stmt, query_callback, query_cbk_args);
- if (ret) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_QUERY_FAILED,
- "Failed Query %s", query_str);
- goto out;
- }
-
- ret = 0;
-out:
- sqlite3_finalize(prep_stmt);
- GF_FREE(base_query_str);
- GF_FREE(query_str);
- return ret;
-}
-
-/*
- * Find recently changed files with a specific frequency from the DB
- * Input:
- * db_conn : db connection object
- * query_callback : query callback function to handle
- * result records from the query
- * from_time : Time to define what is recent
- * freq_write_cnt : Frequency thresold for write
- * freq_read_cnt : Frequency thresold for read
- * clear_counters : Clear counters (r/w) for all inodes in DB
- * */
-int
-gf_sqlite3_find_recently_changed_files_freq(
- void *db_conn, gf_query_callback_t query_callback, void *query_cbk_args,
- gfdb_time_t *from_time, int freq_write_cnt, int freq_read_cnt,
- gf_boolean_t clear_counters)
-{
- int ret = -1;
- char *query_str = NULL;
- gf_sql_connection_t *sql_conn = db_conn;
- sqlite3_stmt *prep_stmt = NULL;
- uint64_t from_time_usec = 0;
- char *base_query_str = NULL;
-
- CHECK_SQL_CONN(sql_conn, out);
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, query_callback, out);
-
- ret = gf_get_basic_query_stmt(&base_query_str);
- if (ret <= 0) {
- goto out;
- }
- ret = gf_asprintf(
- &query_str,
- "%s AND "
- /*First condition: For Writes*/
- "( ( ((" GF_COL_TB_WSEC " * " TOSTRING(
- GFDB_MICROSEC) " + " GF_COL_TB_WMSEC
- ") >= ? )"
- " AND "
- " (" GF_COL_TB_WFC
- " >= ? ) )"
- " OR "
- /*Second condition: For Reads */
- "( ((" GF_COL_TB_RWSEC " * " TOSTRING(
- GFDB_MICROSEC) " + " GF_COL_TB_RWMSEC
- ") >= ?)"
- " AND "
- " (" GF_COL_TB_RFC
- " >= ? ) ) )"
- /* Order by write wind time and
- * write freq in a descending
- * order
- * i.e most hot files w.r.t to
- * write */
- " ORDER BY GF_FILE_TB.W_SEC "
- "DESC, "
- "GF_FILE_TB.WRITE_FREQ_CNTR DESC",
- base_query_str);
-
- if (ret < 0) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_QUERY_FAILED,
- "Failed to create query statement");
- query_str = NULL;
- goto out;
- }
-
- from_time_usec = gfdb_time_2_usec(from_time);
-
- ret = sqlite3_prepare(sql_conn->sqlite3_db_conn, query_str, -1, &prep_stmt,
- 0);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED,
- "Failed to prepare statement %s :"
- " %s",
- query_str, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Bind write wind time*/
- ret = sqlite3_bind_int64(prep_stmt, 1, from_time_usec);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED,
- "Failed to bind from_time_usec "
- "%" PRIu64 " : %s",
- from_time_usec, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Bind write frequency thresold*/
- ret = sqlite3_bind_int(prep_stmt, 2, freq_write_cnt);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED,
- "Failed to bind freq_write_cnt "
- "%d : %s",
- freq_write_cnt, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Bind read wind time*/
- ret = sqlite3_bind_int64(prep_stmt, 3, from_time_usec);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED,
- "Failed to bind from_time_usec "
- "%" PRIu64 " : %s",
- from_time_usec, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Bind read frequency thresold*/
- ret = sqlite3_bind_int(prep_stmt, 4, freq_read_cnt);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED,
- "Failed to bind freq_read_cnt "
- "%d : %s",
- freq_read_cnt, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Execute the query*/
- ret = gf_sql_query_function(prep_stmt, query_callback, query_cbk_args);
- if (ret) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_QUERY_FAILED,
- "Failed Query %s", query_str);
- goto out;
- }
-
- /*Clear counters*/
- if (clear_counters) {
- ret = gf_sql_clear_counters(sql_conn);
- if (ret) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0,
- LG_MSG_CLEAR_COUNTER_FAILED,
- "Failed to clear"
- " counters!");
- goto out;
- }
- }
- ret = 0;
-out:
- sqlite3_finalize(prep_stmt);
- GF_FREE(base_query_str);
- GF_FREE(query_str);
- return ret;
-}
-
-/*
- * Find unchanged files from a specified time, w.r.t to frequency, from the DB
- * Input:
- * query_callback : query callback function to handle
- * result records from the query
- * for_time : Time from where the file/s are not changed
- * freq_write_cnt : Frequency thresold for write
- * freq_read_cnt : Frequency thresold for read
- * clear_counters : Clear counters (r/w) for all inodes in DB
- * */
-int
-gf_sqlite3_find_unchanged_for_time_freq(void *db_conn,
- gf_query_callback_t query_callback,
- void *query_cbk_args,
- gfdb_time_t *for_time,
- int freq_write_cnt, int freq_read_cnt,
- gf_boolean_t clear_counters)
-{
- int ret = -1;
- char *query_str = NULL;
- gf_sql_connection_t *sql_conn = db_conn;
- sqlite3_stmt *prep_stmt = NULL;
- uint64_t for_time_usec = 0;
- char *base_query_str = NULL;
-
- CHECK_SQL_CONN(sql_conn, out);
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, query_callback, out);
-
- ret = gf_get_basic_query_stmt(&base_query_str);
- if (ret <= 0) {
- goto out;
- }
-
- ret = gf_asprintf (&query_str, "%s AND "
- /*First condition: For Writes
- * Files that have write wind time smaller than for_time
- * OR
- * File that have write wind time greater than for_time,
- * but write_frequency less than freq_write_cnt*/
- "( ( ((" GF_COL_TB_WSEC " * " TOSTRING(GFDB_MICROSEC) " + "
- GF_COL_TB_WMSEC ") < ? )"
- " OR "
- "( (" GF_COL_TB_WFC " < ? ) AND"
- "((" GF_COL_TB_WSEC " * " TOSTRING(GFDB_MICROSEC) " + "
- GF_COL_TB_WMSEC ") >= ? ) ) )"
- " AND "
- /*Second condition: For Reads
- * Files that have read wind time smaller than for_time
- * OR
- * File that have read wind time greater than for_time,
- * but read_frequency less than freq_read_cnt*/
- "( ((" GF_COL_TB_RWSEC " * " TOSTRING(GFDB_MICROSEC) " + "
- GF_COL_TB_RWMSEC ") < ? )"
- " OR "
- "( (" GF_COL_TB_RFC " < ? ) AND"
- "((" GF_COL_TB_RWSEC " * " TOSTRING(GFDB_MICROSEC) " + "
- GF_COL_TB_RWMSEC ") >= ? ) ) ) )"
- /* Order by write wind time and write freq in ascending order
- * i.e most cold files w.r.t to write */
- " ORDER BY GF_FILE_TB.W_SEC ASC, "
- "GF_FILE_TB.WRITE_FREQ_CNTR ASC",
- base_query_str);
-
- if (ret < 0) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_QUERY_FAILED,
- "Failed to create query statement");
- query_str = NULL;
- goto out;
- }
-
- for_time_usec = gfdb_time_2_usec(for_time);
-
- ret = sqlite3_prepare(sql_conn->sqlite3_db_conn, query_str, -1, &prep_stmt,
- 0);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED,
- "Failed to prepare delete "
- "statement %s : %s",
- query_str, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Bind write wind time*/
- ret = sqlite3_bind_int64(prep_stmt, 1, for_time_usec);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED,
- "Failed to bind for_time_usec "
- "%" PRIu64 " : %s",
- for_time_usec, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Bind write frequency thresold*/
- ret = sqlite3_bind_int(prep_stmt, 2, freq_write_cnt);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED,
- "Failed to bind freq_write_cnt"
- " %d : %s",
- freq_write_cnt, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Bind write wind time*/
- ret = sqlite3_bind_int64(prep_stmt, 3, for_time_usec);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED,
- "Failed to bind for_time_usec "
- "%" PRIu64 " : %s",
- for_time_usec, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Bind read wind time*/
- ret = sqlite3_bind_int64(prep_stmt, 4, for_time_usec);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED,
- "Failed to bind for_time_usec "
- "%" PRIu64 " : %s",
- for_time_usec, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Bind read frequency thresold*/
- ret = sqlite3_bind_int(prep_stmt, 5, freq_read_cnt);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED,
- "Failed to bind freq_read_cnt "
- "%d : %s",
- freq_read_cnt, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Bind read wind time*/
- ret = sqlite3_bind_int64(prep_stmt, 6, for_time_usec);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED,
- "Failed to bind for_time_usec "
- "%" PRIu64 " : %s",
- for_time_usec, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Execute the query*/
- ret = gf_sql_query_function(prep_stmt, query_callback, query_cbk_args);
- if (ret) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_QUERY_FAILED,
- "Failed Query %s", query_str);
- goto out;
- }
-
- /*Clear counters*/
- if (clear_counters) {
- ret = gf_sql_clear_counters(sql_conn);
- if (ret) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0,
- LG_MSG_CLEAR_COUNTER_FAILED,
- "Failed to clear "
- "counters!");
- goto out;
- }
- }
-
- ret = 0;
-out:
- sqlite3_finalize(prep_stmt);
- GF_FREE(base_query_str);
- GF_FREE(query_str);
- return ret;
-}
-
-int
-gf_sqlite3_clear_files_heat(void *db_conn)
-{
- int ret = -1;
- gf_sql_connection_t *sql_conn = db_conn;
-
- CHECK_SQL_CONN(sql_conn, out);
-
- ret = gf_sql_clear_counters(sql_conn);
- if (ret) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_CLEAR_COUNTER_FAILED,
- "Failed to clear "
- "files heat");
- goto out;
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-/* Function to extract version of sqlite db
- * Input:
- * void *db_conn : Sqlite connection
- * char **version : the version is extracted as a string and will be stored in
- * this variable. The freeing of the memory should be done by
- * the caller.
- * Return:
- * On success return the length of the version string that is
- * extracted.
- * On failure return -1
- * */
-int
-gf_sqlite3_version(void *db_conn, char **version)
-{
- int ret = -1;
- gf_sql_connection_t *sql_conn = db_conn;
- sqlite3_stmt *pre_stmt = NULL;
-
- CHECK_SQL_CONN(sql_conn, out);
-
- ret = sqlite3_prepare_v2(sql_conn->sqlite3_db_conn,
- "SELECT SQLITE_VERSION()", -1, &pre_stmt, 0);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED,
- "Failed init prepare stmt %s", sqlite3_errmsg(db_conn));
- ret = -1;
- goto out;
- }
-
- ret = sqlite3_step(pre_stmt);
- if (ret != SQLITE_ROW) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_GET_RECORD_FAILED,
- "Failed to get records "
- "from db : %s",
- sqlite3_errmsg(db_conn));
- ret = -1;
- goto out;
- }
-
- ret = gf_asprintf(version, "%s", sqlite3_column_text(pre_stmt, 0));
- if (ret <= 0) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_QUERY_FAILED,
- "Failed extracting version");
- }
-
-out:
- sqlite3_finalize(pre_stmt);
-
- return ret;
-}
-
-/* Function to extract PRAGMA from sqlite db
- * Input:
- * void *db_conn : Sqlite connection
- * char *pragma_key : PRAGMA or setting to be extracted
- * char **pragma_value : the value of the PRAGMA or setting that is
- * extracted. This function will allocate memory
- * to pragma_value. The caller should free the memory
- * Return:
- * On success return the length of the pragma/setting value that is
- * extracted.
- * On failure return -1
- * */
-int
-gf_sqlite3_pragma(void *db_conn, char *pragma_key, char **pragma_value)
-{
- int ret = -1;
- gf_sql_connection_t *sql_conn = db_conn;
- sqlite3_stmt *pre_stmt = NULL;
- char *sqlstring = NULL;
-
- CHECK_SQL_CONN(sql_conn, out);
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, pragma_key, out);
-
- ret = gf_asprintf(&sqlstring, "PRAGMA %s;", pragma_key);
- if (ret <= 0) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED,
- "Failed allocating memory");
- goto out;
- }
-
- ret = sqlite3_prepare_v2(sql_conn->sqlite3_db_conn, sqlstring, -1,
- &pre_stmt, 0);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED,
- "Failed init prepare stmt %s", sqlite3_errmsg(db_conn));
- ret = -1;
- goto out;
- }
-
- ret = sqlite3_step(pre_stmt);
- if (ret != SQLITE_ROW) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_GET_RECORD_FAILED,
- "Failed to get records "
- "from db : %s",
- sqlite3_errmsg(db_conn));
- ret = -1;
- goto out;
- }
-
- if (pragma_value) {
- ret = gf_asprintf(pragma_value, "%s", sqlite3_column_text(pre_stmt, 0));
- if (ret <= 0) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_QUERY_FAILED,
- "Failed to get %s from db", pragma_key);
- }
- }
-
- ret = 0;
-out:
- GF_FREE(sqlstring);
-
- sqlite3_finalize(pre_stmt);
-
- return ret;
-}
-
-/* Function to set PRAGMA to sqlite db
- * Input:
- * void *db_conn : Sqlite connection
- * char *pragma_key : PRAGMA to be set
- * char *pragma_value : the value of the PRAGMA
- * Return:
- * On success return 0
- * On failure return -1
- * */
-int
-gf_sqlite3_set_pragma(void *db_conn, char *pragma_key, char *pragma_value)
-{
- int ret = -1;
- gf_sql_connection_t *sql_conn = db_conn;
- char sqlstring[GF_NAME_MAX] = "";
- char *db_pragma_value = NULL;
-
- CHECK_SQL_CONN(sql_conn, out);
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, pragma_key, out);
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, pragma_value, out);
-
- GF_SQLITE3_SET_PRAGMA(sqlstring, pragma_key, "%s", pragma_value, ret, out);
-
- ret = gf_sqlite3_pragma(db_conn, pragma_key, &db_pragma_value);
- if (ret < 0) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_QUERY_FAILED,
- "Failed to get %s pragma", pragma_key);
- } else {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_TRACE, 0, 0, "Value set on DB %s : %s",
- pragma_key, db_pragma_value);
- }
- GF_FREE(db_pragma_value);
-
- ret = 0;
-
-out:
-
- return ret;
-}
-
-/* Function to vacuum of sqlite db
- * Input:
- * void *db_conn : Sqlite connection
- * gf_boolean_t compact_active : Is compaction on?
- * gf_boolean_t compact_mode_switched : Did we just flip the compaction switch?
- * Return:
- * On success return 0
- * On failure return -1
- * */
-int
-gf_sqlite3_vacuum(void *db_conn, gf_boolean_t compact_active,
- gf_boolean_t compact_mode_switched)
-{
- int ret = -1;
- gf_sql_connection_t *sql_conn = db_conn;
- char *sqlstring = NULL;
- char *sql_strerror = NULL;
- gf_boolean_t changing_pragma = _gf_true;
-
- CHECK_SQL_CONN(sql_conn, out);
-
- if (GF_SQL_COMPACT_DEF == GF_SQL_COMPACT_NONE) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_INFO, 0, LG_MSG_COMPACT_STATUS,
- "VACUUM type is off: no VACUUM to do");
- goto out;
- }
-
- if (compact_mode_switched) {
- if (compact_active) { /* Then it was OFF before.
- So turn everything on */
- ret = 0;
- switch (GF_SQL_COMPACT_DEF) {
- case GF_SQL_COMPACT_FULL:
- ret = gf_sqlite3_set_pragma(db_conn, "auto_vacuum",
- GF_SQL_AV_FULL);
- break;
- case GF_SQL_COMPACT_INCR:
- ret = gf_sqlite3_set_pragma(db_conn, "auto_vacuum",
- GF_SQL_AV_INCR);
- break;
- case GF_SQL_COMPACT_MANUAL:
- changing_pragma = _gf_false;
- break;
- default:
- ret = -1;
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0,
- LG_MSG_COMPACT_FAILED, "VACUUM type undefined");
- goto out;
- break;
- }
-
- } else { /* Then it was ON before, so turn it all off */
- if (GF_SQL_COMPACT_DEF == GF_SQL_COMPACT_FULL ||
- GF_SQL_COMPACT_DEF == GF_SQL_COMPACT_INCR) {
- ret = gf_sqlite3_set_pragma(db_conn, "auto_vacuum",
- GF_SQL_AV_NONE);
- } else {
- changing_pragma = _gf_false;
- }
- }
-
- if (ret) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_TRACE, 0, LG_MSG_PREPARE_FAILED,
- "Failed to set the pragma");
- goto out;
- }
-
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_INFO, 0, LG_MSG_COMPACT_STATUS,
- "Turning compaction %i", GF_SQL_COMPACT_DEF);
-
- /* If we move from an auto_vacuum scheme to off, */
- /* or vice-versa, we must VACUUM to save the change. */
- /* In the case of a manual VACUUM scheme, we might as well */
- /* run a manual VACUUM now if we */
- if (changing_pragma || compact_active) {
- ret = gf_asprintf(&sqlstring, "VACUUM;");
- if (ret <= 0) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED,
- "Failed allocating memory");
- goto out;
- }
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_INFO, 0, LG_MSG_COMPACT_STATUS,
- "Sealed with a VACUUM");
- }
- } else { /* We are active, so it's time to VACUUM */
- if (!compact_active) { /* Did we somehow enter an inconsistent
- state? */
- ret = -1;
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED,
- "Tried to VACUUM when compaction inactive");
- goto out;
- }
-
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_TRACE, 0, LG_MSG_COMPACT_STATUS,
- "Doing regular vacuum of type %i", GF_SQL_COMPACT_DEF);
-
- switch (GF_SQL_COMPACT_DEF) {
- case GF_SQL_COMPACT_INCR: /* INCR auto_vacuum */
- ret = gf_asprintf(&sqlstring, "PRAGMA incremental_vacuum;");
- if (ret <= 0) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0,
- LG_MSG_PREPARE_FAILED, "Failed allocating memory");
- goto out;
- }
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_INFO, 0, LG_MSG_COMPACT_STATUS,
- "Will commence an incremental VACUUM");
- break;
- /* (MANUAL) Invoke the VACUUM command */
- case GF_SQL_COMPACT_MANUAL:
- ret = gf_asprintf(&sqlstring, "VACUUM;");
- if (ret <= 0) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0,
- LG_MSG_PREPARE_FAILED, "Failed allocating memory");
- goto out;
- }
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_INFO, 0, LG_MSG_COMPACT_STATUS,
- "Will commence a VACUUM");
- break;
- /* (FULL) The database does the compaction itself. */
- /* We cannot do anything else, so we can leave */
- /* without sending anything to the database */
- case GF_SQL_COMPACT_FULL:
- ret = 0;
- goto success;
- /* Any other state must be an error. Note that OFF */
- /* cannot hit this statement since we immediately leave */
- /* in that case */
- default:
- ret = -1;
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_COMPACT_FAILED,
- "VACUUM type undefined");
- goto out;
- break;
- }
- }
-
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_TRACE, 0, LG_MSG_COMPACT_STATUS,
- "SQLString == %s", sqlstring);
-
- ret = sqlite3_exec(sql_conn->sqlite3_db_conn, sqlstring, NULL, NULL,
- &sql_strerror);
-
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_GET_RECORD_FAILED,
- "Failed to vacuum "
- "the db : %s",
- sqlite3_errmsg(db_conn));
- ret = -1;
- goto out;
- }
-success:
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_INFO, 0, LG_MSG_COMPACT_STATUS,
- compact_mode_switched ? "Successfully changed VACUUM on/off"
- : "DB successfully VACUUM");
-out:
- GF_FREE(sqlstring);
-
- return ret;
-}
diff --git a/libglusterfs/src/gfdb/gfdb_sqlite3.h b/libglusterfs/src/gfdb/gfdb_sqlite3.h
deleted file mode 100644
index d8af800db3c..00000000000
--- a/libglusterfs/src/gfdb/gfdb_sqlite3.h
+++ /dev/null
@@ -1,328 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-#ifndef __GFDB_SQLITE3_H
-#define __GFDB_SQLITE3_H
-
-/*Sqlite3 header file*/
-#include <sqlite3.h>
-
-#include "logging.h"
-#include "gfdb_data_store_types.h"
-#include "gfdb_mem-types.h"
-#include "libglusterfs-messages.h"
-
-#define GF_STMT_SIZE_MAX 2048
-
-#define GF_DB_NAME "gfdb.db"
-#define GF_FILE_TABLE "GF_FILE_TB"
-#define GF_FILE_LINK_TABLE "GF_FLINK_TB"
-#define GF_MASTER_TABLE "sqlite_master"
-
-/*Since we have multiple tables to be created we put it in a transaction*/
-#define GF_CREATE_STMT(out_str) \
- do { \
- sprintf(out_str, "BEGIN; CREATE TABLE IF NOT EXISTS " GF_FILE_TABLE \
- "(GF_ID TEXT PRIMARY KEY NOT NULL, " \
- "W_SEC INTEGER NOT NULL DEFAULT 0, " \
- "W_MSEC INTEGER NOT NULL DEFAULT 0, " \
- "UW_SEC INTEGER NOT NULL DEFAULT 0, " \
- "UW_MSEC INTEGER NOT NULL DEFAULT 0, " \
- "W_READ_SEC INTEGER NOT NULL DEFAULT 0, " \
- "W_READ_MSEC INTEGER NOT NULL DEFAULT 0, " \
- "UW_READ_SEC INTEGER NOT NULL DEFAULT 0, " \
- "UW_READ_MSEC INTEGER NOT NULL DEFAULT 0, " \
- "WRITE_FREQ_CNTR INTEGER NOT NULL DEFAULT 1, " \
- "READ_FREQ_CNTR INTEGER NOT NULL DEFAULT 1); " \
- "CREATE TABLE IF NOT EXISTS " GF_FILE_LINK_TABLE \
- "(GF_ID TEXT NOT NULL, " \
- "GF_PID TEXT NOT NULL, " \
- "FNAME TEXT NOT NULL, " \
- "W_DEL_FLAG INTEGER NOT NULL DEFAULT 0, " \
- "LINK_UPDATE INTEGER NOT NULL DEFAULT 0, " \
- "PRIMARY KEY ( GF_ID, GF_PID, FNAME) " \
- ");" \
- "COMMIT;"); \
- ; \
- } while (0)
-
-#define GF_COL_TB_WSEC GF_FILE_TABLE "." GF_COL_WSEC
-#define GF_COL_TB_WMSEC GF_FILE_TABLE "." GF_COL_WMSEC
-#define GF_COL_TB_UWSEC GF_FILE_TABLE "." GF_COL_UWSEC
-#define GF_COL_TB_UWMSEC GF_FILE_TABLE "." GF_COL_UWMSEC
-#define GF_COL_TB_RWSEC GF_FILE_TABLE "." GF_COL_WSEC_READ
-#define GF_COL_TB_RWMSEC GF_FILE_TABLE "." GF_COL_WMSEC_READ
-#define GF_COL_TB_RUWSEC GF_FILE_TABLE "." GF_COL_UWSEC_READ
-#define GF_COL_TB_RUWMSEC GF_FILE_TABLE "." GF_COL_UWMSEC_READ
-#define GF_COL_TB_WFC GF_FILE_TABLE "." GF_COL_WRITE_FREQ_CNTR
-#define GF_COL_TB_RFC GF_FILE_TABLE "." GF_COL_READ_FREQ_CNTR
-
-/*******************************************************************************
- * SQLITE3 Connection details and PRAGMA
- * ****************************************************************************/
-
-#define GF_SQL_AV_NONE "none"
-#define GF_SQL_AV_FULL "full"
-#define GF_SQL_AV_INCR "incremental"
-
-#define GF_SQL_SYNC_OFF "off"
-#define GF_SQL_SYNC_NORMAL "normal"
-#define GF_SQL_SYNC_FULL "full"
-
-#define GF_SQL_JM_DELETE "delete"
-#define GF_SQL_JM_TRUNCATE "truncate"
-#define GF_SQL_JM_PERSIST "persist"
-#define GF_SQL_JM_MEMORY "memory"
-#define GF_SQL_JM_WAL "wal"
-#define GF_SQL_JM_OFF "off"
-
-#define GF_SQL_COMPACT_NONE 0
-#define GF_SQL_COMPACT_FULL 1
-#define GF_SQL_COMPACT_INCR 2
-#define GF_SQL_COMPACT_MANUAL 3
-
-#define GF_SQL_COMPACT_DEF GF_SQL_COMPACT_INCR
-typedef enum gf_sql_auto_vacuum {
- gf_sql_av_none = 0,
- gf_sql_av_full,
- gf_sql_av_incr,
- gf_sql_av_invalid
-} gf_sql_auto_vacuum_t;
-
-typedef enum gf_sql_sync {
- gf_sql_sync_off = 0,
- gf_sql_sync_normal,
- gf_sql_sync_full,
- gf_sql_sync_invalid
-} gf_sql_sync_t;
-
-typedef enum gf_sql_journal_mode {
- gf_sql_jm_wal = 0,
- gf_sql_jm_delete,
- gf_sql_jm_truncate,
- gf_sql_jm_persist,
- gf_sql_jm_memory,
- gf_sql_jm_off,
- gf_sql_jm_invalid
-} gf_sql_journal_mode_t;
-
-typedef struct gf_sql_connection {
- char sqlite3_db_path[PATH_MAX];
- sqlite3 *sqlite3_db_conn;
- ssize_t cache_size;
- ssize_t page_size;
- ssize_t wal_autocheckpoint;
- gf_sql_journal_mode_t journal_mode;
- gf_sql_sync_t synchronous;
- gf_sql_auto_vacuum_t auto_vacuum;
-} gf_sql_connection_t;
-
-#define CHECK_SQL_CONN(sql_conn, out) \
- do { \
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, sql_conn, out); \
- if (!sql_conn->sqlite3_db_conn) { \
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, \
- LG_MSG_CONNECTION_INIT_FAILED, \
- "sqlite3 connection not initialized"); \
- goto out; \
- }; \
- } while (0)
-
-#define GF_SQLITE3_SET_PRAGMA(sqlite3_config_str, param_key, format, value, \
- ret, error) \
- do { \
- sprintf(sqlite3_config_str, "PRAGMA %s = " format, param_key, value); \
- ret = sqlite3_exec(sql_conn->sqlite3_db_conn, sqlite3_config_str, \
- NULL, NULL, NULL); \
- if (ret != SQLITE_OK) { \
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_EXEC_FAILED, \
- "Failed executing: %s : %s", sqlite3_config_str, \
- sqlite3_errmsg(sql_conn->sqlite3_db_conn)); \
- ret = -1; \
- goto error; \
- }; \
- } while (0)
-
-/************************SQLITE3 PARAMS KEYS***********************************/
-#define GFDB_SQL_PARAM_DBPATH "sql-db-path"
-#define GFDB_SQL_PARAM_CACHE_SIZE "sql-db-cachesize"
-#define GFDB_SQL_PARAM_PAGE_SIZE "sql-db-pagesize"
-#define GFDB_SQL_PARAM_JOURNAL_MODE "sql-db-journalmode"
-#define GFDB_SQL_PARAM_WAL_AUTOCHECK "sql-db-wal-autocheckpoint"
-#define GFDB_SQL_PARAM_SYNC "sql-db-sync"
-#define GFDB_SQL_PARAM_AUTO_VACUUM "sql-db-autovacuum"
-
-#define GF_SQL_DEFAULT_DBPATH ""
-#define GF_SQL_DEFAULT_PAGE_SIZE "4096"
-#define GF_SQL_DEFAULT_CACHE_SIZE "12500"
-#define GF_SQL_DEFAULT_WAL_AUTOCHECKPOINT "25000"
-#define GF_SQL_DEFAULT_JOURNAL_MODE GF_SQL_JM_WAL
-#define GF_SQL_DEFAULT_SYNC GF_SQL_SYNC_OFF
-#define GF_SQL_DEFAULT_AUTO_VACUUM GF_SQL_AV_NONE
-
-/* Defines the indexs for sqlite params
- * The order should be maintained*/
-typedef enum sqlite_param_index {
- sql_dbpath_ix = 0,
- sql_pagesize_ix,
- sql_cachesize_ix,
- sql_journalmode_ix,
- sql_walautocheck_ix,
- sql_dbsync_ix,
- sql_autovacuum_ix,
- /*This should be in the end*/
- sql_index_max
-} sqlite_param_index_t;
-
-/* Array to hold the sqlite param keys
- * The order should be maintained as sqlite_param_index_t*/
-static char *sqlite_params_keys[] = {
- GFDB_SQL_PARAM_DBPATH, GFDB_SQL_PARAM_PAGE_SIZE,
- GFDB_SQL_PARAM_CACHE_SIZE, GFDB_SQL_PARAM_JOURNAL_MODE,
- GFDB_SQL_PARAM_WAL_AUTOCHECK, GFDB_SQL_PARAM_SYNC,
- GFDB_SQL_PARAM_AUTO_VACUUM};
-
-/* Array of default values for sqlite params
- * The order should be maintained as sqlite_param_index_t*/
-static char *sqlite_params_default_value[] = {GF_SQL_DEFAULT_DBPATH,
- GF_SQL_DEFAULT_PAGE_SIZE,
- GF_SQL_DEFAULT_CACHE_SIZE,
- GF_SQL_DEFAULT_JOURNAL_MODE,
- GF_SQL_DEFAULT_WAL_AUTOCHECKPOINT,
- GF_SQL_DEFAULT_SYNC,
- GF_SQL_DEFAULT_AUTO_VACUUM};
-
-/*Extract sql params from page_size to auto_vacumm
- * The dbpath is extracted in a different way*/
-static inline int
-gfdb_set_sql_params(char *comp_name, dict_t *from_dict, dict_t *to_dict)
-{
- sqlite_param_index_t sql_index = sql_pagesize_ix;
- char *_val_str = NULL;
- int ret = -1;
-
- GF_ASSERT(comp_name);
- GF_ASSERT(from_dict);
- GF_ASSERT(to_dict);
-
- /*Extract and Set of the sql params from page_size*/
- for (sql_index = sql_pagesize_ix; sql_index < sql_index_max; sql_index++) {
- _val_str = NULL;
- GET_DB_PARAM_FROM_DICT_DEFAULT(comp_name, from_dict,
- sqlite_params_keys[sql_index], _val_str,
- sqlite_params_default_value[sql_index]);
- SET_DB_PARAM_TO_DICT(comp_name, to_dict, sqlite_params_keys[sql_index],
- _val_str, ret, out);
- }
-out:
- return ret;
-}
-
-/*************************SQLITE3 GFDB PLUGINS*********************************/
-
-/*Db init and fini modules*/
-int
-gf_sqlite3_fini(void **db_conn);
-int
-gf_sqlite3_init(dict_t *args, void **db_conn);
-
-/*insert/update/delete modules*/
-int
-gf_sqlite3_insert(void *db_conn, gfdb_db_record_t *);
-int
-gf_sqlite3_delete(void *db_conn, gfdb_db_record_t *);
-
-/*querying modules*/
-int
-gf_sqlite3_find_all(void *db_conn, gf_query_callback_t, void *_query_cbk_args,
- int query_limit);
-int
-gf_sqlite3_find_unchanged_for_time(void *db_conn,
- gf_query_callback_t query_callback,
- void *_query_cbk_args,
- gfdb_time_t *for_time);
-int
-gf_sqlite3_find_recently_changed_files(void *db_conn,
- gf_query_callback_t query_callback,
- void *_query_cbk_args,
- gfdb_time_t *from_time);
-int
-gf_sqlite3_find_unchanged_for_time_freq(void *db_conn,
- gf_query_callback_t query_callback,
- void *_query_cbk_args,
- gfdb_time_t *for_time,
- int write_freq_cnt, int read_freq_cnt,
- gf_boolean_t clear_counters);
-int
-gf_sqlite3_find_recently_changed_files_freq(
- void *db_conn, gf_query_callback_t query_callback, void *_query_cbk_args,
- gfdb_time_t *from_time, int write_freq_cnt, int read_freq_cnt,
- gf_boolean_t clear_counters);
-
-int
-gf_sqlite3_clear_files_heat(void *db_conn);
-
-/* Function to extract version of sqlite db
- * Input:
- * void *db_conn : Sqlite connection
- * char **version : the version is extracted as a string and will be stored in
- * this variable. The freeing of the memory should be done by
- * the caller.
- * Return:
- * On success return the length of the version string that is
- * extracted.
- * On failure return -1
- * */
-int
-gf_sqlite3_version(void *db_conn, char **version);
-
-/* Function to extract PRAGMA or setting from sqlite db
- * Input:
- * void *db_conn : Sqlite connection
- * char *pragma_key : PRAGMA or setting to be extracted
- * char **pragma_value : the value of the PRAGMA or setting that is
- * extracted. This function will allocate memory
- * to pragma_value. The caller should free the memory
- * Return:
- * On success return the length of the pragma/setting value that is
- * extracted.
- * On failure return -1
- * */
-int
-gf_sqlite3_pragma(void *db_conn, char *pragma_key, char **pragma_value);
-
-/* Function to set PRAGMA to sqlite db
- * Input:
- * void *db_conn : Sqlite connection
- * char *pragma_key : PRAGMA to be set
- * char *pragma_value : the value of the PRAGMA
- * Return:
- * On success return 0
- * On failure return -1
- * */
-int
-gf_sqlite3_set_pragma(void *db_conn, char *pragma_key, char *pragma_value);
-
-/* Function to vacuum of sqlite db
- * Input:
- * void *db_conn : Sqlite connection
- * gf_boolean_t compact_active : Is compaction on?
- * gf_boolean_t compact_mode_switched : Did we just flip the compaction switch?
- * Return:
- * On success return 0
- * On failure return -1
- * */
-int
-gf_sqlite3_vacuum(void *db_conn, gf_boolean_t compact_active,
- gf_boolean_t compact_mode_switched);
-
-void
-gf_sqlite3_fill_db_operations(gfdb_db_operations_t *gfdb_db_ops);
-
-#endif
diff --git a/libglusterfs/src/gfdb/gfdb_sqlite3_helper.c b/libglusterfs/src/gfdb/gfdb_sqlite3_helper.c
deleted file mode 100644
index 60dd5e25e66..00000000000
--- a/libglusterfs/src/gfdb/gfdb_sqlite3_helper.c
+++ /dev/null
@@ -1,1260 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include "gfdb_sqlite3_helper.h"
-
-#define GFDB_SQL_STMT_SIZE 256
-
-/*****************************************************************************
- *
- * Helper function to execute actual sql queries
- *
- *
- * ****************************************************************************/
-
-static int
-gf_sql_delete_all(gf_sql_connection_t *sql_conn, char *gfid,
- gf_boolean_t ignore_errors)
-{
- int ret = -1;
- sqlite3_stmt *delete_file_stmt = NULL;
- sqlite3_stmt *delete_link_stmt = NULL;
- char *delete_link_str = "DELETE FROM " GF_FILE_LINK_TABLE
- " WHERE GF_ID = ? ;";
- char *delete_file_str = "DELETE FROM " GF_FILE_TABLE " WHERE GF_ID = ? ;";
-
- CHECK_SQL_CONN(sql_conn, out);
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, gfid, out);
-
- /*
- * Delete all links associated with this GFID
- *
- * */
- /*Prepare statement for delete all links*/
- ret = sqlite3_prepare(sql_conn->sqlite3_db_conn, delete_link_str, -1,
- &delete_link_stmt, 0);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED,
- "Failed preparing delete "
- "statement %s : %s",
- delete_link_str, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Bind gfid*/
- ret = sqlite3_bind_text(delete_link_stmt, 1, gfid, -1, NULL);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED,
- "Failed binding gfid %s : %s", gfid,
- sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Execute the prepare statement*/
- if (sqlite3_step(delete_link_stmt) != SQLITE_DONE) {
- gf_msg(GFDB_STR_SQLITE3, _gfdb_log_level(GF_LOG_ERROR, ignore_errors),
- 0, LG_MSG_EXEC_FAILED,
- "Failed executing the prepared stmt %s : %s", delete_link_str,
- sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*
- * Delete entry from file table associated with this GFID
- *
- * */
- /*Prepare statement for delete all links*/
- ret = sqlite3_prepare(sql_conn->sqlite3_db_conn, delete_file_str, -1,
- &delete_file_stmt, 0);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED,
- "Failed preparing delete "
- "statement %s : %s",
- delete_file_str, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Bind gfid*/
- ret = sqlite3_bind_text(delete_file_stmt, 1, gfid, -1, NULL);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED,
- "Failed binding gfid %s : %s", gfid,
- sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Execute the prepare statement*/
- if (sqlite3_step(delete_file_stmt) != SQLITE_DONE) {
- gf_msg(GFDB_STR_SQLITE3, _gfdb_log_level(GF_LOG_ERROR, ignore_errors),
- 0, LG_MSG_EXEC_FAILED,
- "Failed executing the prepared stmt %s : %s", delete_file_str,
- sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
-out:
- /*Free prepared statement*/
- sqlite3_finalize(delete_file_stmt);
- sqlite3_finalize(delete_link_stmt);
- return ret;
-}
-
-static int
-gf_sql_delete_link(gf_sql_connection_t *sql_conn, char *gfid, char *pargfid,
- char *basename, gf_boolean_t ignore_errors)
-{
- int ret = -1;
- sqlite3_stmt *delete_stmt = NULL;
- char *delete_str = "DELETE FROM " GF_FILE_LINK_TABLE
- " WHERE GF_ID = ? AND GF_PID = ?"
- " AND FNAME = ?;";
-
- CHECK_SQL_CONN(sql_conn, out);
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, gfid, out);
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, pargfid, out);
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, basename, out);
-
- /*Prepare statement*/
- ret = sqlite3_prepare(sql_conn->sqlite3_db_conn, delete_str, -1,
- &delete_stmt, 0);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED,
- "Failed preparing delete "
- "statement %s : %s",
- delete_str, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Bind gfid*/
- ret = sqlite3_bind_text(delete_stmt, 1, gfid, -1, NULL);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED,
- "Failed binding gfid %s : %s", gfid,
- sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Bind pargfid*/
- ret = sqlite3_bind_text(delete_stmt, 2, pargfid, -1, NULL);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED,
- "Failed binding parent gfid %s "
- ": %s",
- pargfid, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Bind basename*/
- ret = sqlite3_bind_text(delete_stmt, 3, basename, -1, NULL);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED,
- "Failed binding basename %s : "
- "%s",
- basename, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Execute the prepare statement*/
- if (sqlite3_step(delete_stmt) != SQLITE_DONE) {
- gf_msg(GFDB_STR_SQLITE3, _gfdb_log_level(GF_LOG_ERROR, ignore_errors),
- 0, LG_MSG_EXEC_FAILED,
- "Failed executing the prepared stmt %s : %s", delete_str,
- sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- ret = 0;
-out:
- /*Free prepared statement*/
- sqlite3_finalize(delete_stmt);
- return ret;
-}
-
-static int
-gf_sql_update_link_flags(gf_sql_connection_t *sql_conn, char *gfid,
- char *pargfid, char *basename, int update_flag,
- gf_boolean_t is_update_or_delete,
- gf_boolean_t ignore_errors)
-{
- int ret = -1;
- sqlite3_stmt *update_stmt = NULL;
- char *update_column = NULL;
- char update_str[1024] = "";
-
- CHECK_SQL_CONN(sql_conn, out);
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, gfid, out);
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, pargfid, out);
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, basename, out);
-
- update_column = (is_update_or_delete) ? "LINK_UPDATE" : "W_DEL_FLAG";
-
- sprintf(update_str,
- "UPDATE " GF_FILE_LINK_TABLE
- " SET %s = ?"
- " WHERE GF_ID = ? AND GF_PID = ? AND FNAME = ?;",
- update_column);
-
- /*Prepare statement*/
- ret = sqlite3_prepare(sql_conn->sqlite3_db_conn, update_str, -1,
- &update_stmt, 0);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED,
- "Failed preparing update "
- "statement %s : %s",
- update_str, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Bind link_update*/
- ret = sqlite3_bind_int(update_stmt, 1, update_flag);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED,
- "Failed binding update_flag %d "
- ": %s",
- update_flag, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Bind gfid*/
- ret = sqlite3_bind_text(update_stmt, 2, gfid, -1, NULL);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED,
- "Failed binding gfid %s : %s", gfid,
- sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Bind pargfid*/
- ret = sqlite3_bind_text(update_stmt, 3, pargfid, -1, NULL);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED,
- "Failed binding parent gfid %s "
- ": %s",
- pargfid, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Bind basename*/
- ret = sqlite3_bind_text(update_stmt, 4, basename, -1, NULL);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED,
- "Failed binding basename %s : "
- "%s",
- basename, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Execute the prepare statement*/
- if (sqlite3_step(update_stmt) != SQLITE_DONE) {
- gf_msg(GFDB_STR_SQLITE3, _gfdb_log_level(GF_LOG_ERROR, ignore_errors),
- 0, LG_MSG_EXEC_FAILED,
- "Failed executing the prepared stmt %s : %s", update_str,
- sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- ret = 0;
-out:
- /*Free prepared statement*/
- sqlite3_finalize(update_stmt);
- return ret;
-}
-
-static int
-gf_sql_insert_link(gf_sql_connection_t *sql_conn, char *gfid, char *pargfid,
- char *basename, gf_boolean_t link_consistency,
- gf_boolean_t ignore_errors)
-{
- int ret = -1;
- sqlite3_stmt *insert_stmt = NULL;
- char insert_str[GFDB_SQL_STMT_SIZE] = "";
-
- sprintf(insert_str,
- "INSERT INTO " GF_FILE_LINK_TABLE
- " (GF_ID, GF_PID, FNAME,"
- " W_DEL_FLAG, LINK_UPDATE) "
- " VALUES (?, ?, ?, 0, %d);",
- link_consistency);
-
- CHECK_SQL_CONN(sql_conn, out);
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, gfid, out);
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, pargfid, out);
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, basename, out);
-
- /*Prepare statement*/
- ret = sqlite3_prepare(sql_conn->sqlite3_db_conn, insert_str, -1,
- &insert_stmt, 0);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED,
- "Failed preparing insert "
- "statement %s : %s",
- insert_str, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Bind gfid*/
- ret = sqlite3_bind_text(insert_stmt, 1, gfid, -1, NULL);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED,
- "Failed binding gfid %s : %s", gfid,
- sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Bind pargfid*/
- ret = sqlite3_bind_text(insert_stmt, 2, pargfid, -1, NULL);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED,
- "Failed binding parent gfid %s "
- ": %s",
- pargfid, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Bind basename*/
- ret = sqlite3_bind_text(insert_stmt, 3, basename, -1, NULL);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED,
- "Failed binding basename %s : %s", basename,
- sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Execute the prepare statement*/
- if (sqlite3_step(insert_stmt) != SQLITE_DONE) {
- gf_msg(GFDB_STR_SQLITE3, _gfdb_log_level(GF_LOG_ERROR, ignore_errors),
- 0, LG_MSG_EXEC_FAILED,
- "Failed executing the prepared "
- "stmt %s %s %s %s : %s",
- gfid, pargfid, basename, insert_str,
- sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- ret = 0;
-out:
- /*Free prepared statement*/
- sqlite3_finalize(insert_stmt);
- return ret;
-}
-
-static int
-gf_sql_update_link(gf_sql_connection_t *sql_conn, char *gfid, char *pargfid,
- char *basename, char *old_pargfid, char *old_basename,
- gf_boolean_t link_consistency, gf_boolean_t ignore_errors)
-{
- int ret = -1;
- sqlite3_stmt *insert_stmt = NULL;
- char insert_str[GFDB_SQL_STMT_SIZE] = "";
-
- sprintf(insert_str,
- "INSERT INTO " GF_FILE_LINK_TABLE
- " (GF_ID, GF_PID, FNAME,"
- " W_DEL_FLAG, LINK_UPDATE) "
- " VALUES (? , ?, ?, 0, %d);",
- link_consistency);
-
- CHECK_SQL_CONN(sql_conn, out);
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, gfid, out);
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, pargfid, out);
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, basename, out);
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, old_pargfid, out);
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, old_basename, out);
-
- /*
- *
- * Delete the old link
- *
- * */
- ret = gf_sql_delete_link(sql_conn, gfid, old_pargfid, old_basename,
- ignore_errors);
- if (ret) {
- gf_msg(GFDB_STR_SQLITE3, _gfdb_log_level(GF_LOG_ERROR, ignore_errors),
- 0, LG_MSG_DELETE_FAILED, "Failed deleting old link");
- goto out;
- }
-
- /*
- *
- * insert new link
- *
- * */
- /*Prepare statement*/
- ret = sqlite3_prepare(sql_conn->sqlite3_db_conn, insert_str, -1,
- &insert_stmt, 0);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED,
- "Failed preparing insert "
- "statement %s : %s",
- insert_str, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Bind gfid*/
- ret = sqlite3_bind_text(insert_stmt, 1, gfid, -1, NULL);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED,
- "Failed binding gfid %s : %s", gfid,
- sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Bind new pargfid*/
- ret = sqlite3_bind_text(insert_stmt, 2, pargfid, -1, NULL);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED,
- "Failed binding parent gfid %s "
- ": %s",
- pargfid, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Bind new basename*/
- ret = sqlite3_bind_text(insert_stmt, 3, basename, -1, NULL);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED,
- "Failed binding basename %s : "
- "%s",
- basename, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Execute the prepare statement*/
- if (sqlite3_step(insert_stmt) != SQLITE_DONE) {
- gf_msg(GFDB_STR_SQLITE3, _gfdb_log_level(GF_LOG_ERROR, ignore_errors),
- 0, LG_MSG_EXEC_FAILED,
- "Failed executing the prepared stmt %s : %s", insert_str,
- sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- ret = 0;
-out:
- /*Free prepared statement*/
- sqlite3_finalize(insert_stmt);
- return ret;
-}
-
-static int
-gf_sql_insert_write_wind_time(gf_sql_connection_t *sql_conn, char *gfid,
- gfdb_time_t *wind_time,
- gf_boolean_t ignore_errors)
-{
- int ret = -1;
- sqlite3_stmt *insert_stmt = NULL;
- char *insert_str = "INSERT INTO " GF_FILE_TABLE
- "(GF_ID, W_SEC, W_MSEC, UW_SEC, UW_MSEC)"
- " VALUES (?, ?, ?, 0, 0);";
-
- CHECK_SQL_CONN(sql_conn, out);
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, gfid, out);
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, wind_time, out);
-
- /*Prepare statement*/
- ret = sqlite3_prepare(sql_conn->sqlite3_db_conn, insert_str, -1,
- &insert_stmt, 0);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED,
- "Failed preparing insert "
- "statement %s : %s",
- insert_str, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Bind gfid*/
- ret = sqlite3_bind_text(insert_stmt, 1, gfid, -1, NULL);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED,
- "Failed binding gfid %s : %s", gfid,
- sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Bind wind secs*/
- ret = sqlite3_bind_int(insert_stmt, 2, wind_time->tv_sec);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED,
- "Failed binding parent wind "
- "secs %ld : %s",
- wind_time->tv_sec, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Bind wind msecs*/
- ret = sqlite3_bind_int(insert_stmt, 3, wind_time->tv_usec);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED,
- "Failed binding parent wind "
- "msecs %ld : %s",
- wind_time->tv_usec, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Execute the prepare statement*/
- if (sqlite3_step(insert_stmt) != SQLITE_DONE) {
- gf_msg(GFDB_STR_SQLITE3, _gfdb_log_level(GF_LOG_ERROR, ignore_errors),
- 0, LG_MSG_EXEC_FAILED,
- "Failed executing the prepared stmt GFID:%s %s : %s", gfid,
- insert_str, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- ret = 0;
-out:
- /*Free prepared statement*/
- sqlite3_finalize(insert_stmt);
- return ret;
-}
-
-/*Update write/read times for both wind and unwind*/
-static int
-gf_update_time(gf_sql_connection_t *sql_conn, char *gfid,
- gfdb_time_t *update_time, gf_boolean_t record_counter,
- gf_boolean_t is_wind, gf_boolean_t is_read,
- gf_boolean_t ignore_errors)
-{
- int ret = -1;
- sqlite3_stmt *update_stmt = NULL;
- char update_str[1024] = "";
- char *freq_cntr_str = NULL;
-
- CHECK_SQL_CONN(sql_conn, out);
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, gfid, out);
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, update_time, out);
-
- /*
- * Constructing the prepare statement string.
- *
- * */
- /*For write time*/
- if (!is_read) {
- if (is_wind) {
- /*if record counter is on*/
- freq_cntr_str = (record_counter)
- ? ", WRITE_FREQ_CNTR = WRITE_FREQ_CNTR + 1"
- : "";
-
- /*Perfectly safe as we will not go array of bound*/
- sprintf(update_str,
- "UPDATE " GF_FILE_TABLE
- " SET W_SEC = ?, W_MSEC = ? "
- " %s" /*place for read freq counters*/
- " WHERE GF_ID = ? ;",
- freq_cntr_str);
- } else {
- /*Perfectly safe as we will not go array of bound*/
- sprintf(update_str,
- "UPDATE " GF_FILE_TABLE " SET UW_SEC = ?, UW_MSEC = ? ;");
- }
- }
- /*For Read Time update*/
- else {
- if (is_wind) {
- /*if record counter is on*/
- freq_cntr_str = (record_counter)
- ? ", READ_FREQ_CNTR = READ_FREQ_CNTR + 1"
- : "";
-
- /*Perfectly safe as we will not go array of bound*/
- sprintf(update_str,
- "UPDATE " GF_FILE_TABLE
- " SET W_READ_SEC = ?, W_READ_MSEC = ? "
- " %s" /*place for read freq counters*/
- " WHERE GF_ID = ? ;",
- freq_cntr_str);
- } else {
- /*Perfectly safe as we will not go array of bound*/
- sprintf(update_str, "UPDATE " GF_FILE_TABLE
- " SET UW_READ_SEC = ?, UW_READ_MSEC = ? ;");
- }
- }
-
- /*Prepare statement*/
- ret = sqlite3_prepare(sql_conn->sqlite3_db_conn, update_str, -1,
- &update_stmt, 0);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PREPARE_FAILED,
- "Failed preparing insert "
- "statement %s : %s",
- update_str, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Bind time secs*/
- ret = sqlite3_bind_int(update_stmt, 1, update_time->tv_sec);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED,
- "Failed binding parent wind "
- "secs %ld : %s",
- update_time->tv_sec, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Bind time msecs*/
- ret = sqlite3_bind_int(update_stmt, 2, update_time->tv_usec);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED,
- "Failed binding parent wind "
- "msecs %ld : %s",
- update_time->tv_usec, sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Bind gfid*/
- ret = sqlite3_bind_text(update_stmt, 3, gfid, -1, NULL);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_BINDING_FAILED,
- "Failed binding gfid %s : %s", gfid,
- sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- /*Execute the prepare statement*/
- if (sqlite3_step(update_stmt) != SQLITE_DONE) {
- gf_msg(GFDB_STR_SQLITE3, _gfdb_log_level(GF_LOG_ERROR, ignore_errors),
- 0, LG_MSG_EXEC_FAILED,
- "Failed executing the prepared stmt %s : %s", update_str,
- sqlite3_errmsg(sql_conn->sqlite3_db_conn));
- ret = -1;
- goto out;
- }
-
- ret = 0;
-out:
- /*Free prepared statement*/
- sqlite3_finalize(update_stmt);
- return ret;
-}
-
-/******************************************************************************
- *
- * Helper functions for gf_sqlite3_insert()
- *
- *
- * ****************************************************************************/
-
-int
-gf_sql_insert_wind(gf_sql_connection_t *sql_conn,
- gfdb_db_record_t *gfdb_db_record)
-{
- int ret = -1;
- gfdb_time_t *modtime = NULL;
- char *pargfid_str = NULL;
- char *gfid_str = NULL;
- char *old_pargfid_str = NULL;
- gf_boolean_t its_wind = _gf_true; /*remains true for this function*/
-
- CHECK_SQL_CONN(sql_conn, out);
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, gfdb_db_record, out);
-
- gfid_str = gf_strdup(uuid_utoa(gfdb_db_record->gfid));
- if (!gfid_str) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_CREATE_FAILED,
- "Creating gfid string failed.");
- goto out;
- }
-
- modtime = &gfdb_db_record->gfdb_wind_change_time;
-
- /* handle all dentry based operations */
- if (isdentryfop(gfdb_db_record->gfdb_fop_type)) {
- /*Parent GFID is always set*/
- pargfid_str = gf_strdup(uuid_utoa(gfdb_db_record->pargfid));
- if (!pargfid_str) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_CREATE_FAILED,
- "Creating gfid string "
- "failed.");
- goto out;
- }
-
- /* handle create, mknod */
- if (isdentrycreatefop(gfdb_db_record->gfdb_fop_type)) {
- /*insert link*/
- ret = gf_sql_insert_link(
- sql_conn, gfid_str, pargfid_str, gfdb_db_record->file_name,
- gfdb_db_record->link_consistency, _gf_true);
- if (ret) {
- gf_msg(GFDB_STR_SQLITE3,
- _gfdb_log_level(GF_LOG_WARNING,
- gfdb_db_record->ignore_errors),
- 0, LG_MSG_INSERT_FAILED,
- "Failed "
- "inserting link in DB");
- /* Even if link creation is failed we
- * continue with the creation of file record.
- * This covers to cases
- * 1) Lookup heal: If the file record from
- * gf_file_tb is deleted but the link record
- * still exist. Lookup heal will attempt a heal
- * with create_wind set. The link heal will fail
- * as there is already a record and if we don't
- * ignore the error we will not heal the
- * gf_file_tb.
- * 2) Rename file in cold tier: During a rename
- * of a file that is there in cold tier. We get
- * an link record created in hot tier for the
- * linkto file. When the file gets heated and
- * moves to hot tier there will be attempt from
- * ctr lookup heal to create link and file
- * record and If we don't ignore the error we
- * will not heal the gf_file_tb.
- * */
- }
- gfdb_db_record->islinkupdate = gfdb_db_record->link_consistency;
-
- /*
- * Only for create/mknod insert wind time
- * for the first time
- * */
- ret = gf_sql_insert_write_wind_time(sql_conn, gfid_str, modtime,
- gfdb_db_record->ignore_errors);
- if (ret) {
- gf_msg(GFDB_STR_SQLITE3,
- _gfdb_log_level(GF_LOG_ERROR,
- gfdb_db_record->ignore_errors),
- 0, LG_MSG_INSERT_FAILED,
- "Failed inserting wind time in DB");
- goto out;
- }
- goto out;
- }
- /*handle rename, link */
- else {
- /*rename*/
- if (strlen(gfdb_db_record->old_file_name) != 0) {
- old_pargfid_str = gf_strdup(
- uuid_utoa(gfdb_db_record->old_pargfid));
- if (!old_pargfid_str) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0,
- LG_MSG_CREATE_FAILED,
- "Creating gfid string failed.");
- goto out;
- }
- ret = gf_sql_update_link(
- sql_conn, gfid_str, pargfid_str, gfdb_db_record->file_name,
- old_pargfid_str, gfdb_db_record->old_file_name,
- gfdb_db_record->link_consistency,
- gfdb_db_record->ignore_errors);
- if (ret) {
- gf_msg(GFDB_STR_SQLITE3,
- _gfdb_log_level(GF_LOG_ERROR,
- gfdb_db_record->ignore_errors),
- 0, LG_MSG_UPDATE_FAILED, "Failed updating link");
- goto out;
- }
- gfdb_db_record->islinkupdate = gfdb_db_record->link_consistency;
- }
- /*link*/
- else {
- ret = gf_sql_insert_link(sql_conn, gfid_str, pargfid_str,
- gfdb_db_record->file_name,
- gfdb_db_record->link_consistency,
- gfdb_db_record->ignore_errors);
- if (ret) {
- gf_msg(GFDB_STR_SQLITE3,
- _gfdb_log_level(GF_LOG_ERROR,
- gfdb_db_record->ignore_errors),
- 0, LG_MSG_INSERT_FAILED,
- "Failed inserting link in DB");
- goto out;
- }
- gfdb_db_record->islinkupdate = gfdb_db_record->link_consistency;
- }
- }
- }
-
- /* update times only when said!*/
- if (gfdb_db_record->do_record_times) {
- /*All fops update times read or write*/
- ret = gf_update_time(sql_conn, gfid_str, modtime,
- gfdb_db_record->do_record_counters, its_wind,
- isreadfop(gfdb_db_record->gfdb_fop_type),
- gfdb_db_record->ignore_errors);
- if (ret) {
- gf_msg(GFDB_STR_SQLITE3,
- _gfdb_log_level(GF_LOG_ERROR, gfdb_db_record->ignore_errors),
- 0, LG_MSG_UPDATE_FAILED,
- "Failed update wind time"
- " in DB");
- goto out;
- }
- }
-
- ret = 0;
-out:
- GF_FREE(gfid_str);
- GF_FREE(pargfid_str);
- GF_FREE(old_pargfid_str);
- return ret;
-}
-
-int
-gf_sql_insert_unwind(gf_sql_connection_t *sql_conn,
- gfdb_db_record_t *gfdb_db_record)
-{
- int ret = -1;
- gfdb_time_t *modtime = NULL;
- gf_boolean_t its_wind = _gf_true; /*remains true for this function*/
- char *gfid_str = NULL;
- char *pargfid_str = NULL;
-
- CHECK_SQL_CONN(sql_conn, out);
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, gfdb_db_record, out);
-
- gfid_str = gf_strdup(uuid_utoa(gfdb_db_record->gfid));
- if (!gfid_str) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_CREATE_FAILED,
- "Creating gfid string failed.");
- goto out;
- }
-
- /*Only update if recording unwind is set*/
- if (gfdb_db_record->do_record_times &&
- gfdb_db_record->do_record_uwind_time) {
- modtime = &gfdb_db_record->gfdb_unwind_change_time;
- ret = gf_update_time(sql_conn, gfid_str, modtime,
- gfdb_db_record->do_record_counters, (!its_wind),
- isreadfop(gfdb_db_record->gfdb_fop_type),
- gfdb_db_record->ignore_errors);
- if (ret) {
- gf_msg(GFDB_STR_SQLITE3,
- _gfdb_log_level(GF_LOG_ERROR, gfdb_db_record->ignore_errors),
- 0, LG_MSG_UPDATE_FAILED,
- "Failed update unwind "
- "time in DB");
- goto out;
- }
- }
-
- /*For link creation and changes we use link updated*/
- if (gfdb_db_record->islinkupdate &&
- isdentryfop(gfdb_db_record->gfdb_fop_type)) {
- pargfid_str = gf_strdup(uuid_utoa(gfdb_db_record->pargfid));
- if (!pargfid_str) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_CREATE_FAILED,
- "Creating pargfid_str string failed.");
- goto out;
- }
-
- ret = gf_sql_update_link_flags(sql_conn, gfid_str, pargfid_str,
- gfdb_db_record->file_name, 0, _gf_true,
- gfdb_db_record->ignore_errors);
- if (ret) {
- gf_msg(GFDB_STR_SQLITE3,
- _gfdb_log_level(GF_LOG_ERROR, gfdb_db_record->ignore_errors),
- 0, LG_MSG_UPDATE_FAILED,
- "Failed updating link flags in unwind");
- goto out;
- }
- }
-
- ret = 0;
-out:
- GF_FREE(gfid_str);
- GF_FREE(pargfid_str);
- return ret;
-}
-
-int
-gf_sql_update_delete_wind(gf_sql_connection_t *sql_conn,
- gfdb_db_record_t *gfdb_db_record)
-{
- int ret = -1;
- char *gfid_str = NULL;
- char *pargfid_str = NULL;
-
- CHECK_SQL_CONN(sql_conn, out);
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, gfdb_db_record, out);
-
- gfid_str = gf_strdup(uuid_utoa(gfdb_db_record->gfid));
- if (!gfid_str) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_CREATE_FAILED,
- "Creating gfid string failed.");
- goto out;
- }
-
- pargfid_str = gf_strdup(uuid_utoa(gfdb_db_record->pargfid));
- if (!pargfid_str) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_CREATE_FAILED,
- "Creating pargfid_str "
- "string failed.");
- goto out;
- }
-
- if (gfdb_db_record->link_consistency) {
- ret = gf_sql_update_link_flags(sql_conn, gfid_str, pargfid_str,
- gfdb_db_record->file_name, 1, _gf_false,
- gfdb_db_record->ignore_errors);
- if (ret) {
- gf_msg(GFDB_STR_SQLITE3,
- _gfdb_log_level(GF_LOG_ERROR, gfdb_db_record->ignore_errors),
- 0, LG_MSG_UPDATE_FAILED,
- "Failed updating link flags in wind");
- goto out;
- }
- }
-
- ret = 0;
-out:
- GF_FREE(gfid_str);
- GF_FREE(pargfid_str);
- return ret;
-}
-
-int
-gf_sql_delete_unwind(gf_sql_connection_t *sql_conn,
- gfdb_db_record_t *gfdb_db_record)
-{
- int ret = -1;
- char *gfid_str = NULL;
- char *pargfid_str = NULL;
- gfdb_time_t *modtime = NULL;
-
- CHECK_SQL_CONN(sql_conn, out);
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, gfdb_db_record, out);
-
- gfid_str = gf_strdup(uuid_utoa(gfdb_db_record->gfid));
- if (!gfid_str) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_CREATE_FAILED,
- "Creating gfid string failed.");
- goto out;
- }
-
- /*Nuke all the entries for this GFID from DB*/
- if (gfdb_db_record->gfdb_fop_path == GFDB_FOP_UNDEL_ALL) {
- gf_sql_delete_all(sql_conn, gfid_str, gfdb_db_record->ignore_errors);
- }
- /*Remove link entries only*/
- else if (gfdb_db_record->gfdb_fop_path == GFDB_FOP_UNDEL) {
- pargfid_str = gf_strdup(uuid_utoa(gfdb_db_record->pargfid));
- if (!pargfid_str) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_CREATE_FAILED,
- "Creating pargfid_str "
- "string failed.");
- goto out;
- }
-
- /* Special performance case:
- * Updating wind time in unwind for delete. This is done here
- * as in the wind path we will not know whether its the last
- * link or not. For a last link there is not use to update any
- * wind or unwind time!*/
- if (gfdb_db_record->do_record_times) {
- /*Update the wind write times*/
- modtime = &gfdb_db_record->gfdb_wind_change_time;
- ret = gf_update_time(sql_conn, gfid_str, modtime,
- gfdb_db_record->do_record_counters, _gf_true,
- isreadfop(gfdb_db_record->gfdb_fop_type),
- gfdb_db_record->ignore_errors);
- if (ret) {
- gf_msg(GFDB_STR_SQLITE3,
- _gfdb_log_level(GF_LOG_ERROR,
- gfdb_db_record->ignore_errors),
- 0, LG_MSG_UPDATE_FAILED,
- "Failed update wind time in DB");
- goto out;
- }
- }
-
- modtime = &gfdb_db_record->gfdb_unwind_change_time;
-
- ret = gf_sql_delete_link(sql_conn, gfid_str, pargfid_str,
- gfdb_db_record->file_name,
- gfdb_db_record->ignore_errors);
- if (ret) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_DELETE_FAILED,
- "Failed deleting link");
- goto out;
- }
-
- if (gfdb_db_record->do_record_times &&
- gfdb_db_record->do_record_uwind_time) {
- ret = gf_update_time(sql_conn, gfid_str, modtime,
- gfdb_db_record->do_record_counters, _gf_false,
- isreadfop(gfdb_db_record->gfdb_fop_type),
- gfdb_db_record->ignore_errors);
- if (ret) {
- gf_msg(GFDB_STR_SQLITE3,
- _gfdb_log_level(GF_LOG_ERROR,
- gfdb_db_record->ignore_errors),
- 0, LG_MSG_UPDATE_FAILED,
- "Failed update unwind time in DB");
- goto out;
- }
- }
- } else {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_INVALID_UPLINK,
- "Invalid unlink option");
- goto out;
- }
- ret = 0;
-out:
- GF_FREE(gfid_str);
- GF_FREE(pargfid_str);
- return ret;
-}
-
-/******************************************************************************
- *
- * Find/Query helper functions
- *
- * ****************************************************************************/
-int
-gf_sql_query_function(sqlite3_stmt *prep_stmt,
- gf_query_callback_t query_callback, void *_query_cbk_args)
-{
- int ret = -1;
- gfdb_query_record_t *query_record = NULL;
- char *text_column = NULL;
- sqlite3 *db_conn = NULL;
- uuid_t prev_gfid = {0};
- uuid_t curr_gfid = {0};
- uuid_t pgfid = {0};
- char *base_name = NULL;
- gf_boolean_t is_first_record = _gf_true;
- gf_boolean_t is_query_empty = _gf_true;
-
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, prep_stmt, out);
- GF_VALIDATE_OR_GOTO(GFDB_STR_SQLITE3, query_callback, out);
-
- db_conn = sqlite3_db_handle(prep_stmt);
-
- /*
- * Loop to access queried rows
- * Each db record will have 3 columns
- * GFID, PGFID, FILE_NAME
- *
- * For file with multiple hard links we will get multiple query rows
- * with the same GFID, but different PGID and FILE_NAME Combination
- * For Example if a file with
- * GFID = 00000000-0000-0000-0000-000000000006
- * has 3 hardlinks file1, file2 and file3 in 3 different folder
- * with GFID's
- * 00000000-0000-0000-0000-0000EFC00001,
- * 00000000-0000-0000-0000-00000ABC0001 and
- * 00000000-0000-0000-0000-00000ABC00CD
- * Then there will be 3 records
- * GFID : 00000000-0000-0000-0000-000000000006
- * PGFID : 00000000-0000-0000-0000-0000EFC00001
- * FILE_NAME : file1
- *
- * GFID : 00000000-0000-0000-0000-000000000006
- * PGFID : 00000000-0000-0000-0000-00000ABC0001
- * FILE_NAME : file2
- *
- * GFID : 00000000-0000-0000-0000-000000000006
- * PGFID : 00000000-0000-0000-0000-00000ABC00CD
- * FILE_NAME : file3
- *
- * This is retrieved and added to a single query_record
- *
- * query_record->gfid = 00000000-0000-0000-0000-000000000006
- * ->link_info = {00000000-0000-0000-0000-0000EFC00001,
- * "file1"}
- * |
- * V
- * link_info = {00000000-0000-0000-0000-00000ABC0001,
- * "file2"}
- * |
- * V
- * link_info = {00000000-0000-0000-0000-00000ABC0001,
- * "file3",
- * list}
- *
- * This query record is sent to the registered query_callback()
- *
- * */
- while ((ret = sqlite3_step(prep_stmt)) == SQLITE_ROW) {
- if (sqlite3_column_count(prep_stmt) > 0) {
- is_query_empty = _gf_false;
-
- /*Retrieving GFID - column index is 0*/
- text_column = (char *)sqlite3_column_text(prep_stmt, 0);
- if (!text_column) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_GET_ID_FAILED,
- "Failed to"
- "retrieve GFID");
- goto out;
- }
- ret = gf_uuid_parse(text_column, curr_gfid);
- if (ret) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PARSE_FAILED,
- "Failed to parse "
- "GFID");
- goto out;
- }
-
- /*
- * if the previous record was not of the current gfid
- * call the call_back function and send the
- * query record, which will have all the link_info
- * objects associated with this gfid
- *
- * */
- if (gf_uuid_compare(curr_gfid, prev_gfid) != 0) {
- /* If this is not the first record */
- if (!is_first_record) {
- /*Call the call_back function provided*/
- ret = query_callback(query_record, _query_cbk_args);
- if (ret) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0,
- LG_MSG_QUERY_CALL_BACK_FAILED,
- "Query call back "
- "failed");
- goto out;
- }
- }
-
- /*Clear the query record*/
- gfdb_query_record_free(query_record);
- query_record = NULL;
- query_record = gfdb_query_record_new();
- if (!query_record) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0,
- LG_MSG_CREATE_FAILED,
- "Failed to create "
- "query_record");
- goto out;
- }
-
- gf_uuid_copy(query_record->gfid, curr_gfid);
- gf_uuid_copy(prev_gfid, curr_gfid);
- }
-
- /* Get PGFID */
- text_column = (char *)sqlite3_column_text(prep_stmt, 1);
- if (!text_column) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_GET_ID_FAILED,
- "Failed to"
- " retrieve GF_ID");
- goto out;
- }
- ret = gf_uuid_parse(text_column, pgfid);
- if (ret) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_PARSE_FAILED,
- "Failed to parse "
- "GF_ID");
- goto out;
- }
-
- /* Get Base name */
- text_column = (char *)sqlite3_column_text(prep_stmt, 2);
- if (!text_column) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_GET_ID_FAILED,
- "Failed to"
- " retrieve GF_ID");
- goto out;
- }
- base_name = text_column;
-
- /* Add link info to the list */
- ret = gfdb_add_link_to_query_record(query_record, pgfid, base_name);
- if (ret) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_GET_ID_FAILED,
- "Failed to"
- " add link info to query record");
- goto out;
- }
-
- is_first_record = _gf_false;
- }
- }
-
- if (ret != SQLITE_DONE) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_GET_RECORD_FAILED,
- "Failed to retrieve records "
- "from db : %s",
- sqlite3_errmsg(db_conn));
- ret = -1;
- goto out;
- }
-
- if (!is_query_empty) {
- /*
- * Call the call_back function for the last record from the
- * Database
- * */
- ret = query_callback(query_record, _query_cbk_args);
- if (ret) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0,
- LG_MSG_QUERY_CALL_BACK_FAILED, "Query call back failed");
- goto out;
- }
- }
-
- ret = 0;
-out:
- gfdb_query_record_free(query_record);
- query_record = NULL;
- return ret;
-}
-
-int
-gf_sql_clear_counters(gf_sql_connection_t *sql_conn)
-{
- int ret = -1;
- char *sql_strerror = NULL;
- char *query_str = NULL;
-
- CHECK_SQL_CONN(sql_conn, out);
-
- query_str = "UPDATE " GF_FILE_TABLE " SET " GF_COL_READ_FREQ_CNTR
- " = 0 , " GF_COL_WRITE_FREQ_CNTR " = 0 ;";
-
- ret = sqlite3_exec(sql_conn->sqlite3_db_conn, query_str, NULL, NULL,
- &sql_strerror);
- if (ret != SQLITE_OK) {
- gf_msg(GFDB_STR_SQLITE3, GF_LOG_ERROR, 0, LG_MSG_EXEC_FAILED,
- "Failed to execute: %s : %s", query_str, sql_strerror);
- sqlite3_free(sql_strerror);
- ret = -1;
- goto out;
- }
-
- ret = 0;
-out:
- return ret;
-}
diff --git a/libglusterfs/src/gfdb/gfdb_sqlite3_helper.h b/libglusterfs/src/gfdb/gfdb_sqlite3_helper.h
deleted file mode 100644
index f19344a353c..00000000000
--- a/libglusterfs/src/gfdb/gfdb_sqlite3_helper.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-#ifndef __GFDB_SQLITE3_HELPER_H
-#define __GFDB_SQLITE3_HELPER_H
-
-#include "gfdb_sqlite3.h"
-
-/******************************************************************************
- *
- * Helper functions for gf_sqlite3_insert()
- *
- * ****************************************************************************/
-
-int
-gf_sql_insert_wind(gf_sql_connection_t *sql_conn,
- gfdb_db_record_t *gfdb_db_record);
-
-int
-gf_sql_insert_unwind(gf_sql_connection_t *sql_conn,
- gfdb_db_record_t *gfdb_db_record);
-
-int
-gf_sql_update_delete_wind(gf_sql_connection_t *sql_conn,
- gfdb_db_record_t *gfdb_db_record);
-
-int
-gf_sql_delete_unwind(gf_sql_connection_t *sql_conn,
- gfdb_db_record_t *gfdb_db_record);
-
-/******************************************************************************
- *
- * Find/Query helper functions
- *
- * ****************************************************************************/
-
-int
-gf_sql_query_function(sqlite3_stmt *prep_stmt,
- gf_query_callback_t query_callback,
- void *_query_cbk_args);
-
-int
-gf_sql_clear_counters(gf_sql_connection_t *sql_conn);
-
-#endif
diff --git a/libglusterfs/src/gidcache.c b/libglusterfs/src/gidcache.c
index b9fdb9a39be..64a93802f76 100644
--- a/libglusterfs/src/gidcache.c
+++ b/libglusterfs/src/gidcache.c
@@ -8,8 +8,9 @@
cases as published by the Free Software Foundation.
*/
-#include "gidcache.h"
-#include "mem-pool.h"
+#include "glusterfs/gidcache.h"
+#include "glusterfs/mem-pool.h"
+#include "glusterfs/common-utils.h"
/*
* We treat this as a very simple set-associative LRU cache, with entries aged
@@ -64,8 +65,8 @@ gid_cache_lookup(gid_cache_t *cache, uint64_t id, uint64_t uid, uint64_t gid)
time_t now;
const gid_list_t *agl;
+ now = gf_time();
LOCK(&cache->gc_lock);
- now = time(NULL);
bucket = id % cache->gc_nbuckets;
agl = BUCKET_START(cache->gc_cache, bucket);
for (i = 0; i < AUX_GID_CACHE_ASSOC; i++, agl++) {
@@ -132,8 +133,8 @@ gid_cache_add(gid_cache_t *cache, gid_list_t *gl)
if (!cache->gc_max_age)
return 0;
+ now = gf_time();
LOCK(&cache->gc_lock);
- now = time(NULL);
/*
* Scan for the first free entry or one that matches this id. The id
diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c
index 98990460c86..ae06f8be386 100644
--- a/libglusterfs/src/globals.c
+++ b/libglusterfs/src/globals.c
@@ -10,13 +10,8 @@
#include <pthread.h>
-#include "glusterfs.h"
-#include "globals.h"
-#include "xlator.h"
-#include "mem-pool.h"
-#include "syncop.h"
-#include "libglusterfs-messages.h"
-#include "upcall-utils.h"
+#include "glusterfs/syncop.h"
+#include "glusterfs/libglusterfs-messages.h"
const char *gf_fop_list[GF_FOP_MAXVALUE] = {
[GF_FOP_NULL] = "NULL",
@@ -77,6 +72,7 @@ const char *gf_fop_list[GF_FOP_MAXVALUE] = {
[GF_FOP_PUT] = "PUT",
[GF_FOP_ICREATE] = "ICREATE",
[GF_FOP_NAMELINK] = "NAMELINK",
+ [GF_FOP_COPY_FILE_RANGE] = "COPY_FILE_RANGE",
};
const char *gf_upcall_list[GF_UPCALL_FLAGS_MAXVALUE] = {
@@ -98,16 +94,19 @@ const char *gf_upcall_list[GF_UPCALL_FLAGS_MAXVALUE] = {
glusterfs_ctx_t *global_ctx = NULL;
pthread_mutex_t global_ctx_mutex = PTHREAD_MUTEX_INITIALIZER;
xlator_t global_xlator;
-static pthread_key_t this_xlator_key;
-static pthread_key_t synctask_key;
-static pthread_key_t uuid_buf_key;
-static char global_uuid_buf[GF_UUID_BUF_SIZE];
-static pthread_key_t lkowner_buf_key;
-static char global_lkowner_buf[GF_LKOWNER_BUF_SIZE];
-static pthread_key_t leaseid_buf_key;
static int gf_global_mem_acct_enable = 1;
static pthread_once_t globals_inited = PTHREAD_ONCE_INIT;
+static pthread_key_t free_key;
+
+static __thread xlator_t *thread_xlator = NULL;
+static __thread void *thread_synctask = NULL;
+static __thread void *thread_leaseid = NULL;
+static __thread struct syncopctx thread_syncopctx = {};
+static __thread char thread_uuid_buf[GF_UUID_BUF_SIZE] = {};
+static __thread char thread_lkowner_buf[GF_LKOWNER_BUF_SIZE] = {};
+static __thread char thread_leaseid_buf[GF_LEASE_ID_BUF_SIZE] = {};
+
int
gf_global_mem_acct_enable_get(void)
{
@@ -121,12 +120,6 @@ gf_global_mem_acct_enable_set(int val)
return 0;
}
-void
-glusterfs_this_destroy(void *ptr)
-{
- FREE(ptr);
-}
-
static struct xlator_cbks global_cbks = {
.forget = NULL,
.release = NULL,
@@ -211,18 +204,9 @@ struct volume_options global_xl_options[] = {
static volume_opt_list_t global_xl_opt_list;
-int
+void
glusterfs_this_init()
{
- int ret = 0;
- ret = pthread_key_create(&this_xlator_key, glusterfs_this_destroy);
- if (ret != 0) {
- gf_msg("", GF_LOG_WARNING, ret, LG_MSG_PTHREAD_KEY_CREATE_FAILED,
- "failed to create "
- "the pthread key");
- return ret;
- }
-
global_xlator.name = "glusterfs";
global_xlator.type = GF_GLOBAL_XLATOR_NAME;
global_xlator.cbks = &global_cbks;
@@ -236,301 +220,131 @@ glusterfs_this_init()
global_xl_opt_list.given_opt = global_xl_options;
list_add_tail(&global_xl_opt_list.list, &global_xlator.volume_options);
-
- return ret;
}
xlator_t **
__glusterfs_this_location()
{
- xlator_t **this_location = NULL;
- int ret = 0;
-
- this_location = pthread_getspecific(this_xlator_key);
-
- if (!this_location) {
- this_location = CALLOC(1, sizeof(*this_location));
- if (!this_location)
- goto out;
+ xlator_t **this_location;
- ret = pthread_setspecific(this_xlator_key, this_location);
- if (ret != 0) {
- FREE(this_location);
- this_location = NULL;
- goto out;
- }
- }
-out:
- if (this_location) {
- if (!*this_location)
- *this_location = &global_xlator;
+ this_location = &thread_xlator;
+ if (*this_location == NULL) {
+ thread_xlator = &global_xlator;
}
+
return this_location;
}
xlator_t *
glusterfs_this_get()
{
- xlator_t **this_location = NULL;
-
- this_location = __glusterfs_this_location();
- if (!this_location)
- return &global_xlator;
-
- return *this_location;
+ return *__glusterfs_this_location();
}
-int
+void
glusterfs_this_set(xlator_t *this)
{
- xlator_t **this_location = NULL;
-
- this_location = __glusterfs_this_location();
- if (!this_location)
- return -ENOMEM;
-
- *this_location = this;
-
- return 0;
+ thread_xlator = this;
}
/* SYNCOPCTX */
-static pthread_key_t syncopctx_key;
-
-static void
-syncopctx_key_destroy(void *ptr)
-{
- struct syncopctx *opctx = ptr;
-
- if (opctx) {
- if (opctx->groups)
- GF_FREE(opctx->groups);
-
- GF_FREE(opctx);
- }
-
- return;
-}
void *
syncopctx_getctx()
{
- void *opctx = NULL;
-
- opctx = pthread_getspecific(syncopctx_key);
-
- return opctx;
-}
-
-int
-syncopctx_setctx(void *ctx)
-{
- int ret = 0;
-
- ret = pthread_setspecific(syncopctx_key, ctx);
-
- return ret;
-}
-
-static int
-syncopctx_init(void)
-{
- int ret;
-
- ret = pthread_key_create(&syncopctx_key, syncopctx_key_destroy);
-
- return ret;
+ return &thread_syncopctx;
}
/* SYNCTASK */
-int
-synctask_init()
-{
- int ret = 0;
-
- ret = pthread_key_create(&synctask_key, NULL);
-
- return ret;
-}
-
void *
synctask_get()
{
- void *synctask = NULL;
-
- synctask = pthread_getspecific(synctask_key);
-
- return synctask;
+ return thread_synctask;
}
-int
+void
synctask_set(void *synctask)
{
- int ret = 0;
-
- pthread_setspecific(synctask_key, synctask);
-
- return ret;
+ thread_synctask = synctask;
}
// UUID_BUFFER
-void
-glusterfs_uuid_buf_destroy(void *ptr)
-{
- FREE(ptr);
-}
-
-int
-glusterfs_uuid_buf_init()
-{
- int ret = 0;
-
- ret = pthread_key_create(&uuid_buf_key, glusterfs_uuid_buf_destroy);
- return ret;
-}
-
char *
glusterfs_uuid_buf_get()
{
- char *buf;
- int ret = 0;
-
- buf = pthread_getspecific(uuid_buf_key);
- if (!buf) {
- buf = MALLOC(GF_UUID_BUF_SIZE);
- ret = pthread_setspecific(uuid_buf_key, (void *)buf);
- if (ret)
- buf = global_uuid_buf;
- }
- return buf;
+ return thread_uuid_buf;
}
/* LKOWNER_BUFFER */
-void
-glusterfs_lkowner_buf_destroy(void *ptr)
-{
- FREE(ptr);
-}
-
-int
-glusterfs_lkowner_buf_init()
-{
- int ret = 0;
-
- ret = pthread_key_create(&lkowner_buf_key, glusterfs_lkowner_buf_destroy);
- return ret;
-}
-
char *
glusterfs_lkowner_buf_get()
{
- char *buf;
- int ret = 0;
-
- buf = pthread_getspecific(lkowner_buf_key);
- if (!buf) {
- buf = MALLOC(GF_LKOWNER_BUF_SIZE);
- ret = pthread_setspecific(lkowner_buf_key, (void *)buf);
- if (ret)
- buf = global_lkowner_buf;
- }
- return buf;
+ return thread_lkowner_buf;
}
/* Leaseid buffer */
-void
-glusterfs_leaseid_buf_destroy(void *ptr)
-{
- FREE(ptr);
-}
-
-int
-glusterfs_leaseid_buf_init()
-{
- int ret = 0;
-
- ret = pthread_key_create(&leaseid_buf_key, glusterfs_leaseid_buf_destroy);
- return ret;
-}
char *
glusterfs_leaseid_buf_get()
{
char *buf = NULL;
- int ret = 0;
- buf = pthread_getspecific(leaseid_buf_key);
- if (!buf) {
- buf = CALLOC(1, GF_LEASE_ID_BUF_SIZE);
- ret = pthread_setspecific(leaseid_buf_key, (void *)buf);
- if (ret) {
- FREE(buf);
- buf = NULL;
- }
+ buf = thread_leaseid;
+ if (buf == NULL) {
+ buf = thread_leaseid_buf;
+ thread_leaseid = buf;
}
+
return buf;
}
char *
glusterfs_leaseid_exist()
{
- return pthread_getspecific(leaseid_buf_key);
+ return thread_leaseid;
}
static void
-gf_globals_init_once()
+glusterfs_cleanup(void *ptr)
{
- int ret = 0;
-
- ret = glusterfs_this_init();
- if (ret) {
- gf_msg("", GF_LOG_CRITICAL, 0, LG_MSG_TRANSLATOR_INIT_FAILED,
- "ERROR: glusterfs-translator init failed");
- goto out;
+ if (thread_syncopctx.groups != NULL) {
+ GF_FREE(thread_syncopctx.groups);
}
- ret = glusterfs_uuid_buf_init();
- if (ret) {
- gf_msg("", GF_LOG_CRITICAL, 0, LG_MSG_UUID_BUF_INIT_FAILED,
- "ERROR: glusterfs uuid buffer init failed");
- goto out;
- }
+ mem_pool_thread_destructor(NULL);
+}
- ret = glusterfs_lkowner_buf_init();
- if (ret) {
- gf_msg("", GF_LOG_CRITICAL, 0, LG_MSG_LKOWNER_BUF_INIT_FAILED,
- "ERROR: glusterfs lkowner buffer init failed");
- goto out;
- }
+void
+gf_thread_needs_cleanup(void)
+{
+ /* The value stored in free_key TLS is not really used for anything, but
+ * pthread implementation doesn't call the TLS destruction function unless
+ * it's != NULL. This function must be called whenever something is
+ * allocated for this thread so that glusterfs_cleanup() will be called
+ * and resources can be released. */
+ (void)pthread_setspecific(free_key, (void *)1);
+}
- ret = glusterfs_leaseid_buf_init();
- if (ret) {
- gf_msg("", GF_LOG_CRITICAL, 0, LG_MSG_LEASEID_BUF_INIT_FAILED,
- "ERROR: glusterfs leaseid buffer init failed");
- goto out;
- }
+static void
+gf_globals_init_once()
+{
+ int ret = 0;
- ret = synctask_init();
- if (ret) {
- gf_msg("", GF_LOG_CRITICAL, 0, LG_MSG_SYNCTASK_INIT_FAILED,
- "ERROR: glusterfs synctask init failed");
- goto out;
- }
+ glusterfs_this_init();
- ret = syncopctx_init();
- if (ret) {
- gf_msg("", GF_LOG_CRITICAL, 0, LG_MSG_SYNCOPCTX_INIT_FAILED,
- "ERROR: glusterfs syncopctx init failed");
- goto out;
- }
-out:
+ /* This is needed only to cleanup the potential allocation of
+ * thread_syncopctx.groups. */
+ ret = pthread_key_create(&free_key, glusterfs_cleanup);
+ if (ret != 0) {
+ gf_msg("", GF_LOG_ERROR, ret, LG_MSG_PTHREAD_KEY_CREATE_FAILED,
+ "failed to create the pthread key");
- if (ret) {
gf_msg("", GF_LOG_CRITICAL, 0, LG_MSG_GLOBAL_INIT_FAILED,
"Exiting as global initialization failed");
+
exit(ret);
}
}
diff --git a/libglusterfs/src/glusterfs/async.h b/libglusterfs/src/glusterfs/async.h
new file mode 100644
index 00000000000..d1d70ae0bc7
--- /dev/null
+++ b/libglusterfs/src/glusterfs/async.h
@@ -0,0 +1,209 @@
+/*
+ Copyright (c) 2019 Red Hat, Inc <https://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __GLUSTERFS_ASYNC_H__
+#define __GLUSTERFS_ASYNC_H__
+
+#define _LGPL_SOURCE
+
+#include <sys/types.h>
+#include <signal.h>
+#include <errno.h>
+
+#ifdef URCU_OLD
+
+/* TODO: Fix the include paths. Since this is a .h included from many places
+ * it makes no sense to append a '-I$(CONTRIBDIR)/userspace-rcu/' to each
+ * Makefile.am. I've also seen some problems with CI builders (they
+ * failed to find the include files, but the same source on another setup
+ * is working fine). */
+#include "wfcqueue.h"
+#include "wfstack.h"
+
+#else /* !URCU_OLD */
+
+#include <urcu/wfcqueue.h>
+#include <urcu/wfstack.h>
+
+#endif /* URCU_OLD */
+
+#include "glusterfs/xlator.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/list.h"
+#include "glusterfs/libglusterfs-messages.h"
+
+/* This is the name prefix that all worker threads will have. A number will
+ * be added to differentiate them. */
+#define GF_ASYNC_THREAD_NAME "tpw"
+
+/* This value determines the maximum number of threads that are allowed. */
+#define GF_ASYNC_MAX_THREADS 128
+
+/* This value determines how many additional threads will be started but will
+ * remain inactive until they are explicitly activated by the leader. This is
+ * useful to react faster to bursts of load, but at the same time we minimize
+ * contention if they are not really needed to handle current load.
+ *
+ * TODO: Instead of a fixed number, it would probably be better to use a
+ * prcentage of the available cores. */
+#define GF_ASYNC_SPARE_THREADS 2
+
+/* This value determines the signal used to wake the leader when new work has
+ * been added to the queue. To do so we reuse SIGALRM, since the most logical
+ * candidates (SIGUSR1/SIGUSR2) are already used. This signal must not be used
+ * by anything else in the process. */
+#define GF_ASYNC_SIGQUEUE SIGALRM
+
+/* This value determines the signal that will be used to transfer leader role
+ * to other workers. */
+#define GF_ASYNC_SIGCTRL SIGVTALRM
+
+#define gf_async_warning(_err, _msg, _args...) \
+ gf_msg("async", GF_LOG_WARNING, -(_err), LG_MSG_ASYNC_WARNING, _msg, \
+ ##_args)
+
+#define gf_async_error(_err, _msg, _args...) \
+ gf_msg("async", GF_LOG_ERROR, -(_err), LG_MSG_ASYNC_FAILURE, _msg, ##_args)
+
+#define gf_async_fatal(_err, _msg, _args...) \
+ do { \
+ GF_ABORT("Critical error in async module. Unable to continue. (" _msg \
+ "). Error %d.", \
+ ##_args, -(_err)); \
+ } while (0)
+
+struct _gf_async;
+typedef struct _gf_async gf_async_t;
+
+struct _gf_async_worker;
+typedef struct _gf_async_worker gf_async_worker_t;
+
+struct _gf_async_queue;
+typedef struct _gf_async_queue gf_async_queue_t;
+
+struct _gf_async_control;
+typedef struct _gf_async_control gf_async_control_t;
+
+typedef void (*gf_async_callback_f)(xlator_t *xl, gf_async_t *async);
+
+struct _gf_async {
+ /* TODO: remove dependency on xl/THIS. */
+ xlator_t *xl;
+ gf_async_callback_f cbk;
+ struct cds_wfcq_node queue;
+};
+
+struct _gf_async_worker {
+ /* Used to send asynchronous jobs related to the worker. */
+ gf_async_t async;
+
+ /* Member of the available workers stack. */
+ struct cds_wfs_node stack;
+
+ /* Thread object of the current worker. */
+ pthread_t thread;
+
+ /* Unique identifier of this worker. */
+ int32_t id;
+
+ /* Indicates if this worker is enabled. */
+ bool running;
+};
+
+struct _gf_async_queue {
+ /* Structures needed to manage a wait-free queue. For better performance
+ * they are placed in two different cache lines, as recommended by URCU
+ * documentation, even though in our case some threads will be producers
+ * and consumers at the same time. */
+ struct cds_wfcq_head head __attribute__((aligned(64)));
+ struct cds_wfcq_tail tail __attribute__((aligned(64)));
+};
+
+#define GF_ASYNC_COUNTS(_run, _stop) (((uint32_t)(_run) << 16) + (_stop))
+#define GF_ASYNC_COUNT_RUNNING(_count) ((_count) >> 16)
+#define GF_ASYNC_COUNT_STOPPING(_count) ((_count)&65535)
+
+struct _gf_async_control {
+ gf_async_queue_t queue;
+
+ /* Stack of unused workers. */
+ struct __cds_wfs_stack available;
+
+ /* Array of preallocated worker structures. */
+ gf_async_worker_t *table;
+
+ /* Used to synchronize main thread with workers on termination. */
+ pthread_barrier_t sync;
+
+ /* The id of the last thread that will be used for synchronization. */
+ pthread_t sync_thread;
+
+ /* Signal mask to wait for control signals from leader. */
+ sigset_t sigmask_ctrl;
+
+ /* Signal mask to wait for queued items. */
+ sigset_t sigmask_queue;
+
+ /* Saved signal handlers. */
+ struct sigaction handler_ctrl;
+ struct sigaction handler_queue;
+
+ /* PID of the current process. */
+ pid_t pid;
+
+ /* Maximum number of allowed threads. */
+ uint32_t max_threads;
+
+ /* Current number of running and stopping workers. This value is split
+ * into 2 16-bits fields to track both counters atomically at the same
+ * time. */
+ uint32_t counts;
+
+ /* It's used to control whether the asynchronous infrastructure is used
+ * or not. */
+ bool enabled;
+};
+
+extern gf_async_control_t gf_async_ctrl;
+
+int32_t
+gf_async_init(glusterfs_ctx_t *ctx);
+
+void
+gf_async_fini(void);
+
+void
+gf_async_adjust_threads(int32_t threads);
+
+static inline void
+gf_async(gf_async_t *async, xlator_t *xl, gf_async_callback_f cbk)
+{
+ if (!gf_async_ctrl.enabled) {
+ cbk(xl, async);
+ return;
+ }
+
+ async->xl = xl;
+ async->cbk = cbk;
+ cds_wfcq_node_init(&async->queue);
+ if (caa_unlikely(!cds_wfcq_enqueue(&gf_async_ctrl.queue.head,
+ &gf_async_ctrl.queue.tail,
+ &async->queue))) {
+ /* The queue was empty, so the leader could be sleeping. We need to
+ * wake it so that the new item can be processed. If the queue was not
+ * empty, we don't need to do anything special since the leader will
+ * take care of it. */
+ if (caa_unlikely(kill(gf_async_ctrl.pid, GF_ASYNC_SIGQUEUE) < 0)) {
+ gf_async_fatal(errno, "Unable to wake leader worker.");
+ };
+ }
+}
+
+#endif /* !__GLUSTERFS_ASYNC_H__ */
diff --git a/libglusterfs/src/atomic.h b/libglusterfs/src/glusterfs/atomic.h
index dbbdc309628..ced81748218 100644
--- a/libglusterfs/src/atomic.h
+++ b/libglusterfs/src/glusterfs/atomic.h
@@ -14,7 +14,7 @@
#include <inttypes.h>
#include <stdbool.h>
-#include "locking.h"
+#include "glusterfs/locking.h"
/* Macros used to join two arguments and generate a new macro name. */
#define GF_ATOMIC_MACRO_1(_macro) _macro
diff --git a/libglusterfs/src/byte-order.h b/libglusterfs/src/glusterfs/byte-order.h
index fd8cef9e58d..fd8cef9e58d 100644
--- a/libglusterfs/src/byte-order.h
+++ b/libglusterfs/src/glusterfs/byte-order.h
diff --git a/libglusterfs/src/call-stub.h b/libglusterfs/src/glusterfs/call-stub.h
index 815ea312c93..8237ea459bf 100644
--- a/libglusterfs/src/call-stub.h
+++ b/libglusterfs/src/glusterfs/call-stub.h
@@ -11,23 +11,19 @@
#ifndef _CALL_STUB_H_
#define _CALL_STUB_H_
-#include "xlator.h"
-#include "defaults.h"
-#include "default-args.h"
-#include "stack.h"
-#include "list.h"
+#include "glusterfs/xlator.h"
+#include "glusterfs/defaults.h"
+#include "glusterfs/default-args.h"
+#include "glusterfs/stack.h"
+#include "glusterfs/list.h"
typedef struct _call_stub {
struct list_head list;
- char wind;
call_frame_t *frame;
- glusterfs_fop_t fop;
- gf_boolean_t poison;
struct mem_pool *stub_mem_pool; /* pointer to stub mempool in ctx_t */
uint32_t jnl_meta_len;
uint32_t jnl_data_len;
void (*serialize)(struct _call_stub *, char *, char *);
-
union {
fop_lookup_t lookup;
fop_stat_t stat;
@@ -81,6 +77,7 @@ typedef struct _call_stub {
fop_put_t put;
fop_icreate_t icreate;
fop_namelink_t namelink;
+ fop_copy_file_range_t copy_file_range;
} fn;
union {
@@ -136,8 +133,11 @@ typedef struct _call_stub {
fop_put_cbk_t put;
fop_icreate_cbk_t icreate;
fop_namelink_cbk_t namelink;
+ fop_copy_file_range_cbk_t copy_file_range;
} fn_cbk;
-
+ glusterfs_fop_t fop;
+ gf_boolean_t poison;
+ char wind;
default_args_t args;
default_args_cbk_t args_cbk;
} call_stub_t;
@@ -589,6 +589,18 @@ fop_namelink_cbk_stub(call_frame_t *frame, fop_namelink_cbk_t fn,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata);
+call_stub_t *
+fop_copy_file_range_stub(call_frame_t *frame, fop_copy_file_range_t fn,
+ fd_t *fd_in, off64_t off_in, fd_t *fd_out,
+ off64_t off_out, size_t len, uint32_t flags,
+ dict_t *xdata);
+
+call_stub_t *
+fop_copy_file_range_cbk_stub(call_frame_t *frame, fop_copy_file_range_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *stbuf, struct iatt *prebuf_dst,
+ struct iatt *postbuf_dst, dict_t *xdata);
+
void
call_resume(call_stub_t *stub);
void
diff --git a/libglusterfs/src/checksum.h b/libglusterfs/src/glusterfs/checksum.h
index 019bb14df71..019bb14df71 100644
--- a/libglusterfs/src/checksum.h
+++ b/libglusterfs/src/glusterfs/checksum.h
diff --git a/libglusterfs/src/circ-buff.h b/libglusterfs/src/glusterfs/circ-buff.h
index a839cd03c67..822345b641b 100644
--- a/libglusterfs/src/circ-buff.h
+++ b/libglusterfs/src/glusterfs/circ-buff.h
@@ -11,9 +11,7 @@
#ifndef _CB_H
#define _CB_H
-#include "common-utils.h"
-#include "logging.h"
-#include "mem-types.h"
+#include "glusterfs/common-utils.h"
#define BUFFER_SIZE 10
#define TOTAL_SIZE BUFFER_SIZE + 1
diff --git a/libglusterfs/src/client_t.h b/libglusterfs/src/glusterfs/client_t.h
index 57241a00d5f..a2c508e1d5c 100644
--- a/libglusterfs/src/client_t.h
+++ b/libglusterfs/src/glusterfs/client_t.h
@@ -11,9 +11,20 @@
#ifndef _CLIENT_T_H
#define _CLIENT_T_H
-#include "glusterfs.h"
-#include "locking.h" /* for gf_lock_t, not included by glusterfs.h */
-#include "atomic.h" /* for gf_atomic_t */
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/locking.h" /* for gf_lock_t, not included by glusterfs.h */
+#include "glusterfs/atomic.h" /* for gf_atomic_t */
+
+/* auth_data structure is required by RPC layer. But as it is also used in
+ * client_t structure validation, comparision, it is critical that it is defined
+ * in the larger scope of libglusterfs, instead of libgfrpc. With this change,
+ * even RPC will use this structure */
+#define GF_CLIENTT_AUTH_BYTES 400
+typedef struct client_auth_data {
+ int flavour;
+ int datalen;
+ char authdata[GF_CLIENTT_AUTH_BYTES];
+} client_auth_data_t;
struct client_ctx {
void *ctx_key;
@@ -47,6 +58,8 @@ typedef struct _client {
inode_t *subdir_inode;
uuid_t subdir_gfid;
int32_t opversion;
+ /* Variable to save fd_count for detach brick */
+ gf_atomic_t fd_cnt;
} client_t;
#define GF_CLIENTCTX_INITIAL_SIZE 8
@@ -76,21 +89,12 @@ typedef struct clienttable clienttable_t;
*/
#define GF_CLIENTENTRY_ALLOCATED -2
-struct rpcsvc_auth_data;
-
-client_t *
-gf_client_get(xlator_t *this, struct rpcsvc_auth_data *cred, char *client_uid,
- char *subdir_mount);
-
void
gf_client_put(client_t *client, gf_boolean_t *detached);
clienttable_t *
gf_clienttable_alloc(void);
-void
-gf_client_clienttable_destroy(clienttable_t *clienttable);
-
client_t *
gf_client_ref(client_t *client);
@@ -136,4 +140,8 @@ gf_client_dump_inodes(xlator_t *this);
int
gf_client_disconnect(client_t *client);
+client_t *
+gf_client_get(xlator_t *this, client_auth_data_t *cred, char *client_uid,
+ char *subdir_mount);
+
#endif /* _CLIENT_T_H */
diff --git a/libglusterfs/src/cluster-syncop.h b/libglusterfs/src/glusterfs/cluster-syncop.h
index 10388db74cc..d0ad5ed548c 100644
--- a/libglusterfs/src/cluster-syncop.h
+++ b/libglusterfs/src/glusterfs/cluster-syncop.h
@@ -11,13 +11,13 @@
#ifndef _CLUSTER_SYNCOP_H
#define _CLUSTER_SYNCOP_H
-#include "xlator.h"
#include <sys/time.h>
#include <pthread.h>
#include <ucontext.h>
-#include "defaults.h"
-#include "default-args.h"
-#include "syncop.h"
+
+#include "glusterfs/defaults.h"
+#include "glusterfs/default-args.h"
+#include "glusterfs/syncop.h"
/*********************************************************************
*
diff --git a/libglusterfs/src/common-utils.h b/libglusterfs/src/glusterfs/common-utils.h
index f0762566313..f297fdab5c9 100644
--- a/libglusterfs/src/common-utils.h
+++ b/libglusterfs/src/glusterfs/common-utils.h
@@ -18,6 +18,7 @@
#include <string.h>
#include <assert.h>
#include <pthread.h>
+#include <unistd.h>
#include <openssl/md5.h>
#ifndef GF_BSD_HOST_OS
#include <alloca.h>
@@ -26,6 +27,11 @@
#include <fnmatch.h>
#include <uuid/uuid.h>
+/* FreeBSD, etc. */
+#ifndef __BITS_PER_LONG
+#define __BITS_PER_LONG (CHAR_BIT * (sizeof(long)))
+#endif
+
#ifndef ffsll
#define ffsll(x) __builtin_ffsll(x)
#endif
@@ -38,15 +44,10 @@ trap(void);
/* To solve type punned error */
#define VOID(ptr) ((void **)((void *)ptr))
-#include "logging.h"
-#include "glusterfs.h"
-#include "locking.h"
-#include "mem-pool.h"
-#include "compat-uuid.h"
-#include "iatt.h"
-#include "libglusterfs-messages.h"
-#include "protocol-common.h"
-#include "iobuf.h"
+#include "glusterfs/mem-pool.h"
+#include "glusterfs/compat-uuid.h"
+#include "glusterfs/iatt.h"
+#include "glusterfs/libglusterfs-messages.h"
#define STRINGIFY(val) #val
#define TOSTRING(val) STRINGIFY(val)
@@ -82,7 +83,6 @@ trap(void);
#define GF_UNIT_PERCENT_STRING "%"
#define GEOREP "geo-replication"
-#define GHADOOP "glusterfs-hadoop"
#define GLUSTERD_NAME "glusterd"
#define GF_SELINUX_XATTR_KEY "security.selinux"
@@ -120,12 +120,16 @@ trap(void);
#define GF_HOUR_IN_SECONDS (60 * 60)
#define GF_DAY_IN_SECONDS (24 * 60 * 60)
#define GF_WEEK_IN_SECONDS (7 * 24 * 60 * 60)
+#define GF_SEC_IN_NS 1000000000
+#define GF_MS_IN_NS 1000000
+#define GF_US_IN_NS 1000
/* Default timeout for both barrier and changelog translator */
#define BARRIER_TIMEOUT "120"
/* Default value of signing waiting time to sign a file for bitrot */
#define SIGNING_TIMEOUT "120"
+#define BR_WORKERS "4"
/* xxhash */
#define GF_XXH64_DIGEST_LENGTH 8
@@ -147,14 +151,11 @@ trap(void);
/* pthread related */
/* as per the man page, thread-name should be at max 16 bytes */
/* with prefix of 'glfs_' (5), we are left with 11 more bytes */
-#define GF_THREAD_NAMEMAX 11
+#define GF_THREAD_NAME_LIMIT 16
#define GF_THREAD_NAME_PREFIX "glfs_"
-#define GF_THREAD_NAME_PREFIX_LEN 5
-#include <stdbool.h>
-#define gf_boolean_t bool
-#define _gf_false false
-#define _gf_true true
+/* Advisory buffer size for formatted timestamps (see gf_time_fmt) */
+#define GF_TIMESTR_SIZE 256
/*
* we could have initialized these as +ve values and treated
@@ -174,7 +175,9 @@ enum _gf_special_pid {
GF_CLIENT_PID_BITD = -8,
GF_CLIENT_PID_SCRUB = -9,
GF_CLIENT_PID_TIER_DEFRAG = -10,
- GF_SERVER_PID_TRASH = -11
+ GF_SERVER_PID_TRASH = -11,
+ GF_CLIENT_PID_ADD_REPLICA_MOUNT = -12,
+ GF_CLIENT_PID_SET_UTIME = -13,
};
enum _gf_xlator_ipc_targets {
@@ -186,6 +189,12 @@ enum _gf_xlator_ipc_targets {
typedef enum _gf_special_pid gf_special_pid_t;
typedef enum _gf_xlator_ipc_targets _gf_xlator_ipc_targets_t;
+/* Array to hold custom xattr keys */
+extern char *xattrs_to_heal[];
+
+char **
+get_xattrs_to_heal();
+
/* The DHT file rename operation is not a straightforward rename.
* It involves creating linkto and linkfiles, and can unlink or rename the
* source file depending on the hashed and cached subvols for the source
@@ -246,6 +255,8 @@ list_node_del(struct list_node *node);
struct dnscache *
gf_dnscache_init(time_t ttl);
+void
+gf_dnscache_deinit(struct dnscache *cache);
struct dnscache_entry *
gf_dnscache_entry_init(void);
void
@@ -432,9 +443,6 @@ BIT_VALUE(unsigned char *array, unsigned int index)
} \
} while (0)
-#define GF_FILE_CONTENT_REQUESTED(_xattr_req, _content_limit) \
- (dict_get_uint64(_xattr_req, "glusterfs.content", _content_limit) == 0)
-
#ifdef DEBUG
#define GF_ASSERT(x) assert(x);
#else
@@ -447,7 +455,16 @@ BIT_VALUE(unsigned char *array, unsigned int index)
} while (0)
#endif
-#define GF_ABORT(msg) \
+/* Compile-time assert, borrowed from Linux kernel. */
+#ifdef HAVE_STATIC_ASSERT
+#define GF_STATIC_ASSERT(expr, ...) \
+ __gf_static_assert(expr, ##__VA_ARGS__, #expr)
+#define __gf_static_assert(expr, msg, ...) _Static_assert(expr, msg)
+#else
+#define GF_STATIC_ASSERT(expr, ...)
+#endif
+
+#define GF_ABORT(msg...) \
do { \
gf_msg_callingfn("", GF_LOG_CRITICAL, 0, LG_MSG_ASSERTION_FAILED, \
"Assertion failed: " msg); \
@@ -477,18 +494,15 @@ union gf_sock_union {
#define IOV_MIN(n) min(IOV_MAX, n)
-#define GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scr) \
- do { \
- entry = NULL; \
- if (dir) { \
- entry = sys_readdir(dir, scr); \
- while (entry && (!strcmp(entry->d_name, ".") || \
- !fnmatch("*.tmp", entry->d_name, 0) || \
- !strcmp(entry->d_name, ".."))) { \
- entry = sys_readdir(dir, scr); \
- } \
- } \
- } while (0)
+static inline gf_boolean_t
+gf_irrelevant_entry(struct dirent *entry)
+{
+ GF_ASSERT(entry);
+
+ return (!strcmp(entry->d_name, ".") ||
+ !fnmatch("*.tmp", entry->d_name, 0) ||
+ !strcmp(entry->d_name, ".."));
+}
static inline void
iov_free(struct iovec *vector, int count)
@@ -517,65 +531,193 @@ static inline struct iovec *
iov_dup(const struct iovec *vector, int count)
{
int bytecount = 0;
- int i;
struct iovec *newvec = NULL;
bytecount = (count * sizeof(struct iovec));
newvec = GF_MALLOC(bytecount, gf_common_mt_iovec);
- if (!newvec)
- return NULL;
-
- for (i = 0; i < count; i++) {
- newvec[i].iov_len = vector[i].iov_len;
- newvec[i].iov_base = vector[i].iov_base;
+ if (newvec != NULL) {
+ memcpy(newvec, vector, bytecount);
}
return newvec;
}
-static inline int
-iov_subset(struct iovec *orig, int orig_count, off_t src_offset,
- off_t dst_offset, struct iovec *new)
+typedef struct _iov_iter {
+ const struct iovec *iovec;
+ void *ptr;
+ uint32_t len;
+ uint32_t count;
+} iov_iter_t;
+
+static inline bool
+iov_iter_init(iov_iter_t *iter, const struct iovec *iovec, uint32_t count,
+ uint32_t offset)
{
- int new_count = 0;
- int i;
- off_t offset = 0;
- size_t start_offset = 0;
- size_t end_offset = 0, origin_iov_len = 0;
+ uint32_t len;
- for (i = 0; i < orig_count; i++) {
- origin_iov_len = orig[i].iov_len;
+ while (count > 0) {
+ count--;
+ len = iovec->iov_len;
+ if (offset < len) {
+ iter->ptr = iovec->iov_base + offset;
+ iter->len = len - offset;
+ iter->iovec = iovec + 1;
+ iter->count = count;
- if ((offset + orig[i].iov_len < src_offset) || (offset > dst_offset)) {
- goto not_subset;
+ return true;
}
+ offset -= len;
+ }
- if (!new) {
- goto count_only;
- }
+ memset(iter, 0, sizeof(*iter));
+
+ return false;
+}
- start_offset = 0;
- end_offset = orig[i].iov_len;
+static inline bool
+iov_iter_end(iov_iter_t *iter)
+{
+ return iter->count == 0;
+}
+
+static inline bool
+iov_iter_next(iov_iter_t *iter, uint32_t size)
+{
+ GF_ASSERT(size <= iter->len);
- if (src_offset >= offset) {
- start_offset = (src_offset - offset);
+ if (iter->len > size) {
+ iter->len -= size;
+ iter->ptr += size;
+
+ return true;
+ }
+ if (iter->count > 0) {
+ iter->count--;
+ iter->ptr = iter->iovec->iov_base;
+ iter->len = iter->iovec->iov_len;
+ iter->iovec++;
+
+ return true;
+ }
+
+ memset(iter, 0, sizeof(*iter));
+
+ return false;
+}
+
+static inline uint32_t
+iov_iter_copy(iov_iter_t *dst, iov_iter_t *src, uint32_t size)
+{
+ uint32_t len;
+
+ len = src->len;
+ if (len > dst->len) {
+ len = dst->len;
+ }
+ if (len > size) {
+ len = size;
+ }
+ memcpy(dst->ptr, src->ptr, len);
+
+ return len;
+}
+
+static inline uint32_t
+iov_iter_to_iovec(iov_iter_t *iter, struct iovec *iovec, int32_t idx,
+ uint32_t size)
+{
+ uint32_t len;
+
+ len = iter->len;
+ if (len > size) {
+ len = size;
+ }
+ iovec[idx].iov_base = iter->ptr;
+ iovec[idx].iov_len = len;
+
+ return len;
+}
+
+static inline int
+iov_subset(struct iovec *src, int src_count, uint32_t start, uint32_t size,
+ struct iovec **dst, int32_t dst_count)
+{
+ struct iovec iovec[src_count];
+ iov_iter_t iter;
+ uint32_t len;
+ int32_t idx;
+
+ if ((size == 0) || !iov_iter_init(&iter, src, src_count, start)) {
+ return 0;
+ }
+
+ idx = 0;
+ do {
+ len = iov_iter_to_iovec(&iter, iovec, idx, size);
+ idx++;
+ size -= len;
+ } while ((size > 0) && iov_iter_next(&iter, len));
+
+ if (*dst == NULL) {
+ *dst = iov_dup(iovec, idx);
+ if (*dst == NULL) {
+ return -1;
}
+ } else if (idx > dst_count) {
+ return -1;
+ } else {
+ memcpy(*dst, iovec, idx * sizeof(struct iovec));
+ }
+
+ return idx;
+}
- if (dst_offset <= (offset + orig[i].iov_len)) {
- end_offset = (dst_offset - offset);
+static inline int
+iov_skip(struct iovec *iovec, uint32_t count, uint32_t size)
+{
+ uint32_t len, idx;
+
+ idx = 0;
+ while ((size > 0) && (idx < count)) {
+ len = iovec[idx].iov_len;
+ if (len > size) {
+ iovec[idx].iov_len -= size;
+ iovec[idx].iov_base += size;
+ break;
}
+ idx++;
+ size -= len;
+ }
- new[new_count].iov_base = orig[i].iov_base + start_offset;
- new[new_count].iov_len = end_offset - start_offset;
+ if (idx > 0) {
+ memmove(iovec, iovec + idx, (count - idx) * sizeof(struct iovec));
+ }
+
+ return count - idx;
+}
- count_only:
- new_count++;
+static inline size_t
+iov_range_copy(const struct iovec *dst, uint32_t dst_count, uint32_t dst_offset,
+ const struct iovec *src, uint32_t src_count, uint32_t src_offset,
+ uint32_t size)
+{
+ iov_iter_t src_iter, dst_iter;
+ uint32_t len, total;
- not_subset:
- offset += origin_iov_len;
+ if ((size == 0) || !iov_iter_init(&src_iter, src, src_count, src_offset) ||
+ !iov_iter_init(&dst_iter, dst, dst_count, dst_offset)) {
+ return 0;
}
- return new_count;
+ total = 0;
+ do {
+ len = iov_iter_copy(&dst_iter, &src_iter, size);
+ total += len;
+ size -= len;
+ } while ((size > 0) && iov_iter_next(&src_iter, len) &&
+ iov_iter_next(&dst_iter, len));
+
+ return total;
}
static inline void
@@ -614,35 +756,7 @@ iov_load(const struct iovec *vector, int count, char *buf, int size)
static inline size_t
iov_copy(const struct iovec *dst, int dcnt, const struct iovec *src, int scnt)
{
- size_t ret = 0;
- size_t left = 0;
- size_t min_i = 0;
- int s_i = 0, s_ii = 0;
- int d_i = 0, d_ii = 0;
-
- ret = min(iov_length(dst, dcnt), iov_length(src, scnt));
- left = ret;
-
- while (left) {
- min_i = min(dst[d_i].iov_len - d_ii, src[s_i].iov_len - s_ii);
- memcpy(dst[d_i].iov_base + d_ii, src[s_i].iov_base + s_ii, min_i);
-
- d_ii += min_i;
- if (d_ii == dst[d_i].iov_len) {
- d_ii = 0;
- d_i++;
- }
-
- s_ii += min_i;
- if (s_ii == src[s_i].iov_len) {
- s_ii = 0;
- s_i++;
- }
-
- left -= min_i;
- }
-
- return ret;
+ return iov_range_copy(dst, dcnt, 0, src, scnt, 0, UINT32_MAX);
}
/* based on the amusing discussion @ https://rusty.ozlabs.org/?p=560 */
@@ -673,7 +787,7 @@ mem_0filled(const char *buf, size_t size)
}
static inline int
-iov_0filled(struct iovec *vector, int count)
+iov_0filled(const struct iovec *vector, int count)
{
int i = 0;
int ret = 0;
@@ -687,19 +801,6 @@ iov_0filled(struct iovec *vector, int count)
return ret;
}
-static inline void *
-memdup(const void *ptr, size_t size)
-{
- void *newptr = NULL;
-
- newptr = GF_MALLOC(size, gf_common_mt_memdup);
- if (!newptr)
- return NULL;
-
- memcpy(newptr, ptr, size);
- return newptr;
-}
-
typedef enum {
gf_timefmt_default = 0,
gf_timefmt_FT = 0, /* YYYY-MM-DD hh:mm:ss */
@@ -712,7 +813,7 @@ typedef enum {
} gf_timefmts;
static inline char *
-gf_time_fmt(char *dst, size_t sz_dst, time_t utime, unsigned int fmt)
+gf_time_fmt_tv(char *dst, size_t sz_dst, struct timeval *tv, unsigned int fmt)
{
extern void _gf_timestuff(const char ***, const char ***);
static gf_timefmts timefmt_last = (gf_timefmts)-1;
@@ -720,6 +821,8 @@ gf_time_fmt(char *dst, size_t sz_dst, time_t utime, unsigned int fmt)
static const char **zeros;
struct tm tm, *res;
int localtime = 0;
+ int len = 0;
+ int pos = 0;
if (timefmt_last == ((gf_timefmts)-1)) {
_gf_timestuff(&fmts, &zeros);
@@ -729,15 +832,35 @@ gf_time_fmt(char *dst, size_t sz_dst, time_t utime, unsigned int fmt)
fmt = gf_timefmt_default;
}
localtime = gf_log_get_localtime();
- res = localtime ? localtime_r(&utime, &tm) : gmtime_r(&utime, &tm);
- if (utime && (res != NULL)) {
- strftime(dst, sz_dst, fmts[fmt], &tm);
+ res = localtime ? localtime_r(&tv->tv_sec, &tm)
+ : gmtime_r(&tv->tv_sec, &tm);
+ if (tv->tv_sec && (res != NULL)) {
+ len = strftime(dst, sz_dst, fmts[fmt], &tm);
+ if (len == 0)
+ return dst;
+ pos += len;
+ if (tv->tv_usec >= 0) {
+ len = snprintf(dst + pos, sz_dst - pos, ".%" GF_PRI_SUSECONDS,
+ tv->tv_usec);
+ if (len >= sz_dst - pos)
+ return dst;
+ pos += len;
+ }
+ strftime(dst + pos, sz_dst - pos, " %z", &tm);
} else {
strncpy(dst, "N/A", sz_dst);
}
return dst;
}
+static inline char *
+gf_time_fmt(char *dst, size_t sz_dst, time_t utime, unsigned int fmt)
+{
+ struct timeval tv = {utime, -1};
+
+ return gf_time_fmt_tv(dst, sz_dst, &tv, fmt);
+}
+
/* This function helps us use gfid (unique identity) to generate inode's unique
* number in glusterfs.
*/
@@ -828,6 +951,8 @@ gf_string2percent_or_bytesize(const char *str, double *n,
int
gf_string2boolean(const char *str, gf_boolean_t *b);
int
+gf_strn2boolean(const char *str, const int len, gf_boolean_t *b);
+int
gf_string2percent(const char *str, double *n);
int
gf_string2time(const char *str, uint32_t *n);
@@ -838,12 +963,12 @@ int
gf_unlockfd(int fd);
int
-get_checksum_for_file(int fd, uint32_t *checksum);
+get_checksum_for_file(int fd, uint32_t *checksum, int op_version);
int
log_base2(unsigned long x);
int
-get_checksum_for_path(char *path, uint32_t *checksum);
+get_checksum_for_path(char *path, uint32_t *checksum, int op_version);
int
get_file_mtime(const char *path, time_t *stamp);
char *
@@ -879,7 +1004,8 @@ valid_ipv4_address(char *address, int length, gf_boolean_t wildcard_acc);
char
valid_ipv6_address(char *address, int length, gf_boolean_t wildcard_acc);
char
-valid_internet_address(char *address, gf_boolean_t wildcard_acc);
+valid_internet_address(char *address, gf_boolean_t wildcard_acc,
+ gf_boolean_t cidr);
gf_boolean_t
valid_mount_auth_address(char *address);
gf_boolean_t
@@ -913,8 +1039,6 @@ gf_is_str_int(const char *value);
char *gf_uint64_2human_readable(uint64_t);
int
-get_vol_type(int type, int dist_count, int brick_count);
-int
validate_brick_name(char *brick);
char *
get_host_name(char *word, char **host);
@@ -953,10 +1077,24 @@ gf_set_timestamp(const char *src, const char *dest);
int
gf_thread_create(pthread_t *thread, const pthread_attr_t *attr,
- void *(*start_routine)(void *), void *arg, const char *name);
+ void *(*start_routine)(void *), void *arg, const char *name,
+ ...) __attribute__((__format__(__printf__, 5, 6)));
+
+int
+gf_thread_vcreate(pthread_t *thread, const pthread_attr_t *attr,
+ void *(*start_routine)(void *), void *arg, const char *name,
+ va_list args);
int
gf_thread_create_detached(pthread_t *thread, void *(*start_routine)(void *),
- void *arg, const char *name);
+ void *arg, const char *name, ...)
+ __attribute__((__format__(__printf__, 4, 5)));
+
+void
+gf_thread_set_name(pthread_t thread, const char *name, ...)
+ __attribute__((__format__(__printf__, 2, 3)));
+
+void
+gf_thread_set_vname(pthread_t thread, const char *name, va_list args);
gf_boolean_t
gf_is_pid_running(int pid);
gf_boolean_t
@@ -1052,8 +1190,6 @@ gf_getgrouplist(const char *user, gid_t group, gid_t **groups);
int
glusterfs_compute_sha256(const unsigned char *content, size_t size,
char *sha256_hash);
-char *
-get_struct_variable(int mem_num, gf_gsync_status_t *sts_val);
char *
gf_strncpy(char *dest, const char *src, const size_t dest_size);
@@ -1067,4 +1203,54 @@ gf_replace_old_iatt_in_dict(struct _dict *);
int
gf_replace_new_iatt_in_dict(struct _dict *);
+xlator_cmdline_option_t *
+find_xlator_option_in_cmd_args_t(const char *option_name, cmd_args_t *args);
+
+int
+gf_d_type_from_ia_type(ia_type_t type);
+
+int
+gf_syncfs(int fd);
+
+int
+gf_nanosleep(uint64_t nsec);
+
+static inline time_t
+gf_time(void)
+{
+ return time(NULL);
+}
+
+/* Return delta value in microseconds. */
+
+static inline double
+gf_tvdiff(struct timeval *start, struct timeval *end)
+{
+ struct timeval t;
+
+ if (start->tv_usec > end->tv_usec)
+ t.tv_sec = end->tv_sec - 1, t.tv_usec = end->tv_usec + 1000000;
+ else
+ t.tv_sec = end->tv_sec, t.tv_usec = end->tv_usec;
+
+ return (double)(t.tv_sec - start->tv_sec) * 1e6 +
+ (double)(t.tv_usec - start->tv_usec);
+}
+
+/* Return delta value in nanoseconds. */
+
+static inline double
+gf_tsdiff(struct timespec *start, struct timespec *end)
+{
+ struct timespec t;
+
+ if (start->tv_nsec > end->tv_nsec)
+ t.tv_sec = end->tv_sec - 1, t.tv_nsec = end->tv_nsec + 1000000000;
+ else
+ t.tv_sec = end->tv_sec, t.tv_nsec = end->tv_nsec;
+
+ return (double)(t.tv_sec - start->tv_sec) * 1e9 +
+ (double)(t.tv_nsec - start->tv_nsec);
+}
+
#endif /* _COMMON_UTILS_H */
diff --git a/libglusterfs/src/compat-errno.h b/libglusterfs/src/glusterfs/compat-errno.h
index c4ab09ab0d5..c4ab09ab0d5 100644
--- a/libglusterfs/src/compat-errno.h
+++ b/libglusterfs/src/glusterfs/compat-errno.h
diff --git a/libglusterfs/src/compat-uuid.h b/libglusterfs/src/glusterfs/compat-uuid.h
index 6e7fdefbfab..6e7fdefbfab 100644
--- a/libglusterfs/src/compat-uuid.h
+++ b/libglusterfs/src/glusterfs/compat-uuid.h
diff --git a/libglusterfs/src/compat.h b/libglusterfs/src/glusterfs/compat.h
index 38c07b5ae7c..bf00d903152 100644
--- a/libglusterfs/src/compat.h
+++ b/libglusterfs/src/glusterfs/compat.h
@@ -40,10 +40,6 @@
#define GF_XATTR_NAME_MAX XATTR_NAME_MAX
#endif /* GF_LINUX_HOST_OS */
-#ifdef HAVE_XATTR_H
-#include <sys/xattr.h>
-#endif
-
/*
* Define the fallocate flags in case we do not have the header. This also
* accounts for older systems that do not define FALLOC_FL_PUNCH_HOLE.
@@ -116,6 +112,25 @@
#include <limits.h>
#include <libgen.h>
+/*
+ * This is where things like off64_t are defined.
+ * So include it before declaring _OFF64_T_DECLARED.
+ * If the freebsd version has support for off64_t
+ * including stdio.h should be sufficient.
+ */
+#include <stdio.h>
+
+#ifndef _OFF64_T_DECLARED
+/*
+ * Including <stdio.h> (done above) should actually define
+ * _OFF64_T_DECLARED with off64_t data type being available
+ * for consumption. But, off64_t data type is not recognizable
+ * for FreeBSD versions less than 11. Hence, int64_t is typedefed
+ * to off64_t.
+ */
+#define _OFF64_T_DECLARED
+typedef int64_t off64_t;
+#endif /* _OFF64_T_DECLARED */
#ifndef XATTR_CREATE
enum {
@@ -513,6 +528,9 @@ dirname_r(char *path);
/* Use run API, see run.h */
#include <stdlib.h> /* system(), mkostemp() */
#include <stdio.h> /* popen() */
+#ifdef GF_LINUX_HOST_OS
+#include <sys/sysmacros.h>
+#endif
#pragma GCC poison system mkostemp popen
#endif
diff --git a/libglusterfs/src/daemon.h b/libglusterfs/src/glusterfs/daemon.h
index 48850800b5e..48850800b5e 100644
--- a/libglusterfs/src/daemon.h
+++ b/libglusterfs/src/glusterfs/daemon.h
diff --git a/libglusterfs/src/default-args.h b/libglusterfs/src/glusterfs/default-args.h
index e5915c4c05e..ca7526fcab6 100644
--- a/libglusterfs/src/default-args.h
+++ b/libglusterfs/src/glusterfs/default-args.h
@@ -15,7 +15,7 @@
#ifndef _DEFAULT_ARGS_H
#define _DEFAULT_ARGS_H
-#include "xlator.h"
+#include "glusterfs/xlator.h"
int
args_lookup_cbk_store(default_args_cbk_t *args, int32_t op_ret,
@@ -234,6 +234,12 @@ void
args_lease_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
struct gf_lease *lease, dict_t *xdata);
+int
+args_copy_file_range_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *stbuf,
+ struct iatt *prebuf_dst,
+ struct iatt *postbuf_dst, dict_t *xdata);
+
void
args_cbk_wipe(default_args_cbk_t *args_cbk);
@@ -439,6 +445,11 @@ args_icreate_store(default_args_t *args, loc_t *loc, mode_t mode,
int
args_namelink_store(default_args_t *args, loc_t *loc, dict_t *xdata);
+int
+args_copy_file_range_store(default_args_t *args, fd_t *fd_in, off64_t off_in,
+ fd_t *fd_out, off_t off64_out, size_t len,
+ uint32_t flags, dict_t *xdata);
+
void
args_cbk_init(default_args_cbk_t *args_cbk);
#endif /* _DEFAULT_ARGS_H */
diff --git a/libglusterfs/src/defaults.h b/libglusterfs/src/glusterfs/defaults.h
index 8583935e069..5a818eeb91a 100644
--- a/libglusterfs/src/defaults.h
+++ b/libglusterfs/src/glusterfs/defaults.h
@@ -15,7 +15,7 @@
#ifndef _DEFAULTS_H
#define _DEFAULTS_H
-#include "xlator.h"
+#include "glusterfs/xlator.h"
typedef struct {
int op_ret;
@@ -48,10 +48,20 @@ typedef struct {
} default_args_cbk_t;
typedef struct {
- loc_t loc; /* @old in rename(), link() */
- loc_t loc2; /* @new in rename(), link() */
- fd_t *fd;
+ loc_t loc; /* @old in rename(), link() */
+ loc_t loc2; /* @new in rename(), link() */
+ fd_t *fd; /* for all the fd based ops */
+ fd_t *fd_dst; /* Only for copy_file_range destination */
off_t offset;
+ /*
+ * According to the man page of copy_file_range,
+ * the offsets for source and destination file
+ * are of type loff_t. But the type loff_t is
+ * linux specific and is actual a typedef of
+ * off64_t.
+ */
+ off64_t off_in; /* For copy_file_range source fd */
+ off64_t off_out; /* For copy_file_range destination fd only */
int mask;
size_t size;
mode_t mode;
@@ -323,6 +333,11 @@ int32_t
default_namelink(call_frame_t *frame, xlator_t *this, loc_t *loc,
dict_t *xdata);
+int32_t
+default_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in,
+ off64_t off_in, fd_t *fd_out, off64_t off_out,
+ size_t len, uint32_t flags, dict_t *xdata);
+
/* Resume */
int32_t
default_getspec_resume(call_frame_t *frame, xlator_t *this, const char *key,
@@ -542,6 +557,11 @@ default_put_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
int32_t count, off_t off, struct iobref *iobref,
dict_t *xattr, dict_t *xdata);
+int32_t
+default_copy_file_range_resume(call_frame_t *frame, xlator_t *this, fd_t *fd_in,
+ off_t off64_in, fd_t *fd_out, off64_t off_out,
+ size_t len, uint32_t flags, dict_t *xdata);
+
/* _cbk_resume */
int32_t
@@ -813,6 +833,13 @@ int32_t
default_namelink_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
dict_t *xdata);
+int32_t
+default_copy_file_range_cbk_resume(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *stbuf,
+ struct iatt *prebuf_dst,
+ struct iatt *postbuf_dst, dict_t *xdata);
+
/* _CBK */
int32_t
default_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
@@ -1072,6 +1099,12 @@ default_namelink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *postbuf, dict_t *xdata);
int32_t
+default_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *stbuf, struct iatt *prebuf_dst,
+ struct iatt *postbuf_dst, dict_t *xdata);
+
+int32_t
default_lookup_failure_cbk(call_frame_t *frame, int32_t op_errno);
int32_t
@@ -1231,6 +1264,9 @@ int32_t
default_namelink_failure_cbk(call_frame_t *frame, int32_t op_errno);
int32_t
+default_copy_file_range_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
default_mem_acct_init(xlator_t *this);
void
diff --git a/libglusterfs/src/dict.h b/libglusterfs/src/glusterfs/dict.h
index 6cbf0ce5a54..d0467c6dfb6 100644
--- a/libglusterfs/src/dict.h
+++ b/libglusterfs/src/glusterfs/dict.h
@@ -15,8 +15,7 @@
#include <sys/uio.h>
#include <pthread.h>
-#include "common-utils.h"
-#include "libglusterfs-messages.h"
+#include "glusterfs/common-utils.h"
typedef struct _data data_t;
typedef struct _dict dict_t;
@@ -26,9 +25,6 @@ typedef struct _data_pair data_pair_t;
#define dict_add_sizen(this, key, value) dict_addn(this, key, SLEN(key), value)
-#define dict_get_with_ref_sizen(this, key, value) \
- dict_get_with_refn(this, key, SLEN(key), value)
-
#define dict_get_sizen(this, key) dict_getn(this, key, SLEN(key))
#define dict_del_sizen(this, key) dict_deln(this, key, SLEN(key))
@@ -91,13 +87,15 @@ typedef struct _data_pair data_pair_t;
#define DICT_MAX_FLAGS 256
#define DICT_FLAG_SET 1
#define DICT_FLAG_CLEAR 0
+#define DICT_HDR_LEN 4
+#define DICT_DATA_HDR_KEY_LEN 4
+#define DICT_DATA_HDR_VAL_LEN 4
struct _data {
char *data;
gf_atomic_t refcount;
- gf_lock_t lock;
gf_dict_data_type_t data_type;
- int32_t len;
+ uint32_t len;
gf_boolean_t is_static;
};
@@ -117,12 +115,12 @@ struct _dict {
gf_atomic_t refcount;
data_pair_t **members;
data_pair_t *members_list;
- char *extra_free;
char *extra_stdfree;
gf_lock_t lock;
data_pair_t *members_internal;
data_pair_t free_pair;
- gf_boolean_t free_pair_in_use;
+ /* Variable to store total keylen + value->len */
+ uint32_t totkvlen;
};
typedef gf_boolean_t (*dict_match_t)(dict_t *d, char *k, data_t *v, void *data);
@@ -137,6 +135,7 @@ int32_t
dict_set(dict_t *this, char *key, data_t *value);
int32_t
dict_setn(dict_t *this, char *key, const int keylen, data_t *value);
+
/* function to set a new key/value pair (without checking for duplicate) */
int32_t
dict_add(dict_t *this, char *key, data_t *value);
@@ -144,8 +143,6 @@ int32_t
dict_addn(dict_t *this, char *key, const int keylen, data_t *value);
int
dict_get_with_ref(dict_t *this, char *key, data_t **data);
-int
-dict_get_with_refn(dict_t *this, char *key, const int keylen, data_t **data);
data_t *
dict_get(dict_t *this, char *key);
data_t *
@@ -270,8 +267,6 @@ int
dict_remove_foreach_fn(dict_t *d, char *k, data_t *v, void *tmp);
dict_t *
dict_copy(dict_t *this, dict_t *new);
-dict_t *
-get_new_dict(void);
int
dict_keys_join(void *value, int size, dict_t *dict,
int (*filter_fn)(char *key));
@@ -391,6 +386,11 @@ GF_MUST_CHECK int
dict_set_iatt(dict_t *this, char *key, struct iatt *iatt, bool is_static);
GF_MUST_CHECK int
dict_get_iatt(dict_t *this, char *key, struct iatt *iatt);
+GF_MUST_CHECK int
+dict_set_mdata(dict_t *this, char *key, struct mdata_iatt *mdata,
+ bool is_static);
+GF_MUST_CHECK int
+dict_get_mdata(dict_t *this, char *key, struct mdata_iatt *mdata);
void
dict_dump_to_statedump(dict_t *dict, char *dict_name, char *domain);
@@ -414,4 +414,7 @@ are_dicts_equal(dict_t *one, dict_t *two,
gf_boolean_t (*value_ignore)(char *k));
int
dict_has_key_from_array(dict_t *dict, char **strings, gf_boolean_t *result);
+
+int
+dict_serialized_length_lk(dict_t *this);
#endif
diff --git a/libglusterfs/src/event-history.h b/libglusterfs/src/glusterfs/event-history.h
index 5f0776ba9b0..f0e0422418e 100644
--- a/libglusterfs/src/event-history.h
+++ b/libglusterfs/src/glusterfs/event-history.h
@@ -11,8 +11,10 @@
#ifndef _EH_H
#define _EH_H
-#include "mem-types.h"
-#include "circ-buff.h"
+#include <pthread.h> // for pthread_mutex_t
+#include <stddef.h> // for size_t
+#include "glusterfs/circ-buff.h" // for buffer_t, circular_buffer_t
+#include "glusterfs/glusterfs.h" // for gf_boolean_t
struct event_hist {
buffer_t *buffer;
diff --git a/libglusterfs/src/events.h b/libglusterfs/src/glusterfs/events.h
index 74c5326427b..74c5326427b 100644
--- a/libglusterfs/src/events.h
+++ b/libglusterfs/src/glusterfs/events.h
diff --git a/libglusterfs/src/fd-lk.h b/libglusterfs/src/glusterfs/fd-lk.h
index 735f05ec46c..76cc680306a 100644
--- a/libglusterfs/src/fd-lk.h
+++ b/libglusterfs/src/glusterfs/fd-lk.h
@@ -11,14 +11,10 @@
#ifndef _FD_LK_H
#define _FD_LK_H
-#include "fd.h"
-#include "locking.h"
-#include "list.h"
-#include "logging.h"
-#include "mem-pool.h"
-#include "mem-types.h"
-#include "glusterfs.h"
-#include "common-utils.h"
+#include "glusterfs/fd.h"
+#include "glusterfs/locking.h"
+#include "glusterfs/list.h"
+#include "glusterfs/glusterfs.h"
#define get_lk_type(type) \
type == F_UNLCK ? "F_UNLCK" : (type == F_RDLCK ? "F_RDLCK" : "F_WRLCK")
diff --git a/libglusterfs/src/fd.h b/libglusterfs/src/glusterfs/fd.h
index e885ecb7786..3ffaaa60504 100644
--- a/libglusterfs/src/fd.h
+++ b/libglusterfs/src/glusterfs/fd.h
@@ -11,13 +11,12 @@
#ifndef _FD_H
#define _FD_H
-#include "list.h"
+#include "glusterfs/list.h"
#include <sys/types.h>
#include <unistd.h>
-#include "glusterfs.h"
-#include "locking.h"
-#include "fd-lk.h"
-#include "common-utils.h"
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/locking.h"
+#include "glusterfs/fd-lk.h"
#define GF_ANON_FD_NO -2
#define GF_ANON_FD_FLAGS (O_RDWR | O_LARGEFILE)
@@ -77,8 +76,8 @@ typedef struct _fdtable fdtable_t;
*/
#define GF_FDENTRY_ALLOCATED -2
-#include "logging.h"
-#include "xlator.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/xlator.h"
void
gf_fd_put(fdtable_t *fdtable, int32_t fd);
@@ -107,6 +106,9 @@ fd_ref(fd_t *fd);
void
fd_unref(fd_t *fd);
+void
+fd_close(fd_t *fd);
+
fd_t *
fd_create(struct _inode *inode, pid_t pid);
diff --git a/libglusterfs/src/gf-dirent.h b/libglusterfs/src/glusterfs/gf-dirent.h
index 95403e9eabd..e358da30f58 100644
--- a/libglusterfs/src/gf-dirent.h
+++ b/libglusterfs/src/glusterfs/gf-dirent.h
@@ -11,8 +11,8 @@
#ifndef _GF_DIRENT_H
#define _GF_DIRENT_H
-#include "iatt.h"
-#include "inode.h"
+#include "glusterfs/iatt.h"
+#include "glusterfs/inode.h"
#define gf_dirent_size(name) (sizeof(gf_dirent_t) + strlen(name) + 1)
diff --git a/libglusterfs/src/gf-event.h b/libglusterfs/src/glusterfs/gf-event.h
index 5d92a2dd285..40f8fbdf10a 100644
--- a/libglusterfs/src/gf-event.h
+++ b/libglusterfs/src/glusterfs/gf-event.h
@@ -12,6 +12,7 @@
#define _GF_EVENT_H_
#include <pthread.h>
+#include "common-utils.h"
#include "list.h"
struct event_pool;
@@ -23,14 +24,17 @@ struct event_data {
int gen;
} __attribute__((__packed__, __may_alias__));
-typedef int (*event_handler_t)(int fd, int idx, int gen, void *data,
- int poll_in, int poll_out, int poll_err,
- char event_thread_exit);
+typedef void (*event_handler_t)(int fd, int idx, int gen, void *data,
+ int poll_in, int poll_out, int poll_err,
+ char event_thread_exit);
#define EVENT_EPOLL_TABLES 1024
#define EVENT_EPOLL_SLOTS 1024
#define EVENT_MAX_THREADS 1024
+/* See rpcsvc.h to check why. */
+GF_STATIC_ASSERT(EVENT_MAX_THREADS % __BITS_PER_LONG == 0);
+
struct event_pool {
struct event_ops *ops;
@@ -110,26 +114,27 @@ struct event_ops {
};
struct event_pool *
-event_pool_new(int count, int eventthreadcount);
+gf_event_pool_new(int count, int eventthreadcount);
int
-event_select_on(struct event_pool *event_pool, int fd, int idx, int poll_in,
- int poll_out);
+gf_event_select_on(struct event_pool *event_pool, int fd, int idx, int poll_in,
+ int poll_out);
int
-event_register(struct event_pool *event_pool, int fd, event_handler_t handler,
- void *data, int poll_in, int poll_out, char notify_poller_death);
+gf_event_register(struct event_pool *event_pool, int fd,
+ event_handler_t handler, void *data, int poll_in,
+ int poll_out, char notify_poller_death);
int
-event_unregister(struct event_pool *event_pool, int fd, int idx);
+gf_event_unregister(struct event_pool *event_pool, int fd, int idx);
int
-event_unregister_close(struct event_pool *event_pool, int fd, int idx);
+gf_event_unregister_close(struct event_pool *event_pool, int fd, int idx);
int
-event_dispatch(struct event_pool *event_pool);
+gf_event_dispatch(struct event_pool *event_pool);
int
-event_reconfigure_threads(struct event_pool *event_pool, int value);
+gf_event_reconfigure_threads(struct event_pool *event_pool, int value);
int
-event_pool_destroy(struct event_pool *event_pool);
+gf_event_pool_destroy(struct event_pool *event_pool);
int
-event_dispatch_destroy(struct event_pool *event_pool);
+gf_event_dispatch_destroy(struct event_pool *event_pool);
int
-event_handled(struct event_pool *event_pool, int fd, int idx, int gen);
+gf_event_handled(struct event_pool *event_pool, int fd, int idx, int gen);
#endif /* _GF_EVENT_H_ */
diff --git a/libglusterfs/src/gidcache.h b/libglusterfs/src/glusterfs/gidcache.h
index 3a7de47dec7..ddaabd765b5 100644
--- a/libglusterfs/src/gidcache.h
+++ b/libglusterfs/src/glusterfs/gidcache.h
@@ -11,8 +11,8 @@
#ifndef __GIDCACHE_H__
#define __GIDCACHE_H__
-#include "glusterfs.h"
-#include "locking.h"
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/locking.h"
/*
* TBD: make the cache size tunable
diff --git a/libglusterfs/src/glfs-message-id.h b/libglusterfs/src/glusterfs/glfs-message-id.h
index 001f4abdf67..a1a16ca1efb 100644
--- a/libglusterfs/src/glfs-message-id.h
+++ b/libglusterfs/src/glusterfs/glfs-message-id.h
@@ -93,6 +93,7 @@ enum _msgid_comp {
GLFS_MSGID_COMP(TEMPLATE, 1),
GLFS_MSGID_COMP(UTIME, 1),
GLFS_MSGID_COMP(SNAPVIEW_SERVER, 1),
+ GLFS_MSGID_COMP(CVLT, 1),
/* --- new segments for messages goes above this line --- */
GLFS_MSGID_END
diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/glusterfs/globals.h
index 4fbe22b2c28..b22eaae6c2f 100644
--- a/libglusterfs/src/globals.h
+++ b/libglusterfs/src/glusterfs/globals.h
@@ -45,7 +45,7 @@
1 /* MIN is the fresh start op-version, mostly \
should not change */
#define GD_OP_VERSION_MAX \
- GD_OP_VERSION_6_0 /* MAX VERSION is the maximum \
+ GD_OP_VERSION_9_0 /* MAX VERSION is the maximum \
count in VME table, should \
keep changing with \
introduction of newer \
@@ -111,12 +111,23 @@
#define GD_OP_VERSION_5_0 50000 /* Op-version for GlusterFS 5.0 */
+#define GD_OP_VERSION_5_4 50400 /* Op-version for GlusterFS 5.4 */
+
#define GD_OP_VERSION_6_0 60000 /* Op-version for GlusterFS 6.0 */
+#define GD_OP_VERSION_7_0 70000 /* Op-version for GlusterFS 7.0 */
+#define GD_OP_VERSION_7_1 70100 /* Op-version for GlusterFS 7.1 */
+#define GD_OP_VERSION_7_2 70200 /* Op-version for GlusterFS 7.2 */
+#define GD_OP_VERSION_7_3 70300 /* Op-version for GlusterFS 7.3 */
+
+#define GD_OP_VERSION_8_0 80000 /* Op-version for GlusterFS 8.0 */
+
+#define GD_OP_VERSION_9_0 90000 /* Op-version for GlusterFS 9.0 */
+
#define GD_OP_VER_PERSISTENT_AFR_XATTRS GD_OP_VERSION_3_6_0
-#include "xlator.h"
-#include "options.h"
+#include "glusterfs/xlator.h"
+#include "glusterfs/options.h"
/* THIS */
#define THIS (*__glusterfs_this_location())
@@ -126,7 +137,7 @@ xlator_t **
__glusterfs_this_location(void);
xlator_t *
glusterfs_this_get(void);
-int
+void
glusterfs_this_set(xlator_t *);
extern xlator_t global_xlator;
@@ -135,13 +146,11 @@ extern struct volume_options global_xl_options[];
/* syncopctx */
void *
syncopctx_getctx(void);
-int
-syncopctx_setctx(void *ctx);
/* task */
void *
synctask_get(void);
-int
+void
synctask_set(void *);
/* uuid_buf */
@@ -160,6 +169,9 @@ glusterfs_leaseid_exist(void);
int
glusterfs_globals_init(glusterfs_ctx_t *ctx);
+void
+gf_thread_needs_cleanup(void);
+
struct tvec_base *
glusterfs_ctx_tw_get(glusterfs_ctx_t *ctx);
void
diff --git a/libglusterfs/src/glusterfs-acl.h b/libglusterfs/src/glusterfs/glusterfs-acl.h
index 8c731fc771b..987bf5fab0b 100644
--- a/libglusterfs/src/glusterfs-acl.h
+++ b/libglusterfs/src/glusterfs/glusterfs-acl.h
@@ -24,7 +24,7 @@
#include <stdint.h>
#include <sys/types.h> /* For uid_t */
-#include "locking.h" /* For gf_lock_t in struct posix_acl_conf */
+#include "glusterfs/locking.h" /* For gf_lock_t in struct posix_acl_conf */
#define ACL_PROGRAM 100227
#define ACLV3_VERSION 3
@@ -143,7 +143,7 @@ gf_posix_acl_get_key(const acl_type_t type)
return acl_key;
}
-static inline const acl_type_t
+static inline acl_type_t
gf_posix_acl_get_type(const char *key)
{
acl_type_t type = 0;
diff --git a/libglusterfs/src/glusterfs/glusterfs-fops.h b/libglusterfs/src/glusterfs/glusterfs-fops.h
new file mode 100644
index 00000000000..030b2701608
--- /dev/null
+++ b/libglusterfs/src/glusterfs/glusterfs-fops.h
@@ -0,0 +1,241 @@
+/*
+ Copyright (c) 2008-2019 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERFS_FOPS_H_
+#define _GLUSTERFS_FOPS_H_
+
+#include <glusterfs/compat.h>
+
+enum glusterfs_fop_t {
+ GF_FOP_NULL = 0,
+ GF_FOP_STAT = 0 + 1,
+ GF_FOP_READLINK = 0 + 2,
+ GF_FOP_MKNOD = 0 + 3,
+ GF_FOP_MKDIR = 0 + 4,
+ GF_FOP_UNLINK = 0 + 5,
+ GF_FOP_RMDIR = 0 + 6,
+ GF_FOP_SYMLINK = 0 + 7,
+ GF_FOP_RENAME = 0 + 8,
+ GF_FOP_LINK = 0 + 9,
+ GF_FOP_TRUNCATE = 0 + 10,
+ GF_FOP_OPEN = 0 + 11,
+ GF_FOP_READ = 0 + 12,
+ GF_FOP_WRITE = 0 + 13,
+ GF_FOP_STATFS = 0 + 14,
+ GF_FOP_FLUSH = 0 + 15,
+ GF_FOP_FSYNC = 0 + 16,
+ GF_FOP_SETXATTR = 0 + 17,
+ GF_FOP_GETXATTR = 0 + 18,
+ GF_FOP_REMOVEXATTR = 0 + 19,
+ GF_FOP_OPENDIR = 0 + 20,
+ GF_FOP_FSYNCDIR = 0 + 21,
+ GF_FOP_ACCESS = 0 + 22,
+ GF_FOP_CREATE = 0 + 23,
+ GF_FOP_FTRUNCATE = 0 + 24,
+ GF_FOP_FSTAT = 0 + 25,
+ GF_FOP_LK = 0 + 26,
+ GF_FOP_LOOKUP = 0 + 27,
+ GF_FOP_READDIR = 0 + 28,
+ GF_FOP_INODELK = 0 + 29,
+ GF_FOP_FINODELK = 0 + 30,
+ GF_FOP_ENTRYLK = 0 + 31,
+ GF_FOP_FENTRYLK = 0 + 32,
+ GF_FOP_XATTROP = 0 + 33,
+ GF_FOP_FXATTROP = 0 + 34,
+ GF_FOP_FGETXATTR = 0 + 35,
+ GF_FOP_FSETXATTR = 0 + 36,
+ GF_FOP_RCHECKSUM = 0 + 37,
+ GF_FOP_SETATTR = 0 + 38,
+ GF_FOP_FSETATTR = 0 + 39,
+ GF_FOP_READDIRP = 0 + 40,
+ GF_FOP_FORGET = 0 + 41,
+ GF_FOP_RELEASE = 0 + 42,
+ GF_FOP_RELEASEDIR = 0 + 43,
+ GF_FOP_GETSPEC = 0 + 44,
+ GF_FOP_FREMOVEXATTR = 0 + 45,
+ GF_FOP_FALLOCATE = 0 + 46,
+ GF_FOP_DISCARD = 0 + 47,
+ GF_FOP_ZEROFILL = 0 + 48,
+ GF_FOP_IPC = 0 + 49,
+ GF_FOP_SEEK = 0 + 50,
+ GF_FOP_LEASE = 0 + 51,
+ GF_FOP_COMPOUND = 0 + 52,
+ GF_FOP_GETACTIVELK = 0 + 53,
+ GF_FOP_SETACTIVELK = 0 + 54,
+ GF_FOP_PUT = 0 + 55,
+ GF_FOP_ICREATE = 0 + 56,
+ GF_FOP_NAMELINK = 0 + 57,
+ GF_FOP_COPY_FILE_RANGE = 0 + 58,
+ GF_FOP_MAXVALUE = 0 + 59,
+};
+typedef enum glusterfs_fop_t glusterfs_fop_t;
+
+enum glusterfs_event_t {
+ GF_EVENT_PARENT_UP = 1,
+ GF_EVENT_POLLIN = 1 + 1,
+ GF_EVENT_POLLOUT = 1 + 2,
+ GF_EVENT_POLLERR = 1 + 3,
+ GF_EVENT_CHILD_UP = 1 + 4,
+ GF_EVENT_CHILD_DOWN = 1 + 5,
+ GF_EVENT_CHILD_CONNECTING = 1 + 6,
+ GF_EVENT_CLEANUP = 9,
+ GF_EVENT_TRANSPORT_CONNECTED = 9 + 1,
+ GF_EVENT_VOLFILE_MODIFIED = 9 + 2,
+ GF_EVENT_GRAPH_NEW = 9 + 3,
+ GF_EVENT_TRANSLATOR_INFO = 9 + 4,
+ GF_EVENT_TRANSLATOR_OP = 9 + 5,
+ GF_EVENT_AUTH_FAILED = 9 + 6,
+ GF_EVENT_VOLUME_DEFRAG = 9 + 7,
+ GF_EVENT_PARENT_DOWN = 9 + 8,
+ GF_EVENT_VOLUME_BARRIER_OP = 9 + 9,
+ GF_EVENT_UPCALL = 9 + 10,
+ GF_EVENT_SCRUB_STATUS = 9 + 11,
+ GF_EVENT_SOME_DESCENDENT_DOWN = 9 + 12,
+ GF_EVENT_SCRUB_ONDEMAND = 9 + 13,
+ GF_EVENT_SOME_DESCENDENT_UP = 9 + 14,
+ GF_EVENT_CHILD_PING = 9 + 15,
+ GF_EVENT_MAXVAL = 9 + 16,
+};
+typedef enum glusterfs_event_t glusterfs_event_t;
+
+enum gf_op_type_t {
+ GF_OP_TYPE_NULL = 0,
+ GF_OP_TYPE_FOP = 0 + 1,
+ GF_OP_TYPE_MGMT = 0 + 2,
+ GF_OP_TYPE_MAX = 0 + 3,
+};
+typedef enum gf_op_type_t gf_op_type_t;
+
+enum glusterfs_lk_cmds_t {
+ GF_LK_GETLK = 0,
+ GF_LK_SETLK = 0 + 1,
+ GF_LK_SETLKW = 0 + 2,
+ GF_LK_RESLK_LCK = 0 + 3,
+ GF_LK_RESLK_LCKW = 0 + 4,
+ GF_LK_RESLK_UNLCK = 0 + 5,
+ GF_LK_GETLK_FD = 0 + 6,
+};
+typedef enum glusterfs_lk_cmds_t glusterfs_lk_cmds_t;
+
+enum glusterfs_lk_types_t {
+ GF_LK_F_RDLCK = 0,
+ GF_LK_F_WRLCK = 0 + 1,
+ GF_LK_F_UNLCK = 0 + 2,
+ GF_LK_EOL = 0 + 3,
+};
+typedef enum glusterfs_lk_types_t glusterfs_lk_types_t;
+
+enum gf_lease_types_t {
+ NONE = 0,
+ GF_RD_LEASE = 1,
+ GF_RW_LEASE = 2,
+ GF_LEASE_MAX_TYPE = 2 + 1,
+};
+typedef enum gf_lease_types_t gf_lease_types_t;
+
+enum gf_lease_cmds_t {
+ GF_GET_LEASE = 1,
+ GF_SET_LEASE = 2,
+ GF_UNLK_LEASE = 3,
+};
+typedef enum gf_lease_cmds_t gf_lease_cmds_t;
+
+#define LEASE_ID_SIZE 16 /* 128bits */
+
+struct gf_lease {
+ gf_lease_cmds_t cmd;
+ gf_lease_types_t lease_type;
+ char lease_id[LEASE_ID_SIZE];
+ u_int lease_flags;
+};
+typedef struct gf_lease gf_lease;
+
+enum glusterfs_lk_recovery_cmds_t {
+ F_RESLK_LCK = 200,
+ F_RESLK_LCKW = 200 + 1,
+ F_RESLK_UNLCK = 200 + 2,
+ F_GETLK_FD = 200 + 3,
+};
+typedef enum glusterfs_lk_recovery_cmds_t glusterfs_lk_recovery_cmds_t;
+
+enum gf_lk_domain_t {
+ GF_LOCK_POSIX = 0,
+ GF_LOCK_INTERNAL = 1,
+};
+typedef enum gf_lk_domain_t gf_lk_domain_t;
+
+enum entrylk_cmd {
+ ENTRYLK_LOCK = 0,
+ ENTRYLK_UNLOCK = 1,
+ ENTRYLK_LOCK_NB = 2,
+};
+typedef enum entrylk_cmd entrylk_cmd;
+
+enum entrylk_type {
+ ENTRYLK_RDLCK = 0,
+ ENTRYLK_WRLCK = 1,
+};
+typedef enum entrylk_type entrylk_type;
+#define GF_MAX_LOCK_OWNER_LEN 1024 /* 1kB as per NLM */
+#define GF_LKOWNER_BUF_SIZE \
+ ((GF_MAX_LOCK_OWNER_LEN * 2) + (GF_MAX_LOCK_OWNER_LEN / 8))
+
+struct gf_lkowner_t {
+ int len;
+ char data[GF_MAX_LOCK_OWNER_LEN];
+};
+typedef struct gf_lkowner_t gf_lkowner_t;
+
+enum gf_xattrop_flags_t {
+ GF_XATTROP_ADD_ARRAY = 0,
+ GF_XATTROP_ADD_ARRAY64 = 1,
+ GF_XATTROP_OR_ARRAY = 2,
+ GF_XATTROP_AND_ARRAY = 3,
+ GF_XATTROP_GET_AND_SET = 4,
+ GF_XATTROP_ADD_ARRAY_WITH_DEFAULT = 5,
+ GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT = 6,
+};
+typedef enum gf_xattrop_flags_t gf_xattrop_flags_t;
+
+enum gf_seek_what_t {
+ GF_SEEK_DATA = 0,
+ GF_SEEK_HOLE = 1,
+};
+typedef enum gf_seek_what_t gf_seek_what_t;
+
+enum gf_upcall_flags_t {
+ GF_UPCALL_NULL = 0,
+ GF_UPCALL = 1,
+ GF_UPCALL_CI_STAT = 2,
+ GF_UPCALL_CI_XATTR = 3,
+ GF_UPCALL_CI_RENAME = 4,
+ GF_UPCALL_CI_NLINK = 5,
+ GF_UPCALL_CI_FORGET = 6,
+ GF_UPCALL_LEASE_RECALL = 7,
+ GF_UPCALL_FLAGS_MAXVALUE = 8,
+};
+typedef enum gf_upcall_flags_t gf_upcall_flags_t;
+
+enum gf_dict_data_type_t {
+ GF_DATA_TYPE_UNKNOWN = 0,
+ GF_DATA_TYPE_STR_OLD = 1,
+ GF_DATA_TYPE_INT = 2,
+ GF_DATA_TYPE_UINT = 3,
+ GF_DATA_TYPE_DOUBLE = 4,
+ GF_DATA_TYPE_STR = 5,
+ GF_DATA_TYPE_PTR = 6,
+ GF_DATA_TYPE_GFUUID = 7,
+ GF_DATA_TYPE_IATT = 8,
+ GF_DATA_TYPE_MDATA = 9,
+ GF_DATA_TYPE_MAX = 10,
+};
+typedef enum gf_dict_data_type_t gf_dict_data_type_t;
+
+#endif /* !_GLUSTERFS_FOPS_H */
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
index 4d5bac322fd..e6425618b7f 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs/glusterfs.h
@@ -14,6 +14,7 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
+#include <stdbool.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/types.h>
@@ -30,32 +31,21 @@
#include <limits.h> /* For PATH_MAX */
#include <openssl/sha.h>
-#include "glusterfs-fops.h" /* generated XDR values for FOPs */
-
-#ifndef IXDR_GET_LONG
-#define IXDR_GET_LONG(buf) ((long)IXDR_GET_U_INT32(buf))
-#endif
-#ifndef IXDR_PUT_LONG
-#define IXDR_PUT_LONG(buf, v) ((long)IXDR_PUT_INT32(buf, (long)(v)))
-#endif
-#ifndef IXDR_GET_U_LONG
-#define IXDR_GET_U_LONG(buf) ((u_long)IXDR_GET_LONG(buf))
-#endif
-#ifndef IXDR_PUT_U_LONG
-#define IXDR_PUT_U_LONG(buf, v) IXDR_PUT_LONG(buf, (long)(v))
-#endif
-
-#include "list.h"
-#include "locking.h"
-#include "logging.h"
-#include "lkowner.h"
-#include "compat-uuid.h"
-#include "refcount.h"
-#include "atomic.h"
+#include "glusterfs/glusterfs-fops.h"
+#include "glusterfs/list.h"
+#include "glusterfs/locking.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/lkowner.h"
+#include "glusterfs/compat-uuid.h"
+#include "glusterfs/refcount.h"
+#include "glusterfs/atomic.h"
#define GF_YES 1
#define GF_NO 0
+#define IS_ERROR(ret) ((ret) < 0)
+#define IS_SUCCESS(ret) ((ret) >= 0)
+
#ifndef O_LARGEFILE
/* savannah bug #20053, patch for compiling on darwin */
#define O_LARGEFILE 0100000 /* from bits/fcntl.h */
@@ -90,7 +80,7 @@
#define GLUSTERD_MAX_SNAP_NAME 255
#define GLUSTERFS_SOCKET_LISTEN_BACKLOG 1024
-
+#define GLUSTERD_BRICK_SERVERS "cluster.brick-vol-servers"
#define SLEN(str) (sizeof(str) - 1)
#define ZR_MOUNTPOINT_OPT "mountpoint"
@@ -178,6 +168,8 @@
#define GLUSTERFS_INTERNAL_FOP_KEY "glusterfs-internal-fop"
+#define GF_ENFORCE_MANDATORY_LOCK "trusted.glusterfs.enforce-mandatory-lock"
+
/* GlusterFS Internal FOP Indicator flags
* (To pass information on the context in which a paritcular
* fop is performed between translators)
@@ -214,6 +206,8 @@ enum gf_internal_fop_indicator {
#define GLUSTERFS_POSIXLK_COUNT "glusterfs.posixlk-count"
#define GLUSTERFS_PARENT_ENTRYLK "glusterfs.parent-entrylk"
#define GLUSTERFS_INODELK_DOM_COUNT "glusterfs.inodelk-dom-count"
+#define GLUSTERFS_INODELK_DOM_PREFIX "glusterfs.inodelk-dom-prefix"
+#define GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS "glusterfs.multi-dom-lk-cnt-req"
#define GFID_TO_PATH_KEY "glusterfs.gfid2path"
#define GF_XATTR_STIME_PATTERN "trusted.glusterfs.*.stime"
#define GF_XATTR_XTIME_PATTERN "trusted.glusterfs.*.xtime"
@@ -226,6 +220,9 @@ enum gf_internal_fop_indicator {
#define VIRTUAL_QUOTA_XATTR_CLEANUP_KEY "glusterfs.quota-xattr-cleanup"
#define QUOTA_READ_ONLY_KEY "trusted.glusterfs.quota.read-only"
+/* ctime related */
+#define CTIME_MDATA_XDATA_KEY "set-ctime-mdata"
+
/* afr related */
#define AFR_XATTR_PREFIX "trusted.afr"
@@ -270,7 +267,7 @@ enum gf_internal_fop_indicator {
#define GLUSTERFS_RPC_REPLY_SIZE 24
-#define STARTING_EVENT_THREADS 1
+#define STARTING_EVENT_THREADS 2
#define DEFAULT_VAR_RUN_DIRECTORY DATADIR "/run/gluster"
#define DEFAULT_GLUSTERFSD_MISC_DIRETORY DATADIR "/lib/misc/glusterfsd"
@@ -312,7 +309,6 @@ enum gf_internal_fop_indicator {
#define DHT_SKIP_NON_LINKTO_UNLINK "unlink-only-if-dht-linkto-file"
#define TIER_SKIP_NON_LINKTO_UNLINK "unlink-only-if-tier-linkto-file"
-#define TIER_LINKFILE_GFID "tier-linkfile-gfid"
#define DHT_SKIP_OPEN_FD_UNLINK "dont-unlink-for-open-fd"
#define DHT_IATT_IN_XDATA_KEY "dht-get-iatt-in-xattr"
#define DHT_MODE_IN_XDATA_KEY "dht-get-mode-in-xattr"
@@ -325,6 +321,8 @@ enum gf_internal_fop_indicator {
#define GF_RESPONSE_LINK_COUNT_XDATA "gf_response_link_count"
#define GF_REQUEST_LINK_COUNT_XDATA "gf_request_link_count"
+#define GF_GET_FILE_BLOCK_COUNT "gf_get_file_block_count"
+
#define CTR_ATTACH_TIER_LOOKUP "ctr_attach_tier_lookup"
#define CLIENT_CMD_CONNECT "trusted.glusterfs.client-connect"
@@ -361,6 +359,10 @@ enum gf_internal_fop_indicator {
} while (0)
#define GF_CS_OBJECT_SIZE "trusted.glusterfs.cs.object_size"
+#define GF_CS_BLOCK_SIZE "trusted.glusterfs.cs.block_size"
+#define GF_CS_NUM_BLOCKS "trusted.glusterfs.cs.num_blocks"
+
+#define GF_CS_XATTR_ARCHIVE_UUID "trusted.cloudsync.uuid"
#define GF_CS_OBJECT_UPLOAD_COMPLETE "trusted.glusterfs.csou.complete"
#define GF_CS_OBJECT_REMOTE "trusted.glusterfs.cs.remote"
@@ -369,6 +371,10 @@ enum gf_internal_fop_indicator {
#define GF_CS_OBJECT_STATUS "trusted.glusterfs.cs.status"
#define GF_CS_OBJECT_REPAIR "trusted.glusterfs.cs.repair"
+#define gf_boolean_t bool
+#define _gf_false false
+#define _gf_true true
+
typedef enum {
GF_CS_LOCAL = 1,
GF_CS_REMOTE = 2,
@@ -419,7 +425,7 @@ static const char *const FOP_PRI_STRINGS[] = {"HIGH", "NORMAL", "LOW", "LEAST"};
static inline const char *
fop_pri_to_string(gf_fop_pri_t pri)
{
- if (pri < 0)
+ if (IS_ERROR(pri))
return "UNSPEC";
if (pri >= GF_FOP_PRI_MAX)
@@ -460,6 +466,8 @@ typedef struct _server_cmdline server_cmdline_t;
#define GF_OPTION_DISABLE _gf_false
#define GF_OPTION_DEFERRED 2
+typedef enum { _gf_none, _gf_memcheck, _gf_drd } gf_valgrind_tool;
+
struct _cmd_args {
/* basic options */
char *volfile_server;
@@ -524,6 +532,8 @@ struct _cmd_args {
pid_t client_pid;
int client_pid_set;
unsigned uid_map_root;
+ int32_t lru_limit;
+ int32_t invalidate_limit;
int background_qlen;
int congestion_threshold;
char *fuse_mountopts;
@@ -550,7 +560,8 @@ struct _cmd_args {
/* Run this process with valgrind? Might want to prevent calling
* functions that prevent valgrind from working correctly, like
* dlclose(). */
- int valgrind;
+ gf_valgrind_tool vgtool;
+
int localtime_logging;
/* For the subdir mount */
@@ -566,21 +577,33 @@ struct _cmd_args {
uint32_t attr_times_granularity;
int fuse_flush_handle_interrupt;
+ int fuse_auto_inval;
+
+ bool global_threading;
+ bool brick_mux;
+
+ uint32_t fuse_dev_eperm_ratelimit_ns;
};
typedef struct _cmd_args cmd_args_t;
struct _glusterfs_graph {
struct list_head list;
- char graph_uuid[128];
struct timeval dob;
void *first;
void *top; /* selected by -n */
- uint32_t leaf_count;
int xl_count;
int id; /* Used in logging */
int used; /* Should be set when fuse gets
first CHILD_UP */
uint32_t volfile_checksum;
+ uint32_t leaf_count;
+ void *last_xl; /* Stores the last xl of the graph, as of now only populated
+ in client multiplexed code path */
+ pthread_mutex_t mutex;
+ pthread_cond_t child_down_cond; /* for broadcasting CHILD_DOWN */
+ int parent_down;
+ char graph_uuid[128];
+ char volume_id[GF_UUID_BUF_SIZE];
};
typedef struct _glusterfs_graph glusterfs_graph_t;
@@ -710,6 +733,15 @@ struct _glusterfs_ctx {
} stats;
struct list_head volfile_list;
+ /* Add members to manage janitor threads for cleanup fd */
+ struct list_head janitor_fds;
+ pthread_cond_t fd_cond;
+ pthread_mutex_t fd_lock;
+ pthread_t janitor;
+ /* The variable is use to save total posix xlator count */
+ uint32_t pxl_count;
+
+ char volume_id[GF_UUID_BUF_SIZE]; /* Used only in protocol/client */
};
typedef struct _glusterfs_ctx glusterfs_ctx_t;
@@ -717,7 +749,8 @@ typedef struct {
char volfile_checksum[SHA256_DIGEST_LENGTH];
char vol_id[NAME_MAX + 1];
struct list_head volfile_list;
-
+ glusterfs_graph_t *graph;
+ FILE *pidfp;
} gf_volfile_t;
glusterfs_ctx_t *
@@ -800,4 +833,6 @@ gf_free_mig_locks(lock_migration_info_t *locks);
int
glusterfs_read_secure_access_file(void);
+int
+glusterfs_graph_fini(glusterfs_graph_t *graph);
#endif /* _GLUSTERFS_H */
diff --git a/libglusterfs/src/graph-utils.h b/libglusterfs/src/glusterfs/graph-utils.h
index c0e87268c5c..247f1a55d5a 100644
--- a/libglusterfs/src/graph-utils.h
+++ b/libglusterfs/src/glusterfs/graph-utils.h
@@ -13,10 +13,6 @@
int
glusterfs_graph_print_file(FILE *file, glusterfs_graph_t *graph);
-
-char *
-glusterfs_graph_print_buf(glusterfs_graph_t *graph);
-
int
glusterfs_xlator_link(xlator_t *pxl, xlator_t *cxl);
void
diff --git a/libglusterfs/src/hashfn.h b/libglusterfs/src/glusterfs/hashfn.h
index a4cb33f072a..6e92e706d8c 100644
--- a/libglusterfs/src/hashfn.h
+++ b/libglusterfs/src/glusterfs/hashfn.h
@@ -20,6 +20,4 @@ SuperFastHash(const char *data, int32_t len);
uint32_t
gf_dm_hashfn(const char *msg, int len);
-uint32_t
-ReallySimpleHash(char *path, int len);
#endif /* __HASHFN_H__ */
diff --git a/libglusterfs/src/iatt.h b/libglusterfs/src/glusterfs/iatt.h
index db366472970..f03d68b02f0 100644
--- a/libglusterfs/src/iatt.h
+++ b/libglusterfs/src/glusterfs/iatt.h
@@ -18,8 +18,8 @@
#include <sys/stat.h> /* for iatt <--> stat conversions */
#include <unistd.h>
-#include "compat.h"
-#include "compat-uuid.h"
+#include "glusterfs/compat.h"
+#include "glusterfs/compat-uuid.h"
typedef enum {
IA_INVAL = 0,
@@ -92,6 +92,15 @@ struct old_iatt {
uint32_t ia_ctime_nsec;
};
+struct mdata_iatt {
+ int64_t ia_atime; /* last access time */
+ int64_t ia_mtime; /* last modification time */
+ int64_t ia_ctime; /* last status change time */
+ uint32_t ia_atime_nsec;
+ uint32_t ia_mtime_nsec;
+ uint32_t ia_ctime_nsec;
+};
+
/* 64-bit mask for valid members in struct iatt. */
#define IATT_TYPE 0x0000000000000001U
#define IATT_MODE 0x0000000000000002U
@@ -264,12 +273,10 @@ st_mode_prot_from_ia(ia_prot_t prot)
return prot_bit;
}
-static inline mode_t
-st_mode_from_ia(ia_prot_t prot, ia_type_t type)
+static inline uint32_t
+st_mode_type_from_ia(ia_type_t type)
{
- mode_t st_mode = 0;
uint32_t type_bit = 0;
- uint32_t prot_bit = 0;
switch (type) {
case IA_IFREG:
@@ -297,6 +304,17 @@ st_mode_from_ia(ia_prot_t prot, ia_type_t type)
break;
}
+ return type_bit;
+}
+
+static inline mode_t
+st_mode_from_ia(ia_prot_t prot, ia_type_t type)
+{
+ mode_t st_mode = 0;
+ uint32_t type_bit = 0;
+ uint32_t prot_bit = 0;
+
+ type_bit = st_mode_type_from_ia(type);
prot_bit = st_mode_prot_from_ia(prot);
st_mode = (type_bit | prot_bit);
@@ -304,6 +322,17 @@ st_mode_from_ia(ia_prot_t prot, ia_type_t type)
return st_mode;
}
+static inline void
+iatt_to_mdata(struct mdata_iatt *mdata, struct iatt *iatt)
+{
+ mdata->ia_atime = iatt->ia_atime;
+ mdata->ia_atime_nsec = iatt->ia_atime_nsec;
+ mdata->ia_mtime = iatt->ia_mtime;
+ mdata->ia_mtime_nsec = iatt->ia_mtime_nsec;
+ mdata->ia_ctime = iatt->ia_ctime;
+ mdata->ia_ctime_nsec = iatt->ia_ctime_nsec;
+}
+
static inline int
iatt_from_stat(struct iatt *iatt, struct stat *stat)
{
diff --git a/libglusterfs/src/inode.h b/libglusterfs/src/glusterfs/inode.h
index 5745ea643a3..4b28da510c7 100644
--- a/libglusterfs/src/inode.h
+++ b/libglusterfs/src/glusterfs/inode.h
@@ -28,10 +28,10 @@ typedef struct _inode inode_t;
struct _dentry;
typedef struct _dentry dentry_t;
-#include "list.h"
-#include "iatt.h"
-#include "compat-uuid.h"
-#include "fd.h"
+#include "glusterfs/list.h"
+#include "glusterfs/iatt.h"
+#include "glusterfs/compat-uuid.h"
+#include "glusterfs/fd.h"
struct _inode_table {
pthread_mutex_t lock;
@@ -54,6 +54,17 @@ struct _inode_table {
struct mem_pool *dentry_pool; /* memory pool for dentrys */
struct mem_pool *fd_mem_pool; /* memory pool for fd_t */
int ctxcount; /* number of slots in inode->ctx */
+
+ /* This is required for 'invalidation' when 'nlookup' would be used,
+ specially in case of fuse-bridge */
+ int32_t (*invalidator_fn)(xlator_t *, inode_t *);
+ xlator_t *invalidator_xl;
+ struct list_head invalidate; /* inodes which are in invalidation queue */
+ uint32_t invalidate_size; /* count of inodes in invalidation list */
+
+ /* flag to indicate whether the cleanup of the inode
+ table started or not */
+ gf_boolean_t cleanup_started;
};
struct _dentry {
@@ -100,6 +111,8 @@ struct _inode {
struct list_head list; /* active/lru/purge */
struct _inode_ctx *_ctx; /* replacement for dict_t *(inode->ctx) */
+ bool in_invalidate_list; /* Set if inode is in table invalidate list */
+ bool invalidate_sent; /* Set it if invalidator_fn is called for inode */
};
#define UUID0_STR "00000000-0000-0000-0000-000000000000"
@@ -107,7 +120,12 @@ struct _inode {
#define GFID_STR_PFX_LEN (sizeof(GFID_STR_PFX) - 1)
inode_table_t *
-inode_table_new(size_t lru_limit, xlator_t *xl);
+inode_table_new(uint32_t lru_limit, xlator_t *xl);
+
+inode_table_t *
+inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
+ int32_t (*invalidator_fn)(xlator_t *, inode_t *),
+ xlator_t *invalidator_xl);
void
inode_table_destroy_all(glusterfs_ctx_t *ctx);
@@ -139,6 +157,8 @@ inode_lookup(inode_t *inode);
int
inode_forget(inode_t *inode, uint64_t nlookup);
+int
+inode_forget_with_unref(inode_t *inode, uint64_t nlookup);
int
inode_ref_reduce_by_n(inode_t *inode, uint64_t nref);
@@ -151,9 +171,6 @@ inode_rename(inode_table_t *table, inode_t *olddir, const char *oldname,
inode_t *newdir, const char *newname, inode_t *inode,
struct iatt *stbuf);
-dentry_t *
-__dentry_grep(inode_table_t *table, inode_t *parent, const char *name);
-
inode_t *
inode_grep(inode_table_t *table, inode_t *parent, const char *name);
diff --git a/libglusterfs/src/iobuf.h b/libglusterfs/src/glusterfs/iobuf.h
index 41ac5dd191c..4bd443efd5e 100644
--- a/libglusterfs/src/iobuf.h
+++ b/libglusterfs/src/glusterfs/iobuf.h
@@ -11,11 +11,12 @@
#ifndef _IOBUF_H_
#define _IOBUF_H_
-#include "list.h"
-#include "common-utils.h"
-#include <pthread.h>
+#include <stddef.h> // for size_t
#include <sys/mman.h>
-#include <sys/uio.h>
+#include "glusterfs/atomic.h" // for gf_atomic_t
+#include <sys/uio.h> // for struct iovec
+#include "glusterfs/locking.h" // for gf_lock_t
+#include "glusterfs/list.h"
#define GF_VARIABLE_IOBUF_COUNT 32
@@ -94,14 +95,14 @@ struct iobuf_arena {
void *mem_base;
struct iobuf *iobufs; /* allocated iobufs list */
- int active_cnt;
- struct iobuf active; /* head node iobuf
- (unused by itself) */
- int passive_cnt;
+ struct iobuf active; /* head node iobuf
+ (unused by itself) */
struct iobuf passive; /* head node iobuf
(unused by itself) */
uint64_t alloc_cnt; /* total allocs in this pool */
- int max_active; /* max active buffers at a given time */
+ int active_cnt;
+ int passive_cnt;
+ int max_active; /* max active buffers at a given time */
};
struct iobuf_pool {
@@ -110,7 +111,6 @@ struct iobuf_pool {
arena */
size_t default_page_size; /* default size of iobuf */
- int arena_cnt;
struct list_head all_arenas;
struct list_head arenas[GF_VARIABLE_IOBUF_COUNT];
/* array of arenas. Each element of the array is a list of arenas
@@ -124,6 +124,7 @@ struct iobuf_pool {
uint64_t request_misses; /* mostly the requests for higher
value of iobufs */
+ int arena_cnt;
int rdma_device_count;
struct list_head *mr_list[GF_RDMA_DEVICE_COUNT];
void *device[GF_RDMA_DEVICE_COUNT];
diff --git a/libglusterfs/src/latency.h b/libglusterfs/src/glusterfs/latency.h
index 063ea291ee2..4d601bbcbd6 100644
--- a/libglusterfs/src/latency.h
+++ b/libglusterfs/src/glusterfs/latency.h
@@ -11,13 +11,23 @@
#ifndef __LATENCY_H__
#define __LATENCY_H__
-#include "glusterfs.h"
+#include <inttypes.h>
+#include <time.h>
-typedef struct fop_latency {
- double min; /* min time for the call (microseconds) */
- double max; /* max time for the call (microseconds) */
- double total; /* total time (microseconds) */
+typedef struct _gf_latency {
+ uint64_t min; /* min time for the call (nanoseconds) */
+ uint64_t max; /* max time for the call (nanoseconds) */
+ uint64_t total; /* total time (nanoseconds) */
uint64_t count;
-} fop_latency_t;
+} gf_latency_t;
+gf_latency_t *
+gf_latency_new(size_t n);
+
+void
+gf_latency_reset(gf_latency_t *lat);
+
+void
+gf_latency_update(gf_latency_t *lat, struct timespec *begin,
+ struct timespec *end);
#endif /* __LATENCY_H__ */
diff --git a/libglusterfs/src/glusterfs/libglusterfs-messages.h b/libglusterfs/src/glusterfs/libglusterfs-messages.h
new file mode 100644
index 00000000000..cb31dd7614b
--- /dev/null
+++ b/libglusterfs/src/glusterfs/libglusterfs-messages.h
@@ -0,0 +1,245 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#ifndef _LG_MESSAGES_H_
+#define _LG_MESSAGES_H_
+
+#include "glusterfs/glfs-message-id.h"
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(
+ LIBGLUSTERFS, LG_MSG_ASPRINTF_FAILED, LG_MSG_INVALID_ENTRY,
+ LG_MSG_COUNT_LESS_THAN_ZERO, LG_MSG_COUNT_LESS_THAN_DATA_PAIRS,
+ LG_MSG_VALUE_LENGTH_LESS_THAN_ZERO, LG_MSG_PAIRS_LESS_THAN_COUNT,
+ LG_MSG_KEY_OR_VALUE_NULL, LG_MSG_FAILED_TO_LOG_DICT,
+ LG_MSG_NULL_VALUE_IN_DICT, LG_MSG_DIR_OP_FAILED,
+ LG_MSG_STORE_HANDLE_CREATE_FAILED, LG_MSG_FILE_OP_FAILED,
+ LG_MSG_FILE_STAT_FAILED, LG_MSG_LOCK_FAILED, LG_MSG_UNLOCK_FAILED,
+ LG_MSG_DICT_SERIAL_FAILED, LG_MSG_DICT_UNSERIAL_FAILED, LG_MSG_NO_MEMORY,
+ LG_MSG_VOLUME_ERROR, LG_MSG_SUB_VOLUME_ERROR, LG_MSG_SYNTAX_ERROR,
+ LG_MSG_BACKTICK_PARSE_FAILED, LG_MSG_BUFFER_ERROR, LG_MSG_STRDUP_ERROR,
+ LG_MSG_HASH_FUNC_ERROR, LG_MSG_GET_BUCKET_FAILED, LG_MSG_INSERT_FAILED,
+ LG_MSG_OUT_OF_RANGE, LG_MSG_VALIDATE_RETURNS, LG_MSG_VALIDATE_REC_FAILED,
+ LG_MSG_RB_TABLE_CREATE_FAILED, LG_MSG_PATH_NOT_FOUND,
+ LG_MSG_EXPAND_FD_TABLE_FAILED, LG_MSG_MAPPING_FAILED,
+ LG_MSG_INIT_IOBUF_FAILED, LG_MSG_PAGE_SIZE_EXCEEDED, LG_MSG_ARENA_NOT_FOUND,
+ LG_MSG_IOBUF_NOT_FOUND, LG_MSG_POOL_NOT_FOUND, LG_MSG_SET_ATTRIBUTE_FAILED,
+ LG_MSG_READ_ATTRIBUTE_FAILED, LG_MSG_UNMOUNT_FAILED,
+ LG_MSG_LATENCY_MEASUREMENT_STATE, LG_MSG_NO_PERM, LG_MSG_NO_KEY,
+ LG_MSG_DICT_NULL, LG_MSG_INIT_TIMER_FAILED, LG_MSG_FD_ANONYMOUS_FAILED,
+ LG_MSG_FD_CREATE_FAILED, LG_MSG_BUFFER_FULL, LG_MSG_FWRITE_FAILED,
+ LG_MSG_PRINT_FAILED, LG_MSG_MEM_POOL_DESTROY,
+ LG_MSG_EXPAND_CLIENT_TABLE_FAILED, LG_MSG_DISCONNECT_CLIENT,
+ LG_MSG_PIPE_CREATE_FAILED, LG_MSG_SET_PIPE_FAILED,
+ LG_MSG_REGISTER_PIPE_FAILED, LG_MSG_POLL_IGNORE_MULTIPLE_THREADS,
+ LG_MSG_INDEX_NOT_FOUND, LG_MSG_EPOLL_FD_CREATE_FAILED,
+ LG_MSG_SLOT_NOT_FOUND, LG_MSG_STALE_FD_FOUND, LG_MSG_GENERATION_MISMATCH,
+ LG_MSG_PTHREAD_KEY_CREATE_FAILED, LG_MSG_TRANSLATOR_INIT_FAILED,
+ LG_MSG_UUID_BUF_INIT_FAILED, LG_MSG_LKOWNER_BUF_INIT_FAILED,
+ LG_MSG_SYNCTASK_INIT_FAILED, LG_MSG_SYNCOPCTX_INIT_FAILED,
+ LG_MSG_GLOBAL_INIT_FAILED, LG_MSG_PTHREAD_FAILED, LG_MSG_DIR_IS_SYMLINK,
+ LG_MSG_RESOLVE_HOSTNAME_FAILED, LG_MSG_GETADDRINFO_FAILED,
+ LG_MSG_GETNAMEINFO_FAILED, LG_MSG_PATH_ERROR, LG_MSG_INET_PTON_FAILED,
+ LG_MSG_NEGATIVE_NUM_PASSED, LG_MSG_GETHOSTNAME_FAILED,
+ LG_MSG_RESERVED_PORTS_ERROR, LG_MSG_INVALID_PORT, LG_MSG_INVALID_FAMILY,
+ LG_MSG_CONVERSION_FAILED, LG_MSG_SKIP_HEADER_FAILED, LG_MSG_INVALID_LOG,
+ LG_MSG_UTIMES_FAILED, LG_MSG_BACKTRACE_SAVE_FAILED, LG_MSG_INIT_FAILED,
+ LG_MSG_VALIDATION_FAILED, LG_MSG_GRAPH_ERROR, LG_MSG_UNKNOWN_OPTIONS_FAILED,
+ LG_MSG_CTX_NULL, LG_MSG_TMPFILE_CREATE_FAILED, LG_MSG_DLOPEN_FAILED,
+ LG_MSG_LOAD_FAILED, LG_MSG_DLSYM_ERROR, LG_MSG_TREE_NOT_FOUND,
+ LG_MSG_PER_DENTRY, LG_MSG_DENTRY, LG_MSG_GETIFADDRS_FAILED,
+ LG_MSG_REGEX_OP_FAILED, LG_MSG_FRAME_ERROR, LG_MSG_SET_PARAM_FAILED,
+ LG_MSG_GET_PARAM_FAILED, LG_MSG_PREPARE_FAILED, LG_MSG_EXEC_FAILED,
+ LG_MSG_BINDING_FAILED, LG_MSG_DELETE_FAILED, LG_MSG_GET_ID_FAILED,
+ LG_MSG_CREATE_FAILED, LG_MSG_PARSE_FAILED, LG_MSG_GETCONTEXT_FAILED,
+ LG_MSG_UPDATE_FAILED, LG_MSG_QUERY_CALL_BACK_FAILED,
+ LG_MSG_GET_RECORD_FAILED, LG_MSG_DB_ERROR, LG_MSG_CONNECTION_ERROR,
+ LG_MSG_NOT_MULTITHREAD_MODE, LG_MSG_SKIP_PATH, LG_MSG_INVALID_FOP,
+ LG_MSG_QUERY_FAILED, LG_MSG_CLEAR_COUNTER_FAILED, LG_MSG_LOCK_LIST_FAILED,
+ LG_MSG_UNLOCK_LIST_FAILED, LG_MSG_ADD_TO_LIST_FAILED, LG_MSG_INIT_DB_FAILED,
+ LG_MSG_DELETE_FROM_LIST_FAILED, LG_MSG_CLOSE_CONNECTION_FAILED,
+ LG_MSG_INSERT_OR_UPDATE_FAILED, LG_MSG_FIND_OP_FAILED,
+ LG_MSG_CONNECTION_INIT_FAILED, LG_MSG_COMPLETED_TASK, LG_MSG_WAKE_UP_ZOMBIE,
+ LG_MSG_REWAITING_TASK, LG_MSG_SLEEP_ZOMBIE, LG_MSG_SWAPCONTEXT_FAILED,
+ LG_MSG_UNSUPPORTED_PLUGIN, LG_MSG_INVALID_DB_TYPE, LG_MSG_UNDERSIZED_BUF,
+ LG_MSG_DATA_CONVERSION_ERROR, LG_MSG_DICT_ERROR, LG_MSG_IOBUFS_NOT_FOUND,
+ LG_MSG_ENTRIES_NULL, LG_MSG_FD_NOT_FOUND_IN_FDTABLE,
+ LG_MSG_REALLOC_FOR_FD_PTR_FAILED, LG_MSG_DICT_SET_FAILED, LG_MSG_NULL_PTR,
+ LG_MSG_RBTHASH_INIT_BUCKET_FAILED, LG_MSG_ASSERTION_FAILED,
+ LG_MSG_HOSTNAME_NULL, LG_MSG_INVALID_IPV4_FORMAT,
+ LG_MSG_CTX_CLEANUP_STARTED, LG_MSG_TIMER_REGISTER_ERROR,
+ LG_MSG_PTR_HEADER_CORRUPTED, LG_MSG_INVALID_UPLINK, LG_MSG_CLIENT_NULL,
+ LG_MSG_XLATOR_DOES_NOT_IMPLEMENT, LG_MSG_DENTRY_NOT_FOUND,
+ LG_MSG_INODE_NOT_FOUND, LG_MSG_INODE_TABLE_NOT_FOUND,
+ LG_MSG_DENTRY_CREATE_FAILED, LG_MSG_INODE_CONTEXT_FREED,
+ LG_MSG_UNKNOWN_LOCK_TYPE, LG_MSG_UNLOCK_BEFORE_LOCK,
+ LG_MSG_LOCK_OWNER_ERROR, LG_MSG_MEMPOOL_PTR_NULL,
+ LG_MSG_QUOTA_XATTRS_MISSING, LG_MSG_INVALID_STRING, LG_MSG_BIND_REF,
+ LG_MSG_REF_COUNT, LG_MSG_INVALID_ARG, LG_MSG_VOL_OPTION_ADD,
+ LG_MSG_XLATOR_OPTION_INVALID, LG_MSG_GETTIMEOFDAY_FAILED,
+ LG_MSG_GRAPH_INIT_FAILED, LG_MSG_EVENT_NOTIFY_FAILED,
+ LG_MSG_ACTIVE_GRAPH_NULL, LG_MSG_VOLFILE_PARSE_ERROR, LG_MSG_FD_INODE_NULL,
+ LG_MSG_INVALID_VOLFILE_ENTRY, LG_MSG_PER_DENTRY_FAILED,
+ LG_MSG_PARENT_DENTRY_NOT_FOUND, LG_MSG_DENTRY_CYCLIC_LOOP,
+ LG_MSG_INVALID_POLL_IN, LG_MSG_INVALID_POLL_OUT, LG_MSG_EPOLL_FD_ADD_FAILED,
+ LG_MSG_EPOLL_FD_DEL_FAILED, LG_MSG_EPOLL_FD_MODIFY_FAILED,
+ LG_MSG_STARTED_EPOLL_THREAD, LG_MSG_EXITED_EPOLL_THREAD,
+ LG_MSG_START_EPOLL_THREAD_FAILED, LG_MSG_FALLBACK_TO_POLL,
+ LG_MSG_QUOTA_CONF_ERROR, LG_MSG_RBTHASH_GET_ENTRY_FAILED,
+ LG_MSG_RBTHASH_GET_BUCKET_FAILED, LG_MSG_RBTHASH_INSERT_FAILED,
+ LG_MSG_RBTHASH_INIT_ENTRY_FAILED, LG_MSG_TMPFILE_DELETE_FAILED,
+ LG_MSG_MEMPOOL_INVALID_FREE, LG_MSG_LOCK_FAILURE, LG_MSG_SET_LOG_LEVEL,
+ LG_MSG_VERIFY_PLATFORM, LG_MSG_RUNNER_LOG, LG_MSG_LEASEID_BUF_INIT_FAILED,
+ LG_MSG_PTHREAD_ATTR_INIT_FAILED, LG_MSG_INVALID_INODE_LIST,
+ LG_MSG_COMPACT_FAILED, LG_MSG_COMPACT_STATUS, LG_MSG_UTIMENSAT_FAILED,
+ LG_MSG_PTHREAD_NAMING_FAILED, LG_MSG_SYSCALL_RETURNS_WRONG,
+ LG_MSG_XXH64_TO_GFID_FAILED, LG_MSG_ASYNC_WARNING, LG_MSG_ASYNC_FAILURE,
+ LG_MSG_GRAPH_CLEANUP_FAILED, LG_MSG_GRAPH_SETUP_FAILED,
+ LG_MSG_GRAPH_DETACH_STARTED, LG_MSG_GRAPH_ATTACH_FAILED,
+ LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED, LG_MSG_DUPLICATE_ENTRY,
+ LG_MSG_THREAD_NAME_TOO_LONG, LG_MSG_SET_THREAD_FAILED,
+ LG_MSG_THREAD_CREATE_FAILED, LG_MSG_FILE_DELETE_FAILED, LG_MSG_WRONG_VALUE,
+ LG_MSG_PATH_OPEN_FAILED, LG_MSG_DISPATCH_HANDLER_FAILED,
+ LG_MSG_READ_FILE_FAILED, LG_MSG_ENTRIES_NOT_PROVIDED,
+ LG_MSG_ENTRIES_PROVIDED, LG_MSG_UNKNOWN_OPTION_TYPE,
+ LG_MSG_OPTION_DEPRECATED, LG_MSG_INVALID_INIT, LG_MSG_OBJECT_NULL,
+ LG_MSG_GRAPH_NOT_SET, LG_MSG_FILENAME_NOT_SPECIFIED, LG_MSG_STRUCT_MISS,
+ LG_MSG_METHOD_MISS, LG_MSG_INPUT_DATA_NULL, LG_MSG_OPEN_LOGFILE_FAILED);
+
+#define LG_MSG_EPOLL_FD_CREATE_FAILED_STR "epoll fd creation failed"
+#define LG_MSG_INVALID_POLL_IN_STR "invalid poll_in value"
+#define LG_MSG_INVALID_POLL_OUT_STR "invalid poll_out value"
+#define LG_MSG_SLOT_NOT_FOUND_STR "could not find slot"
+#define LG_MSG_EPOLL_FD_ADD_FAILED_STR "failed to add fd to epoll"
+#define LG_MSG_EPOLL_FD_DEL_FAILED_STR "fail to delete fd to epoll"
+#define LG_MSG_EPOLL_FD_MODIFY_FAILED_STR "failed to modify fd events"
+#define LG_MSG_STALE_FD_FOUND_STR "stale fd found"
+#define LG_MSG_GENERATION_MISMATCH_STR "generation mismatch"
+#define LG_MSG_STARTED_EPOLL_THREAD_STR "Started thread with index"
+#define LG_MSG_EXITED_EPOLL_THREAD_STR "Exited thread"
+#define LG_MSG_DISPATCH_HANDLER_FAILED_STR "Failed to dispatch handler"
+#define LG_MSG_START_EPOLL_THREAD_FAILED_STR "Failed to start thread"
+#define LG_MSG_PIPE_CREATE_FAILED_STR "pipe creation failed"
+#define LG_MSG_SET_PIPE_FAILED_STR "could not set pipe to non blocking mode"
+#define LG_MSG_REGISTER_PIPE_FAILED_STR \
+ "could not register pipe fd with poll event loop"
+#define LG_MSG_POLL_IGNORE_MULTIPLE_THREADS_STR \
+ "Currently poll does not use multiple event processing threads, count " \
+ "ignored"
+#define LG_MSG_INDEX_NOT_FOUND_STR "index not found"
+#define LG_MSG_READ_FILE_FAILED_STR "read on file returned error"
+#define LG_MSG_RB_TABLE_CREATE_FAILED_STR "Failed to create rb table bucket"
+#define LG_MSG_HASH_FUNC_ERROR_STR "Hash function not given"
+#define LG_MSG_ENTRIES_NOT_PROVIDED_STR \
+ "Both mem-pool and expected entries not provided"
+#define LG_MSG_ENTRIES_PROVIDED_STR \
+ "Both mem-pool and expected entries are provided"
+#define LG_MSG_RBTHASH_INIT_BUCKET_FAILED_STR "failed to init buckets"
+#define LG_MSG_RBTHASH_GET_ENTRY_FAILED_STR "Failed to get entry from mem-pool"
+#define LG_MSG_RBTHASH_GET_BUCKET_FAILED_STR "Failed to get bucket"
+#define LG_MSG_RBTHASH_INSERT_FAILED_STR "Failed to insert entry"
+#define LG_MSG_RBTHASH_INIT_ENTRY_FAILED_STR "Failed to init entry"
+#define LG_MSG_FILE_STAT_FAILED_STR "failed to stat"
+#define LG_MSG_INET_PTON_FAILED_STR "inet_pton() failed"
+#define LG_MSG_INVALID_ENTRY_STR "Invalid arguments"
+#define LG_MSG_NEGATIVE_NUM_PASSED_STR "negative number passed"
+#define LG_MSG_PATH_ERROR_STR "Path manipulation failed"
+#define LG_MSG_FILE_OP_FAILED_STR "could not open/read file, getting ports info"
+#define LG_MSG_RESERVED_PORTS_ERROR_STR \
+ "Not able to get reserved ports, hence there is a possibility that " \
+ "glusterfs may consume reserved port"
+#define LG_MSG_INVALID_PORT_STR "invalid port"
+#define LG_MSG_GETNAMEINFO_FAILED_STR "Could not lookup hostname"
+#define LG_MSG_GETIFADDRS_FAILED_STR "getifaddrs() failed"
+#define LG_MSG_INVALID_FAMILY_STR "Invalid family"
+#define LG_MSG_CONVERSION_FAILED_STR "String conversion failed"
+#define LG_MSG_GETADDRINFO_FAILED_STR "error in getaddrinfo"
+#define LG_MSG_DUPLICATE_ENTRY_STR "duplicate entry for volfile-server"
+#define LG_MSG_PTHREAD_NAMING_FAILED_STR "Failed to compose thread name"
+#define LG_MSG_THREAD_NAME_TOO_LONG_STR \
+ "Thread name is too long. It has been truncated"
+#define LG_MSG_SET_THREAD_FAILED_STR "Could not set thread name"
+#define LG_MSG_THREAD_CREATE_FAILED_STR "Thread creation failed"
+#define LG_MSG_PTHREAD_ATTR_INIT_FAILED_STR \
+ "Thread attribute initialization failed"
+#define LG_MSG_SKIP_HEADER_FAILED_STR "Failed to skip header section"
+#define LG_MSG_INVALID_LOG_STR "Invalid log-format"
+#define LG_MSG_UTIMENSAT_FAILED_STR "utimenstat failed"
+#define LG_MSG_UTIMES_FAILED_STR "utimes failed"
+#define LG_MSG_FILE_DELETE_FAILED_STR "Unable to delete file"
+#define LG_MSG_BACKTRACE_SAVE_FAILED_STR "Failed to save the backtrace"
+#define LG_MSG_WRONG_VALUE_STR "wrong value"
+#define LG_MSG_DIR_OP_FAILED_STR "Failed to create directory"
+#define LG_MSG_DIR_IS_SYMLINK_STR "dir is symlink"
+#define LG_MSG_RESOLVE_HOSTNAME_FAILED_STR "couldnot resolve hostname"
+#define LG_MSG_PATH_OPEN_FAILED_STR "Unable to open path"
+#define LG_MSG_NO_MEMORY_STR "Error allocating memory"
+#define LG_MSG_EVENT_NOTIFY_FAILED_STR "notification failed"
+#define LG_MSG_PER_DENTRY_FAILED_STR "per dentry fn returned"
+#define LG_MSG_PARENT_DENTRY_NOT_FOUND_STR "parent not found"
+#define LG_MSG_DENTRY_CYCLIC_LOOP_STR \
+ "detected cyclic loop formation during inode linkage"
+#define LG_MSG_CTX_NULL_STR "_ctx not found"
+#define LG_MSG_DENTRY_NOT_FOUND_STR "dentry not found"
+#define LG_MSG_OUT_OF_RANGE_STR "out of range"
+#define LG_MSG_UNKNOWN_OPTION_TYPE_STR "unknown option type"
+#define LG_MSG_VALIDATE_RETURNS_STR "validate of returned"
+#define LG_MSG_OPTION_DEPRECATED_STR \
+ "option is deprecated, continuing with correction"
+#define LG_MSG_VALIDATE_REC_FAILED_STR "validate_rec failed"
+#define LG_MSG_MAPPING_FAILED_STR "mapping failed"
+#define LG_MSG_INIT_IOBUF_FAILED_STR "init failed"
+#define LG_MSG_ARENA_NOT_FOUND_STR "arena not found"
+#define LG_MSG_PAGE_SIZE_EXCEEDED_STR \
+ "page_size of iobufs in arena being added is greater than max available"
+#define LG_MSG_POOL_NOT_FOUND_STR "pool not found"
+#define LG_MSG_IOBUF_NOT_FOUND_STR "iobuf not found"
+#define LG_MSG_DLOPEN_FAILED_STR "DL open failed"
+#define LG_MSG_DLSYM_ERROR_STR "dlsym missing"
+#define LG_MSG_LOAD_FAILED_STR "Failed to load xlator options table"
+#define LG_MSG_INPUT_DATA_NULL_STR \
+ "input data is null. cannot update the lru limit of the inode table. " \
+ "continuing with older value."
+#define LG_MSG_INIT_FAILED_STR "No init() found"
+#define LG_MSG_VOLUME_ERROR_STR \
+ "Initialization of volume failed. review your volfile again."
+#define LG_MSG_TREE_NOT_FOUND_STR "Translator tree not found"
+#define LG_MSG_SET_LOG_LEVEL_STR "setting log level"
+#define LG_MSG_INVALID_INIT_STR \
+ "Invalid log-level. possible values are DEBUG|WARNING|ERROR|NONE|TRACE"
+#define LG_MSG_OBJECT_NULL_STR "object is null, returning false."
+#define LG_MSG_GRAPH_NOT_SET_STR "Graph is not set for xlator"
+#define LG_MSG_OPEN_LOGFILE_FAILED_STR "failed to open logfile"
+#define LG_MSG_STRDUP_ERROR_STR "failed to create metrics dir"
+#define LG_MSG_FILENAME_NOT_SPECIFIED_STR "no filename specified"
+#define LG_MSG_UNDERSIZED_BUF_STR "data value is smaller than expected"
+#define LG_MSG_DICT_SET_FAILED_STR "unable to set dict"
+#define LG_MSG_COUNT_LESS_THAN_ZERO_STR "count < 0!"
+#define LG_MSG_PAIRS_LESS_THAN_COUNT_STR "less than count data pairs found"
+#define LG_MSG_NULL_PTR_STR "pair->key is null!"
+#define LG_MSG_VALUE_LENGTH_LESS_THAN_ZERO_STR "value->len < 0"
+#define LG_MSG_INVALID_ARG_STR "buf is null"
+#define LG_MSG_KEY_OR_VALUE_NULL_STR "key or value is null"
+#define LG_MSG_NULL_VALUE_IN_DICT_STR "null value found in dict"
+#define LG_MSG_FAILED_TO_LOG_DICT_STR "Failed to log dictionary"
+#define LG_MSG_DICT_ERROR_STR "dict error"
+#define LG_MSG_STRUCT_MISS_STR "struct missing"
+#define LG_MSG_METHOD_MISS_STR "method missing(init)"
+
+#endif /* !_LG_MESSAGES_H_ */
diff --git a/libglusterfs/src/list.h b/libglusterfs/src/glusterfs/list.h
index 221a710ca30..221a710ca30 100644
--- a/libglusterfs/src/list.h
+++ b/libglusterfs/src/glusterfs/list.h
diff --git a/libglusterfs/src/lkowner.h b/libglusterfs/src/glusterfs/lkowner.h
index b49e9af6bcb..692de34bc7a 100644
--- a/libglusterfs/src/lkowner.h
+++ b/libglusterfs/src/glusterfs/lkowner.h
@@ -11,7 +11,7 @@
#ifndef _LK_OWNER_H
#define _LK_OWNER_H
-#include "glusterfs-fops.h"
+#include "glusterfs/glusterfs-fops.h"
/* LKOWNER to string functions */
static inline void
diff --git a/libglusterfs/src/locking.h b/libglusterfs/src/glusterfs/locking.h
index 43cc87735d1..43cc87735d1 100644
--- a/libglusterfs/src/locking.h
+++ b/libglusterfs/src/glusterfs/locking.h
diff --git a/libglusterfs/src/logging.h b/libglusterfs/src/glusterfs/logging.h
index 859050d568b..b3a6ac191f0 100644
--- a/libglusterfs/src/logging.h
+++ b/libglusterfs/src/glusterfs/logging.h
@@ -16,7 +16,7 @@
#include <stdio.h>
#include <stdarg.h>
#include <pthread.h>
-#include "list.h"
+#include "glusterfs/list.h"
#ifdef GF_DARWIN_HOST_OS
#define GF_PRI_FSBLK "u"
@@ -98,11 +98,9 @@ typedef enum {
typedef struct gf_log_handle_ {
pthread_mutex_t logfile_mutex;
- uint8_t logrotate;
- uint8_t cmd_history_logrotate;
gf_loglevel_t loglevel;
- int gf_log_syslog;
gf_loglevel_t sys_log_level;
+ int gf_log_syslog;
char *filename;
FILE *logfile;
FILE *gf_log_logfile;
@@ -113,12 +111,14 @@ typedef struct gf_log_handle_ {
char *ident;
int log_control_file_found;
struct list_head lru_queue;
- uint32_t lru_size;
- uint32_t lru_cur_size;
- uint32_t timeout;
pthread_mutex_t log_buf_lock;
struct _gf_timer *log_flush_timer;
int localtime;
+ uint32_t lru_size;
+ uint32_t lru_cur_size;
+ uint32_t timeout;
+ uint8_t logrotate;
+ uint8_t cmd_history_logrotate;
} gf_log_handle_t;
typedef struct log_buf_ {
@@ -300,7 +300,7 @@ _gf_log_eh(const char *function, const char *fmt, ...)
/* Log once in GF_UNIVERSAL_ANSWER times */
#define GF_LOG_OCCASIONALLY(var, args...) \
- if (!(var++ % GF_UNIVERSAL_ANSWER)) { \
+ if (var++ == 0 || !((var - 1) % GF_UNIVERSAL_ANSWER)) { \
gf_log(args); \
}
@@ -369,10 +369,6 @@ gf_log_disable_suppression_before_exit(struct _glusterfs_ctx *ctx);
gf_log((xl)->name, GF_LOG_ERROR, format, ##args)
int
-_gf_slog(const char *domain, const char *file, const char *function, int line,
- gf_loglevel_t level, const char *event, ...);
-
-int
_gf_smsg(const char *domain, const char *file, const char *function,
int32_t line, gf_loglevel_t level, int errnum, int trace,
uint64_t msgid, const char *event, ...);
@@ -381,12 +377,7 @@ _gf_smsg(const char *domain, const char *file, const char *function,
#define gf_smsg(dom, level, errnum, msgid, event...) \
do { \
_gf_smsg(dom, __FILE__, __FUNCTION__, __LINE__, level, errnum, 0, \
- msgid, ##event); \
- } while (0)
-
-#define gf_slog(dom, level, event...) \
- do { \
- _gf_slog(dom, __FILE__, __FUNCTION__, __LINE__, level, ##event); \
+ msgid, msgid##_STR, ##event); \
} while (0)
#endif /* __LOGGING_H__ */
diff --git a/libglusterfs/src/lvm-defaults.h b/libglusterfs/src/glusterfs/lvm-defaults.h
index 32feebf3f6e..32feebf3f6e 100644
--- a/libglusterfs/src/lvm-defaults.h
+++ b/libglusterfs/src/glusterfs/lvm-defaults.h
diff --git a/libglusterfs/src/mem-pool.h b/libglusterfs/src/glusterfs/mem-pool.h
index 118170fd17f..e5b3276d047 100644
--- a/libglusterfs/src/mem-pool.h
+++ b/libglusterfs/src/glusterfs/mem-pool.h
@@ -11,12 +11,11 @@
#ifndef _MEM_POOL_H_
#define _MEM_POOL_H_
-#include "list.h"
-#include "locking.h"
-#include "atomic.h"
-#include "logging.h"
-#include "mem-types.h"
-#include "glusterfs.h" /* for glusterfs_ctx_t */
+#include "glusterfs/list.h"
+#include "glusterfs/atomic.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/mem-types.h"
+#include "glusterfs/glusterfs.h" /* for glusterfs_ctx_t */
#include <stdlib.h>
#include <inttypes.h>
#include <string.h>
@@ -38,6 +37,10 @@
#define GF_MEM_TRAILER_MAGIC 0xBAADF00D
#define GF_MEM_INVALID_MAGIC 0xDEADC0DE
+#define POOL_SMALLEST 7 /* i.e. 128 */
+#define POOL_LARGEST 20 /* i.e. 1048576 */
+#define NPOOLS (POOL_LARGEST - POOL_SMALLEST + 1)
+
struct mem_acct_rec {
const char *typestr;
uint64_t size;
@@ -189,7 +192,7 @@ gf_memdup(const void *src, size_t size)
{
void *dup_mem = NULL;
- dup_mem = GF_MALLOC(size, gf_common_mt_strdup);
+ dup_mem = GF_MALLOC(size, gf_common_mt_memdup);
if (!dup_mem)
goto out;
@@ -199,6 +202,24 @@ out:
return dup_mem;
}
+#ifdef GF_DISABLE_MEMPOOL
+
+/* No-op memory pool enough to fit current API without massive redesign. */
+
+struct mem_pool {
+ unsigned long sizeof_type;
+};
+
+#define mem_pools_init() \
+ do { \
+ } while (0)
+#define mem_pools_fini() \
+ do { \
+ } while (0)
+#define mem_pool_thread_destructor(pool_list) (void)pool_list
+
+#else /* !GF_DISABLE_MEMPOOL */
+
/* kind of 'header' for the actual mem_pool_shared structure, this might make
* it possible to dump some more details in a statedump */
struct mem_pool {
@@ -206,8 +227,12 @@ struct mem_pool {
unsigned long sizeof_type;
unsigned long count; /* requested pool size (unused) */
char *name;
- gf_atomic_t active; /* current allocations */
-
+ char *xl_name;
+ gf_atomic_t active; /* current allocations */
+#ifdef DEBUG
+ gf_atomic_t hit; /* number of allocations served from pt_pool */
+ gf_atomic_t miss; /* number of std allocs due to miss */
+#endif
struct list_head owner; /* glusterfs_ctx_t->mempool_list */
glusterfs_ctx_t *ctx; /* take ctx->lock when updating owner */
@@ -224,7 +249,10 @@ typedef struct pooled_obj_hdr {
struct mem_pool *pool;
} pooled_obj_hdr_t;
-#define AVAILABLE_SIZE(p2) ((1 << (p2)) - sizeof(pooled_obj_hdr_t))
+/* Each memory block inside a pool has a fixed size that is a power of two.
+ * However each object will have a header that will reduce the available
+ * space. */
+#define AVAILABLE_SIZE(p2) ((1UL << (p2)) - sizeof(pooled_obj_hdr_t))
typedef struct per_thread_pool {
/* the pool that was used to request this allocation */
@@ -235,24 +263,26 @@ typedef struct per_thread_pool {
} per_thread_pool_t;
typedef struct per_thread_pool_list {
- /*
- * These first two members are protected by the global pool lock. When
- * a thread first tries to use any pool, we create one of these. We
- * link it into the global list using thr_list so the pool-sweeper
- * thread can find it, and use pthread_setspecific so this thread can
- * find it. When the per-thread destructor runs, we "poison" the pool
- * list to prevent further allocations. This also signals to the
- * pool-sweeper thread that the list should be detached and freed after
- * the next time it's swept.
- */
+ /* thr_list is used to place the TLS pool_list into the active global list
+ * (pool_threads) or the inactive global list (pool_free_threads). It's
+ * protected by the global pool_lock. */
struct list_head thr_list;
- unsigned int poison;
+
+ /* This lock is used to update poison and the hot/cold lists of members
+ * of 'pools' array. */
+ pthread_spinlock_t lock;
+
+ /* This field is used to mark a pool_list as not being owned by any thread.
+ * This means that the sweeper thread won't be cleaning objects stored in
+ * its pools. mem_put() uses it to decide if the object being released is
+ * placed into its original pool_list or directly destroyed. */
+ bool poison;
+
/*
* There's really more than one pool, but the actual number is hidden
* in the implementation code so we just make it a single-element array
* here.
*/
- pthread_spinlock_t lock;
per_thread_pool_t pools[1];
} per_thread_pool_list_t;
@@ -272,11 +302,13 @@ struct mem_pool_shared {
};
void
-mem_pools_init_early(void); /* basic initialization of memory pools */
-void
-mem_pools_init_late(void); /* start the pool_sweeper thread */
+mem_pools_init(void); /* start the pool_sweeper thread */
void
mem_pools_fini(void); /* cleanup memory pools */
+void
+mem_pool_thread_destructor(per_thread_pool_list_t *pool_list);
+
+#endif /* GF_DISABLE_MEMPOOL */
struct mem_pool *
mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type,
diff --git a/libglusterfs/src/glusterfs/mem-types.h b/libglusterfs/src/glusterfs/mem-types.h
new file mode 100644
index 00000000000..d45d5b68c91
--- /dev/null
+++ b/libglusterfs/src/glusterfs/mem-types.h
@@ -0,0 +1,139 @@
+/*
+ Copyright (c) 2008-2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __MEM_TYPES_H__
+#define __MEM_TYPES_H__
+
+enum gf_common_mem_types_ {
+ gf_common_mt_dnscache6, /* used only in one location */
+ gf_common_mt_event_pool,
+ gf_common_mt_reg,
+ gf_common_mt_pollfd, /* used only in one location */
+ gf_common_mt_fdentry_t, /* used only in one location */
+ gf_common_mt_fdtable_t, /* used only in one location */
+ gf_common_mt_fd_ctx, /* used only in one location */
+ gf_common_mt_gf_dirent_t,
+ gf_common_mt_inode_t, /* used only in one location */
+ gf_common_mt_inode_ctx, /* used only in one location */
+ gf_common_mt_list_head,
+ gf_common_mt_inode_table_t, /* used only in one location */
+ gf_common_mt_xlator_t,
+ gf_common_mt_xlator_list_t, /* used only in one location */
+ gf_common_mt_volume_opt_list_t,
+ gf_common_mt_gf_timer_t, /* used only in one location */
+ gf_common_mt_gf_timer_registry_t, /* used only in one location */
+ gf_common_mt_auth_handle_t, /* used only in one location */
+ gf_common_mt_iobuf, /* used only in one location */
+ gf_common_mt_iobuf_arena, /* used only in one location */
+ gf_common_mt_iobref, /* used only in one location */
+ gf_common_mt_iobuf_pool, /* used only in one location */
+ gf_common_mt_iovec,
+ gf_common_mt_memdup, /* used only in one location */
+ gf_common_mt_asprintf, /* used only in one location */
+ gf_common_mt_strdup,
+ gf_common_mt_socket_private_t, /* used only in one location */
+ gf_common_mt_ioq, /* used only in one location */
+ gf_common_mt_char,
+ gf_common_mt_rbthash_table_t, /* used only in one location */
+ gf_common_mt_rbthash_bucket, /* used only in one location */
+ gf_common_mt_mem_pool, /* used only in one location */
+ gf_common_mt_rpcsvc_auth_list, /* used only in one location */
+ gf_common_mt_rpcsvc_t, /* used only in one location */
+ gf_common_mt_rpcsvc_program_t, /* used only in one location */
+ gf_common_mt_rpcsvc_listener_t, /* used only in one location */
+ gf_common_mt_rpcsvc_wrapper_t, /* used only in one location */
+ gf_common_mt_rpcclnt_t, /* used only in one location */
+ gf_common_mt_rpcclnt_savedframe_t, /* used only in one location */
+ gf_common_mt_rpc_trans_t,
+ gf_common_mt_rpc_trans_pollin_t, /* used only in one location */
+ gf_common_mt_rpc_trans_reqinfo_t, /* used only in one location */
+ gf_common_mt_glusterfs_graph_t,
+ gf_common_mt_rdma_private_t, /* used only in one location */
+ gf_common_mt_rpc_transport_t, /* used only in one location */
+ gf_common_mt_rdma_post_t, /* used only in one location */
+ gf_common_mt_qpent, /* used only in one location */
+ gf_common_mt_rdma_device_t, /* used only in one location */
+ gf_common_mt_rdma_arena_mr, /* used only in one location */
+ gf_common_mt_sge, /* used only in one location */
+ gf_common_mt_rpcclnt_cb_program_t, /* used only in one location */
+ gf_common_mt_libxl_marker_local, /* used only in one location */
+ gf_common_mt_graph_buf, /* used only in one location */
+ gf_common_mt_trie_trie, /* used only in one location */
+ gf_common_mt_trie_data, /* used only in one location */
+ gf_common_mt_trie_node, /* used only in one location */
+ gf_common_mt_trie_buf, /* used only in one location */
+ gf_common_mt_run_argv, /* used only in one location */
+ gf_common_mt_run_logbuf, /* used only in one location */
+ gf_common_mt_fd_lk_ctx_t, /* used only in one location */
+ gf_common_mt_fd_lk_ctx_node_t, /* used only in one location */
+ gf_common_mt_buffer_t, /* used only in one location */
+ gf_common_mt_circular_buffer_t, /* used only in one location */
+ gf_common_mt_eh_t,
+ gf_common_mt_store_handle_t, /* used only in one location */
+ gf_common_mt_store_iter_t, /* used only in one location */
+ gf_common_mt_drc_client_t, /* used only in one location */
+ gf_common_mt_drc_globals_t, /* used only in one location */
+ gf_common_mt_groups_t,
+ gf_common_mt_cliententry_t, /* used only in one location */
+ gf_common_mt_clienttable_t, /* used only in one location */
+ gf_common_mt_client_t, /* used only in one location */
+ gf_common_mt_client_ctx, /* used only in one location */
+ gf_common_mt_auxgids, /* used only in one location */
+ gf_common_mt_syncopctx, /* used only in one location */
+ gf_common_mt_iobrefs, /* used only in one location */
+ gf_common_mt_gsync_status_t,
+ gf_common_mt_uuid_t,
+ gf_common_mt_mgmt_v3_lock_obj_t, /* used only in one location */
+ gf_common_mt_txn_opinfo_obj_t, /* used only in one location */
+ gf_common_mt_strfd_t, /* used only in one location */
+ gf_common_mt_strfd_data_t, /* used only in one location */
+ gf_common_mt_regex_t, /* used only in one location */
+ gf_common_mt_ereg, /* used only in one location */
+ gf_common_mt_wr, /* used only in one location */
+ gf_common_mt_dnscache, /* used only in one location */
+ gf_common_mt_dnscache_entry, /* used only in one location */
+ gf_common_mt_parser_t, /* used only in one location */
+ gf_common_quota_meta_t,
+ gf_common_mt_rbuf_t, /* used only in one location */
+ gf_common_mt_rlist_t, /* used only in one location */
+ gf_common_mt_rvec_t, /* used only in one location */
+ /* glusterd can load the nfs-xlator dynamically and needs these two */
+ gf_common_mt_nfs_netgroups, /* used only in one location */
+ gf_common_mt_nfs_exports, /* used only in one location */
+ gf_common_mt_gf_brick_spec_t, /* used only in one location */
+ gf_common_mt_int,
+ gf_common_mt_pointer,
+ gf_common_mt_synctask, /* used only in one location */
+ gf_common_mt_syncstack, /* used only in one location */
+ gf_common_mt_syncenv, /* used only in one location */
+ gf_common_mt_scan_data, /* used only in one location */
+ gf_common_list_node,
+ gf_mt_default_args_t, /* used only in one location */
+ gf_mt_default_args_cbk_t, /* used only in one location */
+ /*used for compound fops*/
+ gf_mt_compound_req_t, /* used only in one location */
+ gf_mt_compound_rsp_t, /* used only in one location */
+ gf_common_mt_tw_ctx, /* used only in one location */
+ gf_common_mt_tw_timer_list,
+ /*lock migration*/
+ gf_common_mt_lock_mig,
+ /* throttle */
+ gf_common_mt_tbf_t, /* used only in one location */
+ gf_common_mt_tbf_bucket_t, /* used only in one location */
+ gf_common_mt_tbf_throttle_t, /* used only in one location */
+ gf_common_mt_pthread_t, /* used only in one location */
+ gf_common_ping_local_t, /* used only in one location */
+ gf_common_volfile_t,
+ gf_common_mt_mgmt_v3_lock_timer_t, /* used only in one location */
+ gf_common_mt_server_cmdline_t, /* used only in one location */
+ gf_common_mt_latency_t,
+ gf_common_mt_end
+};
+#endif
diff --git a/libglusterfs/src/monitoring.h b/libglusterfs/src/glusterfs/monitoring.h
index 7826d4ec4df..09d9f54e734 100644
--- a/libglusterfs/src/monitoring.h
+++ b/libglusterfs/src/glusterfs/monitoring.h
@@ -11,7 +11,7 @@
#ifndef __MONITORING_H__
#define __MONITORING_H__
-#include "glusterfs.h"
+#include "glusterfs/glusterfs.h"
#define GLUSTER_METRICS_DIR "/var/run/gluster/metrics"
diff --git a/libglusterfs/src/options.h b/libglusterfs/src/glusterfs/options.h
index e8600f14f56..747b13ba375 100644
--- a/libglusterfs/src/options.h
+++ b/libglusterfs/src/glusterfs/options.h
@@ -15,8 +15,8 @@
#include <stdint.h>
#include <inttypes.h>
-#include "xlator.h"
-#include "libglusterfs-messages.h"
+#include "glusterfs/xlator.h"
+#include "glusterfs/libglusterfs-messages.h"
/* Add possible new type of option you may need */
typedef enum {
GF_OPTION_TYPE_ANY = 0,
@@ -264,7 +264,7 @@ DECLARE_INIT_OPT(uint32_t, time);
#define DECLARE_RECONF_OPT(type_t, type) \
int xlator_option_reconf_##type(xlator_t *this, dict_t *options, \
- char *key, type_t *val_p);
+ char *key, int keylen, type_t *val_p);
DECLARE_RECONF_OPT(char *, str);
DECLARE_RECONF_OPT(uint64_t, uint64);
@@ -283,56 +283,44 @@ DECLARE_RECONF_OPT(uint32_t, time);
#define DEFINE_RECONF_OPT(type_t, type, conv) \
int xlator_option_reconf_##type(xlator_t *this, dict_t *options, \
- char *key, type_t *val_p) \
+ char *key, int keylen, type_t *val_p) \
{ \
int ret = 0; \
- volume_option_t *opt = NULL; \
- char *def_value = NULL; \
- char *set_value = NULL; \
char *value = NULL; \
xlator_t *old_THIS = NULL; \
\
- opt = xlator_volume_option_get(this, key); \
+ volume_option_t *opt = xlator_volume_option_get(this, key); \
if (!opt) { \
gf_msg(this->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ENTRY, \
"unknown option: %s", key); \
- ret = -1; \
- return ret; \
+ return -1; \
} \
- def_value = opt->default_value; \
- ret = dict_get_str(options, key, &set_value); \
- \
- if (def_value) \
- value = def_value; \
- if (set_value) \
- value = set_value; \
- if (!value) { \
- gf_msg_trace(this->name, 0, "option %s not set", key); \
- *val_p = (type_t)0; \
- return 0; \
- } \
- if (value == def_value) { \
+ ret = dict_get_strn(options, key, keylen, &value); \
+ if (ret == 0 && value) { \
+ gf_msg(this->name, GF_LOG_INFO, 0, 0, \
+ "option %s using set value %s", key, value); \
+ } else if (opt->default_value) { \
+ value = opt->default_value; \
gf_msg_trace(this->name, 0, "option %s using default value %s", \
key, value); \
} else { \
- gf_msg(this->name, GF_LOG_INFO, 0, 0, \
- "option %s using set value %s", key, value); \
+ gf_msg_trace(this->name, 0, "option %s not set", key); \
+ *val_p = (type_t)0; \
+ return 0; \
} \
+ \
old_THIS = THIS; \
THIS = this; \
ret = conv(value, val_p); \
THIS = old_THIS; \
if (ret) \
return ret; \
- ret = xlator_option_validate(this, key, value, opt, NULL); \
- return ret; \
+ return xlator_option_validate(this, key, value, opt, NULL); \
}
#define GF_OPTION_RECONF(key, val, opt, type, err_label) \
do { \
- int val_ret = 0; \
- val_ret = xlator_option_reconf_##type(THIS, opt, key, &(val)); \
- if (val_ret) \
+ if (xlator_option_reconf_##type(THIS, opt, key, SLEN(key), &(val))) \
goto err_label; \
} while (0)
diff --git a/libglusterfs/src/parse-utils.h b/libglusterfs/src/glusterfs/parse-utils.h
index 8653b9dd180..8653b9dd180 100644
--- a/libglusterfs/src/parse-utils.h
+++ b/libglusterfs/src/glusterfs/parse-utils.h
diff --git a/libglusterfs/src/quota-common-utils.h b/libglusterfs/src/glusterfs/quota-common-utils.h
index 49f238b31f3..0096e340756 100644
--- a/libglusterfs/src/quota-common-utils.h
+++ b/libglusterfs/src/glusterfs/quota-common-utils.h
@@ -11,7 +11,7 @@
#ifndef _QUOTA_COMMON_UTILS_H
#define _QUOTA_COMMON_UTILS_H
-#include "iatt.h"
+#include "glusterfs/iatt.h"
#define GF_QUOTA_CONF_VERSION 1.2
#define QUOTA_CONF_HEADER "GlusterFS Quota conf | version: v1.2\n"
diff --git a/libglusterfs/src/rbthash.h b/libglusterfs/src/glusterfs/rbthash.h
index dea4648ca5a..4c731de69c2 100644
--- a/libglusterfs/src/rbthash.h
+++ b/libglusterfs/src/glusterfs/rbthash.h
@@ -10,14 +10,12 @@
#ifndef __RBTHASH_TABLE_H_
#define __RBTHASH_TABLE_H_
-#include "rb.h"
-#include "locking.h"
-#include "mem-pool.h"
-#include "logging.h"
-#include "common-utils.h"
-#include "list.h"
-#include <pthread.h>
+#include <stdint.h> // for uint32_t
+#include "glusterfs/glusterfs.h" // for gf_boolean_t, glusterfs_ctx_t
+#include "glusterfs/list.h" // for list_head
+#include "glusterfs/locking.h" // for gf_lock_t
+struct mem_pool;
#define GF_RBTHASH_MEMPOOL 16384 // 1048576
#define GF_RBTHASH "rbthash"
diff --git a/libglusterfs/src/refcount.h b/libglusterfs/src/glusterfs/refcount.h
index 6a3a73d80e8..cf922dabb05 100644
--- a/libglusterfs/src/refcount.h
+++ b/libglusterfs/src/glusterfs/refcount.h
@@ -22,7 +22,7 @@
#undef REFCOUNT_NEEDS_LOCK
#else
#define REFCOUNT_NEEDS_LOCK
-#include "locking.h"
+#include "glusterfs/locking.h"
#endif /* compiler support for __sync_*_and_fetch() */
typedef void (*gf_ref_release_t)(void *data);
diff --git a/libglusterfs/src/revision.h b/libglusterfs/src/glusterfs/revision.h
index 3c404d30e78..3c404d30e78 100644
--- a/libglusterfs/src/revision.h
+++ b/libglusterfs/src/glusterfs/revision.h
diff --git a/libglusterfs/src/rot-buffs.h b/libglusterfs/src/glusterfs/rot-buffs.h
index 1e91e83ecf0..9dc227d58b8 100644
--- a/libglusterfs/src/rot-buffs.h
+++ b/libglusterfs/src/glusterfs/rot-buffs.h
@@ -11,9 +11,9 @@
#ifndef __ROT_BUFFS_H
#define __ROT_BUFFS_H
-#include "list.h"
-#include "locking.h"
-#include "common-utils.h"
+#include "glusterfs/list.h"
+#include "glusterfs/locking.h"
+#include "glusterfs/common-utils.h"
typedef struct rbuf_iovec {
struct iovec iov;
diff --git a/libglusterfs/src/run.h b/libglusterfs/src/glusterfs/run.h
index 76af95fd27f..76af95fd27f 100644
--- a/libglusterfs/src/run.h
+++ b/libglusterfs/src/glusterfs/run.h
diff --git a/libglusterfs/src/stack.h b/libglusterfs/src/glusterfs/stack.h
index cda04dfdb29..536a330d38b 100644
--- a/libglusterfs/src/stack.h
+++ b/libglusterfs/src/glusterfs/stack.h
@@ -25,14 +25,14 @@ typedef struct call_pool call_pool_t;
#include <sys/time.h>
-#include "xlator.h"
-#include "dict.h"
-#include "list.h"
-#include "common-utils.h"
-#include "lkowner.h"
-#include "client_t.h"
-#include "libglusterfs-messages.h"
-#include "timespec.h"
+#include "glusterfs/xlator.h"
+#include "glusterfs/dict.h"
+#include "glusterfs/list.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/lkowner.h"
+#include "glusterfs/client_t.h"
+#include "glusterfs/libglusterfs-messages.h"
+#include "glusterfs/timespec.h"
#define NFS_PID 1
#define LOW_PRIO_PROC_PID -1
@@ -45,6 +45,9 @@ typedef int32_t (*ret_fn_t)(call_frame_t *frame, call_frame_t *prev_frame,
xlator_t *this, int32_t op_ret, int32_t op_errno,
...);
+void
+gf_frame_latency_update(call_frame_t *frame);
+
struct call_pool {
union {
struct list_head all_frames;
@@ -149,8 +152,6 @@ struct _call_stack {
} while (0);
struct xlator_fops;
-void
-gf_update_latency(call_frame_t *frame);
static inline void
FRAME_DESTROY(call_frame_t *frame)
@@ -158,7 +159,7 @@ FRAME_DESTROY(call_frame_t *frame)
void *local = NULL;
if (frame->root->ctx->measure_latency)
- gf_update_latency(frame);
+ gf_frame_latency_update(frame);
list_del_init(&frame->frames);
if (frame->local) {
@@ -429,6 +430,7 @@ call_stack_alloc_groups(call_stack_t *stack, int ngrps)
if (ngrps <= SMALL_GROUP_COUNT) {
stack->groups = stack->groups_small;
} else {
+ GF_FREE(stack->groups_large);
stack->groups_large = GF_CALLOC(ngrps, sizeof(gid_t),
gf_common_mt_groups_t);
if (!stack->groups_large)
@@ -442,6 +444,12 @@ call_stack_alloc_groups(call_stack_t *stack, int ngrps)
}
static inline int
+call_stack_groups_capacity(call_stack_t *stack)
+{
+ return max(stack->ngrps, SMALL_GROUP_COUNT);
+}
+
+static inline int
call_frames_count(call_stack_t *call_stack)
{
call_frame_t *pos;
diff --git a/libglusterfs/src/statedump.h b/libglusterfs/src/glusterfs/statedump.h
index af653041493..ce082706bdf 100644
--- a/libglusterfs/src/statedump.h
+++ b/libglusterfs/src/glusterfs/statedump.h
@@ -12,8 +12,8 @@
#define STATEDUMP_H
#include <stdarg.h>
-#include "inode.h"
-#include "strfd.h"
+#include "glusterfs/inode.h"
+#include "glusterfs/strfd.h"
#define GF_DUMP_MAX_BUF_LEN 4096
@@ -127,4 +127,6 @@ gf_proc_dump_xlator_meminfo(xlator_t *this, strfd_t *strfd);
void
gf_proc_dump_xlator_profile(xlator_t *this, strfd_t *strfd);
+void
+gf_latency_statedump_and_reset(char *key, gf_latency_t *lat);
#endif /* STATEDUMP_H */
diff --git a/libglusterfs/src/store.h b/libglusterfs/src/glusterfs/store.h
index d69e39a7ce3..a1f70c7b840 100644
--- a/libglusterfs/src/store.h
+++ b/libglusterfs/src/glusterfs/store.h
@@ -10,8 +10,8 @@
#ifndef _GLUSTERD_STORE_H_
#define _GLUSTERD_STORE_H_
-#include "compat.h"
-#include "glusterfs.h"
+#include "glusterfs/compat.h"
+#include "glusterfs/glusterfs.h"
struct gf_store_handle_ {
char *path;
@@ -59,8 +59,8 @@ int32_t
gf_store_unlink_tmppath(gf_store_handle_t *shandle);
int
-gf_store_read_and_tokenize(FILE *file, char *str, int size, char **iter_key,
- char **iter_val, gf_store_op_errno_t *store_errno);
+gf_store_read_and_tokenize(FILE *file, char **iter_key, char **iter_val,
+ gf_store_op_errno_t *store_errno);
int32_t
gf_store_retrieve_value(gf_store_handle_t *handle, char *key, char **value);
@@ -69,6 +69,9 @@ int32_t
gf_store_save_value(int fd, char *key, char *value);
int32_t
+gf_store_save_items(int fd, char *items);
+
+int32_t
gf_store_handle_new(const char *path, gf_store_handle_t **handle);
int
@@ -92,7 +95,7 @@ int32_t
gf_store_iter_get_matching(gf_store_iter_t *iter, char *key, char **value);
int32_t
-gf_store_iter_destroy(gf_store_iter_t *iter);
+gf_store_iter_destroy(gf_store_iter_t **iter);
char *
gf_store_strerror(gf_store_op_errno_t op_errno);
diff --git a/libglusterfs/src/strfd.h b/libglusterfs/src/glusterfs/strfd.h
index 861cd02e005..861cd02e005 100644
--- a/libglusterfs/src/strfd.h
+++ b/libglusterfs/src/glusterfs/strfd.h
diff --git a/libglusterfs/src/syncop-utils.h b/libglusterfs/src/glusterfs/syncop-utils.h
index 1f3ee403edc..1f3ee403edc 100644
--- a/libglusterfs/src/syncop-utils.h
+++ b/libglusterfs/src/glusterfs/syncop-utils.h
diff --git a/libglusterfs/src/syncop.h b/libglusterfs/src/glusterfs/syncop.h
index 88279d15490..4e9241a32fc 100644
--- a/libglusterfs/src/syncop.h
+++ b/libglusterfs/src/glusterfs/syncop.h
@@ -11,10 +11,12 @@
#ifndef _SYNCOP_H
#define _SYNCOP_H
-#include "xlator.h"
#include <sys/time.h>
#include <pthread.h>
#include <ucontext.h>
+#include "glusterfs/dict.h" // for dict_t
+#include "glusterfs/stack.h" // for call_frame_t, STACK_DESTROY, STACK_...
+#include "glusterfs/timer.h"
#define SYNCENV_PROC_MAX 16
#define SYNCENV_PROC_MIN 2
@@ -29,9 +31,15 @@
#define SYNCOPCTX_PID 0x00000008
#define SYNCOPCTX_LKOWNER 0x00000010
+#ifdef HAVE_TSAN_API
+/* Currently hardcoded within thread context maintained by the sanitizer. */
+#define TSAN_THREAD_NAMELEN 64
+#endif
+
struct synctask;
struct syncproc;
struct syncenv;
+struct synccond;
typedef int (*synctask_cbk_t)(int ret, call_frame_t *frame, void *opaque);
@@ -55,9 +63,12 @@ struct synctask {
call_frame_t *opframe;
synctask_cbk_t synccbk;
synctask_fn_t syncfn;
- synctask_state_t state;
+ struct timespec *delta;
+ gf_timer_t *timer;
+ struct synccond *synccond;
void *opaque;
void *stack;
+ synctask_state_t state;
int woken;
int slept;
int ret;
@@ -65,6 +76,13 @@ struct synctask {
uid_t uid;
gid_t gid;
+#ifdef HAVE_TSAN_API
+ struct {
+ void *fiber;
+ char name[TSAN_THREAD_NAMELEN];
+ } tsan;
+#endif
+
ucontext_t ctx;
struct syncproc *proc;
@@ -73,11 +91,18 @@ struct synctask {
int done;
struct list_head waitq; /* can wait only "once" at a time */
- char btbuf[GF_BACKTRACE_LEN];
};
struct syncproc {
pthread_t processor;
+
+#ifdef HAVE_TSAN_API
+ struct {
+ void *fiber;
+ char name[TSAN_THREAD_NAMELEN];
+ } tsan;
+#endif
+
ucontext_t sched;
struct syncenv *env;
struct synctask *current;
@@ -86,19 +111,21 @@ struct syncproc {
/* hosts the scheduler thread and framework for executing synctasks */
struct syncenv {
struct syncproc proc[SYNCENV_PROC_MAX];
- int procs;
+
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
struct list_head runq;
- int runcount;
struct list_head waitq;
- int waitcount;
+
+ int procs;
+ int procs_idle;
+
+ int runcount;
int procmin;
int procmax;
- pthread_mutex_t mutex;
- pthread_cond_t cond;
-
size_t stacksize;
int destroy; /* FLAG to mark syncenv is in destroy mode
@@ -124,6 +151,13 @@ struct synclock {
};
typedef struct synclock synclock_t;
+struct synccond {
+ pthread_mutex_t pmutex;
+ pthread_cond_t pcond;
+ struct list_head waitq;
+};
+typedef struct synccond synccond_t;
+
struct syncbarrier {
gf_boolean_t initialized; /*Set on successful initialization*/
pthread_mutex_t guard; /* guard the remaining members, pair @cond */
@@ -138,8 +172,19 @@ typedef struct syncbarrier syncbarrier_t;
struct syncargs {
int op_ret;
int op_errno;
+
+ /*
+ * The below 3 iatt structures are used in the fops
+ * whose callbacks get struct iatt as one of the
+ * a return arguments. Currently, the maximum number
+ * of iatt structures returned is 3 for some fops
+ * such as mknod, copy_file_range, mkdir etc. So
+ * all the following 3 iatt structures would be used
+ * for those fops.
+ */
struct iatt iatt1;
struct iatt iatt2;
+ struct iatt iatt3;
dict_t *xattr;
struct statvfs statvfs_buf;
struct iovec *vector;
@@ -209,7 +254,7 @@ struct syncopctx {
#define __yield(args) \
do { \
if (args->task) { \
- synctask_yield(args->task); \
+ synctask_yield(args->task, NULL); \
} else { \
pthread_mutex_lock(&args->mutex); \
{ \
@@ -230,10 +275,16 @@ struct syncopctx {
task = synctask_get(); \
stb->task = task; \
if (task) \
- frame = task->opframe; \
+ frame = copy_frame(task->opframe); \
else \
frame = syncop_create_frame(THIS); \
\
+ if (!frame) { \
+ stb->op_ret = -1; \
+ stb->op_errno = errno; \
+ break; \
+ } \
+ \
if (task) { \
frame->root->uid = task->uid; \
frame->root->gid = task->gid; \
@@ -245,10 +296,7 @@ struct syncopctx {
STACK_WIND_COOKIE(frame, cbk, (void *)stb, subvol, fn_op, params); \
\
__yield(stb); \
- if (task) \
- STACK_RESET(frame->root); \
- else \
- STACK_DESTROY(frame->root); \
+ STACK_DESTROY(frame->root); \
} while (0)
/*
@@ -297,7 +345,9 @@ synctask_join(struct synctask *task);
void
synctask_wake(struct synctask *task);
void
-synctask_yield(struct synctask *task);
+synctask_yield(struct synctask *task, struct timespec *delta);
+void
+synctask_sleep(int32_t secs);
void
synctask_waitfor(struct synctask *task, int count);
@@ -395,6 +445,24 @@ synclock_trylock(synclock_t *lock);
int
synclock_unlock(synclock_t *lock);
+int32_t
+synccond_init(synccond_t *cond);
+
+void
+synccond_destroy(synccond_t *cond);
+
+int
+synccond_wait(synccond_t *cond, synclock_t *lock);
+
+int
+synccond_timedwait(synccond_t *cond, synclock_t *lock, struct timespec *delta);
+
+void
+synccond_signal(synccond_t *cond);
+
+void
+synccond_broadcast(synccond_t *cond);
+
int
syncbarrier_init(syncbarrier_t *barrier);
int
@@ -634,4 +702,17 @@ syncop_entrylk(xlator_t *subvol, const char *volume, loc_t *loc,
const char *basename, entrylk_cmd cmd, entrylk_type type,
dict_t *xdata_in, dict_t **xdata_out);
+int
+syncop_copy_file_range(xlator_t *subvol, fd_t *fd_in, off64_t off_in,
+ fd_t *fd_out, off64_t off_out, size_t len,
+ uint32_t flags, struct iatt *stbuf,
+ struct iatt *preiatt_dst, struct iatt *postiatt_dst,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *stbuf,
+ struct iatt *prebuf_dst, struct iatt *postbuf_dst,
+ dict_t *xdata);
+
#endif /* _SYNCOP_H */
diff --git a/libglusterfs/src/syscall.h b/libglusterfs/src/glusterfs/syscall.h
index faaf694b22c..b6d3ab4f2ad 100644
--- a/libglusterfs/src/syscall.h
+++ b/libglusterfs/src/glusterfs/syscall.h
@@ -17,6 +17,7 @@
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/socket.h>
+#include <stdio.h>
/* GF follows the Linux XATTR definition, which differs in Darwin. */
#define GF_XATTR_CREATE 0x1 /* set value, fail if attr already exists */
@@ -95,18 +96,27 @@ int
sys_unlink(const char *pathname);
int
+sys_unlinkat(int dfd, const char *pathname);
+
+int
sys_rmdir(const char *pathname);
int
sys_symlink(const char *oldpath, const char *newpath);
int
+sys_symlinkat(const char *oldpath, int dirfd, const char *newpath);
+
+int
sys_rename(const char *oldpath, const char *newpath);
int
sys_link(const char *oldpath, const char *newpath);
int
+sys_linkat(int oldfd, const char *oldpath, int newfd, const char *newpath);
+
+int
sys_chmod(const char *path, mode_t mode);
int
@@ -228,4 +238,41 @@ sys_socket(int domain, int type, int protocol);
int
sys_accept(int sock, struct sockaddr *sockaddr, socklen_t *socklen, int flags);
+#ifdef GF_BSD_HOST_OS
+#ifndef _OFF64_T_DECLARED
+/*
+ * Including <stdio.h> (done above) should actually define
+ * _OFF64_T_DECLARED with off64_t data type being available
+ * for consumption. But, off64_t data type is not recognizable
+ * for FreeBSD versions less than 11. Hence, int64_t is typedefed
+ * to off64_t.
+ */
+#define _OFF64_T_DECLARED
+typedef int64_t off64_t;
+#endif /* _OFF64_T_DECLARED */
+#endif /* GF_BSD_HOST_OS */
+
+/*
+ * According to the man page of copy_file_range, both off_in and off_out are
+ * pointers to the data type loff_t (i.e. loff_t *). But, freebsd does not
+ * have (and recognize) loff_t. Since loff_t is 64 bits, use off64_t
+ * instead. Since it's a pointer type it should be okay. It just needs
+ * to be a pointer-to-64-bit pointer for both 32- and 64-bit platforms.
+ * off64_t is recognized by freebsd.
+ * TODO: In future, when freebsd can recognize loff_t, probably revisit this
+ * and change the off_in and off_out to (loff_t *).
+ */
+ssize_t
+sys_copy_file_range(int fd_in, off64_t *off_in, int fd_out, off64_t *off_out,
+ size_t len, unsigned int flags);
+
+int
+sys_kill(pid_t pid, int sig);
+
+#ifdef __FreeBSD__
+int
+sys_sysctl(const int *name, u_int namelen, void *oldp, size_t *oldlenp,
+ const void *newp, size_t newlen);
+#endif
+
#endif /* __SYSCALL_H__ */
diff --git a/libglusterfs/src/template-component-messages.h b/libglusterfs/src/glusterfs/template-component-messages.h
index 4dcdda4abf4..aa7ad3d1baa 100644
--- a/libglusterfs/src/template-component-messages.h
+++ b/libglusterfs/src/glusterfs/template-component-messages.h
@@ -11,7 +11,7 @@
#ifndef _component_MESSAGES_H_
#define _component_MESSAGES_H_
-#include "glfs-message-id.h"
+#include "glusterfs/glfs-message-id.h"
/* To add new message IDs, append new identifiers at the end of the list.
*
diff --git a/libglusterfs/src/throttle-tbf.h b/libglusterfs/src/glusterfs/throttle-tbf.h
index b44a0f1fdd5..cccb13c83d9 100644
--- a/libglusterfs/src/throttle-tbf.h
+++ b/libglusterfs/src/glusterfs/throttle-tbf.h
@@ -8,9 +8,9 @@
cases as published by the Free Software Foundation.
*/
-#include "list.h"
-#include "xlator.h"
-#include "locking.h"
+#include "glusterfs/list.h"
+#include "glusterfs/xlator.h"
+#include "glusterfs/locking.h"
#ifndef THROTTLE_TBF_H__
#define THROTTLE_TBF_H__
diff --git a/libglusterfs/src/timer.h b/libglusterfs/src/glusterfs/timer.h
index 216fc65144a..ae5b2edf451 100644
--- a/libglusterfs/src/timer.h
+++ b/libglusterfs/src/glusterfs/timer.h
@@ -11,8 +11,8 @@
#ifndef _TIMER_H
#define _TIMER_H
-#include "glusterfs.h"
-#include "xlator.h"
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/xlator.h"
#include <sys/time.h>
#include <pthread.h>
@@ -34,10 +34,11 @@ struct _gf_timer {
};
struct _gf_timer_registry {
+ struct list_head active;
+ pthread_mutex_t lock;
+ pthread_cond_t cond;
pthread_t th;
char fin;
- struct list_head active;
- gf_lock_t lock;
};
typedef struct _gf_timer gf_timer_t;
diff --git a/libglusterfs/src/timespec.h b/libglusterfs/src/glusterfs/timespec.h
index 871871d538c..bb9ab446a5f 100644
--- a/libglusterfs/src/timespec.h
+++ b/libglusterfs/src/glusterfs/timespec.h
@@ -21,6 +21,8 @@
void
timespec_now(struct timespec *ts);
void
+timespec_now_realtime(struct timespec *ts);
+void
timespec_adjust_delta(struct timespec *ts, struct timespec delta);
void
timespec_sub(const struct timespec *begin, const struct timespec *end,
diff --git a/libglusterfs/src/trie.h b/libglusterfs/src/glusterfs/trie.h
index 6d2d8015964..6d2d8015964 100644
--- a/libglusterfs/src/trie.h
+++ b/libglusterfs/src/glusterfs/trie.h
diff --git a/libglusterfs/src/upcall-utils.h b/libglusterfs/src/glusterfs/upcall-utils.h
index 765b8ef1aa6..0de8428c5fc 100644
--- a/libglusterfs/src/upcall-utils.h
+++ b/libglusterfs/src/glusterfs/upcall-utils.h
@@ -11,9 +11,9 @@
#ifndef _UPCALL_UTILS_H
#define _UPCALL_UTILS_H
-#include "iatt.h"
-#include "compat-uuid.h"
-#include "compat.h"
+#include "glusterfs/iatt.h"
+#include "glusterfs/compat-uuid.h"
+#include "glusterfs/compat.h"
/* Flags sent for cache_invalidation */
#define UP_NLINK 0x00000001 /* update nlink */
diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/glusterfs/xlator.h
index 64a61ac0bed..4fd3abdaeff 100644
--- a/libglusterfs/src/xlator.h
+++ b/libglusterfs/src/glusterfs/xlator.h
@@ -11,18 +11,16 @@
#ifndef _XLATOR_H
#define _XLATOR_H
-#include <stdio.h>
-#include <stdint.h>
-#include <inttypes.h>
-
-#include "event-history.h"
-#include "logging.h"
-#include "common-utils.h"
-#include "dict.h"
-#include "compat.h"
-#include "list.h"
-#include "latency.h"
-#include "compat-uuid.h"
+#include <stdint.h> // for int32_t
+#include <sys/types.h> // for off_t, mode_t, off64_t, dev_t
+#include "glusterfs/glusterfs-fops.h" // for GF_FOP_MAXVALUE, entrylk_cmd
+#include "glusterfs/atomic.h" // for gf_atomic_t
+#include "glusterfs/glusterfs.h" // for gf_boolean_t, glusterfs_ctx_t
+#include "glusterfs/compat-uuid.h" // for uuid_t
+#include "glusterfs/compat.h"
+#include "glusterfs/event-history.h"
+#include "glusterfs/dict.h"
+#include "glusterfs/latency.h"
#define FIRST_CHILD(xl) (xl->children->xlator)
#define SECOND_CHILD(xl) (xl->children->next->xlator)
@@ -34,6 +32,8 @@
#define GF_SET_ATTR_ATIME 0x10
#define GF_SET_ATTR_MTIME 0x20
#define GF_SET_ATTR_CTIME 0x40
+#define GF_ATTR_ATIME_NOW 0x80
+#define GF_ATTR_MTIME_NOW 0x100
#define gf_attr_mode_set(mode) ((mode)&GF_SET_ATTR_MODE)
#define gf_attr_uid_set(mode) ((mode)&GF_SET_ATTR_UID)
@@ -54,14 +54,14 @@ typedef struct _loc loc_t;
typedef int32_t (*event_notify_fn_t)(xlator_t *this, int32_t event, void *data,
...);
-#include "list.h"
-#include "gf-dirent.h"
-#include "stack.h"
-#include "iobuf.h"
-#include "globals.h"
-#include "iatt.h"
-#include "options.h"
-#include "client_t.h"
+#include "glusterfs/list.h"
+#include "glusterfs/gf-dirent.h"
+#include "glusterfs/stack.h"
+#include "glusterfs/iobuf.h"
+#include "glusterfs/globals.h"
+#include "glusterfs/iatt.h"
+#include "glusterfs/options.h"
+#include "glusterfs/client_t.h"
struct _loc {
const char *path;
@@ -354,6 +354,11 @@ typedef int32_t (*fop_namelink_cbk_t)(call_frame_t *frame, void *cookie,
int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata);
+typedef int32_t (*fop_copy_file_range_cbk_t)(
+ call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *stbuf, struct iatt *prebuf_dst,
+ struct iatt *postbuf_dst, dict_t *xdata);
+
typedef int32_t (*fop_lookup_t)(call_frame_t *frame, xlator_t *this, loc_t *loc,
dict_t *xdata);
@@ -544,6 +549,11 @@ typedef int32_t (*fop_icreate_t)(call_frame_t *frame, xlator_t *this,
typedef int32_t (*fop_namelink_t)(call_frame_t *frame, xlator_t *this,
loc_t *loc, dict_t *xdata);
+typedef int32_t (*fop_copy_file_range_t)(call_frame_t *frame, xlator_t *this,
+ fd_t *fd_in, off64_t off_in,
+ fd_t *fd_out, off64_t off_out,
+ size_t len, uint32_t flags,
+ dict_t *xdata);
/* WARNING: make sure the list is in order with FOP definition in
`rpc/xdr/src/glusterfs-fops.x`.
@@ -609,6 +619,7 @@ struct xlator_fops {
fop_put_t put;
fop_icreate_t icreate;
fop_namelink_t namelink;
+ fop_copy_file_range_t copy_file_range;
/* these entries are used for a typechecking hack in STACK_WIND _only_ */
/* make sure to add _cbk variables only after defining regular fops as
@@ -673,6 +684,7 @@ struct xlator_fops {
fop_put_cbk_t put_cbk;
fop_icreate_cbk_t icreate_cbk;
fop_namelink_cbk_t namelink_cbk;
+ fop_copy_file_range_cbk_t copy_file_range_cbk;
};
typedef int32_t (*cbk_forget_t)(xlator_t *this, inode_t *inode);
@@ -690,6 +702,8 @@ typedef size_t (*cbk_inodectx_size_t)(xlator_t *this, inode_t *inode);
typedef size_t (*cbk_fdctx_size_t)(xlator_t *this, fd_t *fd);
+typedef void (*cbk_fdclose_t)(xlator_t *this, fd_t *fd);
+
struct xlator_cbks {
cbk_forget_t forget;
cbk_release_t release;
@@ -700,6 +714,8 @@ struct xlator_cbks {
cbk_ictxmerge_t ictxmerge;
cbk_inodectx_size_t ictxsize;
cbk_fdctx_size_t fdctxsize;
+ cbk_fdclose_t fdclose;
+ cbk_fdclose_t fdclosedir;
};
typedef int32_t (*dumpop_priv_t)(xlator_t *this);
@@ -789,7 +805,7 @@ struct _xlator {
struct {
/* for latency measurement */
- fop_latency_t latencies[GF_FOP_MAXVALUE];
+ gf_latency_t latencies[GF_FOP_MAXVALUE];
/* for latency measurement */
fop_metrics_t metrics[GF_FOP_MAXVALUE];
@@ -843,22 +859,15 @@ struct _xlator {
/* Flag to understand how this xlator is categorized */
gf_category_t category;
- /* Variable to save fd_count for detach brick */
- gf_atomic_t fd_cnt;
-
/* Variable to save xprt associated for detach brick */
gf_atomic_t xprtrefcnt;
/* Flag to notify got CHILD_DOWN event for detach brick */
uint32_t notify_down;
-};
-typedef struct {
- int32_t (*init)(xlator_t *this);
- void (*fini)(xlator_t *this);
- int32_t (*reconfigure)(xlator_t *this, dict_t *options);
- event_notify_fn_t notify;
-} class_methods_t;
+ /* Flag to avoid throw duplicate PARENT_DOWN event */
+ uint32_t parent_down;
+};
/* This would be the only structure which needs to be exported by
the translators. For the backward compatibility, in 4.x series
@@ -1032,8 +1041,10 @@ gf_boolean_t
loc_is_nameless(loc_t *loc);
int
xlator_mem_acct_init(xlator_t *xl, int num_types);
+void
+xlator_mem_acct_unref(struct mem_acct *mem_acct);
int
-is_gf_log_command(xlator_t *trans, const char *name, char *value);
+is_gf_log_command(xlator_t *trans, const char *name, char *value, size_t size);
int
glusterd_check_log_level(const char *value);
int
@@ -1082,4 +1093,14 @@ xlator_mem_cleanup(xlator_t *this);
void
handle_default_options(xlator_t *xl, dict_t *options);
+void
+gluster_graph_take_reference(xlator_t *tree);
+
+gf_boolean_t
+mgmt_is_multiplexed_daemon(char *name);
+
+gf_boolean_t
+xlator_is_cleanup_starting(xlator_t *this);
+int
+graph_total_client_xlator(glusterfs_graph_t *graph);
#endif /* _XLATOR_H */
diff --git a/libglusterfs/src/graph-print.c b/libglusterfs/src/graph-print.c
index 3b984b64508..595d74330a1 100644
--- a/libglusterfs/src/graph-print.c
+++ b/libglusterfs/src/graph-print.c
@@ -10,10 +10,10 @@
#include <sys/uio.h>
-#include "common-utils.h"
-#include "xlator.h"
-#include "graph-utils.h"
-#include "libglusterfs-messages.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/xlator.h"
+#include "glusterfs/graph-utils.h"
+#include "glusterfs/libglusterfs-messages.h"
struct gf_printer {
ssize_t (*write)(struct gf_printer *gp, char *buf, size_t len);
@@ -36,25 +36,6 @@ gp_write_file(struct gf_printer *gp, char *buf, size_t len)
return len;
}
-static ssize_t
-gp_write_buf(struct gf_printer *gp, char *buf, size_t len)
-{
- struct iovec *iov = gp->priv;
-
- if (iov->iov_len < len) {
- gf_msg("graph-print", GF_LOG_ERROR, 0, LG_MSG_BUFFER_FULL,
- "buffer full");
-
- return -1;
- }
-
- memcpy(iov->iov_base, buf, len);
- iov->iov_base += len;
- iov->iov_len -= len;
-
- return len;
-}
-
static int
gpprintf(struct gf_printer *gp, const char *format, ...)
{
@@ -152,43 +133,3 @@ glusterfs_graph_print_file(FILE *file, glusterfs_graph_t *graph)
return glusterfs_graph_print(&gp, graph);
}
-
-char *
-glusterfs_graph_print_buf(glusterfs_graph_t *graph)
-{
- FILE *f = NULL;
- struct iovec iov = {
- 0,
- };
- int len = 0;
- char *buf = NULL;
- struct gf_printer gp = {.write = gp_write_buf, .priv = &iov};
-
- f = fopen("/dev/null", "a");
- if (!f) {
- gf_msg("graph-print", GF_LOG_ERROR, errno, LG_MSG_DIR_OP_FAILED,
- "cannot open /dev/null");
-
- return NULL;
- }
- len = glusterfs_graph_print_file(f, graph);
- fclose(f);
- if (len == -1)
- return NULL;
-
- buf = GF_CALLOC(1, len + 1, gf_common_mt_graph_buf);
- if (!buf) {
- return NULL;
- }
- iov.iov_base = buf;
- iov.iov_len = len;
-
- len = glusterfs_graph_print(&gp, graph);
- if (len == -1) {
- GF_FREE(buf);
-
- return NULL;
- }
-
- return buf;
-}
diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c
index 2a213d2c48e..13f298eb3bd 100644
--- a/libglusterfs/src/graph.c
+++ b/libglusterfs/src/graph.c
@@ -8,16 +8,32 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include <dlfcn.h>
-#include <netdb.h>
-#include <fnmatch.h>
-#include <stdlib.h>
-#include "defaults.h"
-#include <unistd.h>
-#include "syscall.h"
-#include <regex.h>
-#include "libglusterfs-messages.h"
+#include <stdint.h> // for uint32_t
+#include <sys/time.h> // for timeval
+#include <errno.h> // for EIO, errno, EINVAL, ENOMEM
+#include <fnmatch.h> // for fnmatch, FNM_NOESCAPE
+#include <openssl/sha.h> // for SHA256_DIGEST_LENGTH
+#include <regex.h> // for regmatch_t, regcomp
+#include <stdio.h> // for fclose, fopen, snprintf
+#include <stdlib.h> // for NULL, atoi, mkstemp
+#include <string.h> // for strcmp, strerror, memcpy
+#include <strings.h> // for rindex
+#include <sys/stat.h> // for stat
+#include <sys/time.h> // for gettimeofday
+#include <unistd.h> // for gethostname, getpid
+#include "glusterfs/common-utils.h" // for gf_strncpy, gf_time_fmt
+#include "glusterfs/defaults.h"
+#include "glusterfs/dict.h" // for dict_foreach, dict_set_...
+#include "glusterfs/globals.h" // for xlator_t, xlator_list_t
+#include "glusterfs/glusterfs.h" // for glusterfs_graph_t, glus...
+#include "glusterfs/glusterfs-fops.h" // for GF_EVENT_GRAPH_NEW, GF_...
+#include "glusterfs/libglusterfs-messages.h" // for LG_MSG_GRAPH_ERROR, LG_...
+#include "glusterfs/list.h" // for list_add, list_del_init
+#include "glusterfs/logging.h" // for gf_msg, GF_LOG_ERROR
+#include "glusterfs/mem-pool.h" // for GF_FREE, gf_strdup, GF_...
+#include "glusterfs/mem-types.h" // for gf_common_mt_xlator_list_t
+#include "glusterfs/options.h" // for xlator_tree_reconfigure
+#include "glusterfs/syscall.h" // for sys_close, sys_stat
#if 0
static void
@@ -25,7 +41,7 @@ _gf_dump_details (int argc, char **argv)
{
extern FILE *gf_log_logfile;
int i = 0;
- char timestr[64];
+ char timestr[GF_TIMESTR_SIZE];
time_t utime = 0;
pid_t mypid = 0;
struct utsname uname_buf = {{0, }, };
@@ -114,6 +130,53 @@ out:
return cert_depth;
}
+xlator_t *
+glusterfs_get_last_xlator(glusterfs_graph_t *graph)
+{
+ xlator_t *trav = graph->first;
+ if (!trav)
+ return NULL;
+
+ while (trav->next)
+ trav = trav->next;
+
+ return trav;
+}
+
+xlator_t *
+glusterfs_mux_xlator_unlink(xlator_t *pxl, xlator_t *cxl)
+{
+ xlator_list_t *unlink = NULL;
+ xlator_list_t *prev = NULL;
+ xlator_list_t **tmp = NULL;
+ xlator_t *next_child = NULL;
+ xlator_t *xl = NULL;
+
+ for (tmp = &pxl->children; *tmp; tmp = &(*tmp)->next) {
+ if ((*tmp)->xlator == cxl) {
+ unlink = *tmp;
+ *tmp = (*tmp)->next;
+ if (*tmp)
+ next_child = (*tmp)->xlator;
+ break;
+ }
+ prev = *tmp;
+ }
+
+ if (!prev)
+ xl = pxl;
+ else if (prev->xlator)
+ xl = prev->xlator->graph->last_xl;
+
+ if (xl)
+ xl->next = next_child;
+ if (next_child)
+ next_child->prev = xl;
+
+ GF_FREE(unlink);
+ return next_child;
+}
+
int
glusterfs_xlator_link(xlator_t *pxl, xlator_t *cxl)
{
@@ -181,7 +244,7 @@ glusterfs_graph_insert(glusterfs_graph_t *graph, glusterfs_ctx_t *ctx,
ixl->ctx = ctx;
ixl->graph = graph;
- ixl->options = get_new_dict();
+ ixl->options = dict_new();
if (!ixl->options)
goto err;
@@ -393,7 +456,7 @@ _log_if_unknown_option(dict_t *dict, char *key, data_t *value, void *data)
found = xlator_volume_option_get(xl, key);
if (!found) {
- gf_msg(xl->name, GF_LOG_WARNING, 0, LG_MSG_XLATOR_OPTION_INVALID,
+ gf_msg(xl->name, GF_LOG_DEBUG, 0, LG_MSG_XLATOR_OPTION_INVALID,
"option '%s' is not recognized", key);
}
@@ -406,39 +469,29 @@ _xlator_check_unknown_options(xlator_t *xl, void *data)
dict_foreach(xl->options, _log_if_unknown_option, xl);
}
-int
+static int
glusterfs_graph_unknown_options(glusterfs_graph_t *graph)
{
xlator_foreach(graph->first, _xlator_check_unknown_options, NULL);
return 0;
}
-void
-fill_uuid(char *uuid, int size)
+static void
+fill_uuid(char *uuid, int size, struct timeval tv)
{
- char hostname[256] = {
- 0,
- };
- struct timeval tv = {
+ char hostname[50] = {
0,
};
- char now_str[64];
-
- if (gettimeofday(&tv, NULL) == -1) {
- gf_msg("graph", GF_LOG_ERROR, errno, LG_MSG_GETTIMEOFDAY_FAILED,
- "gettimeofday: "
- "failed");
- }
+ char now_str[GF_TIMESTR_SIZE];
- if (gethostname(hostname, 256) == -1) {
+ if (gethostname(hostname, sizeof(hostname) - 1) != 0) {
gf_msg("graph", GF_LOG_ERROR, errno, LG_MSG_GETHOSTNAME_FAILED,
- "gethostname: "
- "failed");
+ "gethostname failed");
+ hostname[sizeof(hostname) - 1] = '\0';
}
- gf_time_fmt(now_str, sizeof now_str, tv.tv_sec, gf_timefmt_dirent);
- snprintf(uuid, size, "%s-%d-%s:%" GF_PRI_SUSECONDS, hostname, getpid(),
- now_str, tv.tv_usec);
+ gf_time_fmt_tv(now_str, sizeof now_str, &tv, gf_timefmt_dirent);
+ snprintf(uuid, size, "%s-%d-%s", hostname, getpid(), now_str);
return;
}
@@ -514,14 +567,13 @@ glusterfs_graph_prepare(glusterfs_graph_t *graph, glusterfs_ctx_t *ctx,
} else {
ret = glusterfs_graph_settop(graph, volume_name, _gf_false);
}
- if (!ret) {
- goto ok;
- }
- gf_msg("graph", GF_LOG_ERROR, 0, LG_MSG_GRAPH_ERROR,
- "glusterfs graph settop failed");
- return -1;
-ok:
+ if (ret) {
+ gf_msg("graph", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_ERROR,
+ "glusterfs graph settop failed");
+ errno = EINVAL;
+ return -1;
+ }
/* XXX: WORM VOLUME */
ret = glusterfs_graph_worm(graph, ctx);
@@ -569,7 +621,7 @@ ok:
/* XXX: DOB setting */
gettimeofday(&graph->dob, NULL);
- fill_uuid(graph->graph_uuid, 128);
+ fill_uuid(graph->graph_uuid, sizeof(graph->graph_uuid), graph->dob);
graph->id = ctx->graph_id++;
@@ -1092,6 +1144,8 @@ glusterfs_graph_destroy_residual(glusterfs_graph_t *graph)
ret = xlator_tree_free_memacct(graph->first);
list_del_init(&graph->list);
+ pthread_mutex_destroy(&graph->mutex);
+ pthread_cond_destroy(&graph->child_down_cond);
GF_FREE(graph);
return ret;
@@ -1134,6 +1188,33 @@ out:
}
int
+glusterfs_graph_fini(glusterfs_graph_t *graph)
+{
+ xlator_t *trav = NULL;
+
+ trav = graph->first;
+
+ while (trav) {
+ if (trav->init_succeeded) {
+ trav->cleanup_starting = 1;
+ trav->fini(trav);
+ if (trav->local_pool) {
+ mem_pool_destroy(trav->local_pool);
+ trav->local_pool = NULL;
+ }
+ if (trav->itable) {
+ inode_table_destroy(trav->itable);
+ trav->itable = NULL;
+ }
+ trav->init_succeeded = 0;
+ }
+ trav = trav->next;
+ }
+
+ return 0;
+}
+
+int
glusterfs_graph_attach(glusterfs_graph_t *orig_graph, char *path,
glusterfs_graph_t **newgraph)
{
@@ -1256,3 +1337,544 @@ glusterfs_graph_attach(glusterfs_graph_t *orig_graph, char *path,
return 0;
}
+int
+glusterfs_muxsvc_cleanup_parent(glusterfs_ctx_t *ctx,
+ glusterfs_graph_t *parent_graph)
+{
+ if (parent_graph) {
+ if (parent_graph->first) {
+ xlator_destroy(parent_graph->first);
+ }
+ ctx->active = NULL;
+ GF_FREE(parent_graph);
+ parent_graph = NULL;
+ }
+ return 0;
+}
+
+void *
+glusterfs_graph_cleanup(void *arg)
+{
+ glusterfs_graph_t *graph = NULL;
+ glusterfs_ctx_t *ctx = THIS->ctx;
+ int ret = -1;
+ graph = arg;
+
+ if (!graph)
+ return NULL;
+
+ /* To destroy the graph, fitst sent a GF_EVENT_PARENT_DOWN
+ * Then wait for GF_EVENT_CHILD_DOWN to get on the top
+ * xl. Once we have GF_EVENT_CHILD_DOWN event, then proceed
+ * to fini.
+ *
+ * During fini call, this will take a last unref on rpc and
+ * rpc_transport_object.
+ */
+ if (graph->first)
+ default_notify(graph->first, GF_EVENT_PARENT_DOWN, graph->first);
+
+ ret = pthread_mutex_lock(&graph->mutex);
+ if (ret != 0) {
+ gf_msg("glusterfs", GF_LOG_ERROR, EAGAIN, LG_MSG_GRAPH_CLEANUP_FAILED,
+ "Failed to acquire a lock");
+ goto out;
+ }
+ /* check and wait for CHILD_DOWN for top xlator*/
+ while (graph->used) {
+ ret = pthread_cond_wait(&graph->child_down_cond, &graph->mutex);
+ if (ret != 0)
+ gf_msg("glusterfs", GF_LOG_INFO, 0, LG_MSG_GRAPH_CLEANUP_FAILED,
+ "cond wait failed ");
+ }
+
+ ret = pthread_mutex_unlock(&graph->mutex);
+ if (ret != 0) {
+ gf_msg("glusterfs", GF_LOG_ERROR, EAGAIN, LG_MSG_GRAPH_CLEANUP_FAILED,
+ "Failed to release a lock");
+ }
+
+ /* Though we got a child down on top xlator, we have to wait until
+ * all the notifier to exit. Because there should not be any threads
+ * that access xl variables.
+ */
+ pthread_mutex_lock(&ctx->notify_lock);
+ {
+ while (ctx->notifying)
+ pthread_cond_wait(&ctx->notify_cond, &ctx->notify_lock);
+ }
+ pthread_mutex_unlock(&ctx->notify_lock);
+
+ pthread_mutex_lock(&ctx->cleanup_lock);
+ {
+ glusterfs_graph_fini(graph);
+ glusterfs_graph_destroy(graph);
+ }
+ pthread_mutex_unlock(&ctx->cleanup_lock);
+out:
+ return NULL;
+}
+
+glusterfs_graph_t *
+glusterfs_muxsvc_setup_parent_graph(glusterfs_ctx_t *ctx, char *name,
+ char *type)
+{
+ glusterfs_graph_t *parent_graph = NULL;
+ xlator_t *ixl = NULL;
+ int ret = -1;
+ parent_graph = GF_CALLOC(1, sizeof(*parent_graph),
+ gf_common_mt_glusterfs_graph_t);
+ if (!parent_graph)
+ goto out;
+
+ INIT_LIST_HEAD(&parent_graph->list);
+
+ ctx->active = parent_graph;
+ ixl = GF_CALLOC(1, sizeof(*ixl), gf_common_mt_xlator_t);
+ if (!ixl)
+ goto out;
+
+ ixl->ctx = ctx;
+ ixl->graph = parent_graph;
+ ixl->options = dict_new();
+ if (!ixl->options)
+ goto out;
+
+ ixl->name = gf_strdup(name);
+ if (!ixl->name)
+ goto out;
+
+ ixl->is_autoloaded = 1;
+
+ if (xlator_set_type(ixl, type) == -1) {
+ gf_msg("glusterfs", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_SETUP_FAILED,
+ "%s (%s) set type failed", name, type);
+ goto out;
+ }
+
+ glusterfs_graph_set_first(parent_graph, ixl);
+ parent_graph->top = ixl;
+ ixl = NULL;
+
+ gettimeofday(&parent_graph->dob, NULL);
+ fill_uuid(parent_graph->graph_uuid, 128, parent_graph->dob);
+ parent_graph->id = ctx->graph_id++;
+ ret = 0;
+out:
+ if (ixl)
+ xlator_destroy(ixl);
+
+ if (ret) {
+ glusterfs_muxsvc_cleanup_parent(ctx, parent_graph);
+ parent_graph = NULL;
+ }
+ return parent_graph;
+}
+
+int
+glusterfs_svc_mux_pidfile_cleanup(gf_volfile_t *volfile_obj)
+{
+ if (!volfile_obj || !volfile_obj->pidfp)
+ return 0;
+
+ gf_msg_trace("glusterfsd", 0, "pidfile %s cleanup", volfile_obj->vol_id);
+
+ lockf(fileno(volfile_obj->pidfp), F_ULOCK, 0);
+ fclose(volfile_obj->pidfp);
+ volfile_obj->pidfp = NULL;
+
+ return 0;
+}
+
+int
+glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj)
+{
+ xlator_t *last_xl = NULL;
+ glusterfs_graph_t *graph = NULL;
+ glusterfs_graph_t *parent_graph = NULL;
+ pthread_t clean_graph = {
+ 0,
+ };
+ int ret = -1;
+ xlator_t *xl = NULL;
+
+ if (!ctx || !ctx->active || !volfile_obj)
+ goto out;
+
+ pthread_mutex_lock(&ctx->cleanup_lock);
+ {
+ parent_graph = ctx->active;
+ graph = volfile_obj->graph;
+ if (!graph)
+ goto unlock;
+ if (graph->first)
+ xl = graph->first;
+
+ last_xl = graph->last_xl;
+ if (last_xl)
+ last_xl->next = NULL;
+ if (!xl || xl->cleanup_starting)
+ goto unlock;
+
+ xl->cleanup_starting = 1;
+ gf_msg("mgmt", GF_LOG_INFO, 0, LG_MSG_GRAPH_DETACH_STARTED,
+ "detaching child %s", volfile_obj->vol_id);
+
+ list_del_init(&volfile_obj->volfile_list);
+ glusterfs_mux_xlator_unlink(parent_graph->top, xl);
+ glusterfs_svc_mux_pidfile_cleanup(volfile_obj);
+ parent_graph->last_xl = glusterfs_get_last_xlator(parent_graph);
+ parent_graph->xl_count -= graph->xl_count;
+ parent_graph->leaf_count -= graph->leaf_count;
+ parent_graph->id++;
+ ret = 0;
+ }
+unlock:
+ pthread_mutex_unlock(&ctx->cleanup_lock);
+out:
+ if (!ret) {
+ list_del_init(&volfile_obj->volfile_list);
+ if (graph) {
+ ret = gf_thread_create_detached(
+ &clean_graph, glusterfs_graph_cleanup, graph, "graph_clean");
+ if (ret) {
+ gf_msg("glusterfs", GF_LOG_ERROR, EINVAL,
+ LG_MSG_GRAPH_CLEANUP_FAILED,
+ "%s failed to create clean "
+ "up thread",
+ volfile_obj->vol_id);
+ ret = 0;
+ }
+ }
+ GF_FREE(volfile_obj);
+ }
+ return ret;
+}
+
+int
+glusterfs_svc_mux_pidfile_setup(gf_volfile_t *volfile_obj, const char *pid_file)
+{
+ int ret = -1;
+ FILE *pidfp = NULL;
+
+ if (!pid_file || !volfile_obj)
+ goto out;
+
+ if (volfile_obj->pidfp) {
+ ret = 0;
+ goto out;
+ }
+ pidfp = fopen(pid_file, "a+");
+ if (!pidfp) {
+ goto out;
+ }
+ volfile_obj->pidfp = pidfp;
+
+ ret = lockf(fileno(pidfp), F_TLOCK, 0);
+ if (ret) {
+ ret = 0;
+ goto out;
+ }
+out:
+ return ret;
+}
+
+int
+glusterfs_svc_mux_pidfile_update(gf_volfile_t *volfile_obj,
+ const char *pid_file, pid_t pid)
+{
+ int ret = 0;
+ FILE *pidfp = NULL;
+ int old_pid;
+
+ if (!volfile_obj->pidfp) {
+ ret = glusterfs_svc_mux_pidfile_setup(volfile_obj, pid_file);
+ if (ret == -1)
+ goto out;
+ }
+ pidfp = volfile_obj->pidfp;
+ ret = fscanf(pidfp, "%d", &old_pid);
+ if (ret <= 0) {
+ goto update;
+ }
+ if (old_pid == pid) {
+ ret = 0;
+ goto out;
+ } else {
+ gf_msg("mgmt", GF_LOG_INFO, 0, LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED,
+ "Old pid=%d found in pidfile %s. Cleaning the old pid and "
+ "Updating new pid=%d",
+ old_pid, pid_file, pid);
+ }
+update:
+ ret = sys_ftruncate(fileno(pidfp), 0);
+ if (ret) {
+ gf_msg("glusterfsd", GF_LOG_ERROR, errno,
+ LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED,
+ "pidfile %s truncation failed", pid_file);
+ goto out;
+ }
+
+ ret = fprintf(pidfp, "%d\n", pid);
+ if (ret <= 0) {
+ gf_msg("glusterfsd", GF_LOG_ERROR, errno,
+ LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED, "pidfile %s write failed",
+ pid_file);
+ goto out;
+ }
+
+ ret = fflush(pidfp);
+ if (ret) {
+ gf_msg("glusterfsd", GF_LOG_ERROR, errno,
+ LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED, "pidfile %s write failed",
+ pid_file);
+ goto out;
+ }
+out:
+ return ret;
+}
+
+int
+glusterfs_update_mux_pid(dict_t *dict, gf_volfile_t *volfile_obj)
+{
+ char *file = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("graph", dict, out);
+ GF_VALIDATE_OR_GOTO("graph", volfile_obj, out);
+
+ ret = dict_get_str(dict, "pidfile", &file);
+ if (ret < 0) {
+ gf_msg("mgmt", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_SETUP_FAILED,
+ "Failed to get pidfile from dict for volfile_id=%s",
+ volfile_obj->vol_id);
+ }
+
+ ret = glusterfs_svc_mux_pidfile_update(volfile_obj, file, getpid());
+ if (ret < 0) {
+ ret = -1;
+ gf_msg("mgmt", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_SETUP_FAILED,
+ "Failed to update "
+ "the pidfile for volfile_id=%s",
+ volfile_obj->vol_id);
+
+ goto out;
+ }
+
+ if (ret == 1)
+ gf_msg("mgmt", GF_LOG_INFO, 0, LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED,
+ "PID %d updated in pidfile=%s", getpid(), file);
+ ret = 0;
+out:
+ return ret;
+}
+int
+glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp,
+ char *volfile_id, char *checksum,
+ dict_t *dict)
+{
+ glusterfs_graph_t *graph = NULL;
+ glusterfs_graph_t *parent_graph = NULL;
+ glusterfs_graph_t *clean_graph = NULL;
+ int ret = -1;
+ xlator_t *xl = NULL;
+ xlator_t *last_xl = NULL;
+ gf_volfile_t *volfile_obj = NULL;
+ pthread_t thread_id = {
+ 0,
+ };
+
+ if (!ctx)
+ goto out;
+ parent_graph = ctx->active;
+ graph = glusterfs_graph_construct(fp);
+ if (!graph) {
+ gf_msg("glusterfsd", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED,
+ "failed to construct the graph");
+ goto out;
+ }
+ graph->parent_down = 0;
+ graph->last_xl = glusterfs_get_last_xlator(graph);
+
+ for (xl = graph->first; xl; xl = xl->next) {
+ if (strcmp(xl->type, "mount/fuse") == 0) {
+ gf_msg("glusterfsd", GF_LOG_ERROR, EINVAL,
+ LG_MSG_GRAPH_ATTACH_FAILED,
+ "fuse xlator cannot be specified in volume file");
+ goto out;
+ }
+ }
+
+ graph->leaf_count = glusterfs_count_leaves(glusterfs_root(graph));
+ xl = graph->first;
+ /* TODO memory leaks everywhere need to free graph in case of error */
+ if (glusterfs_graph_prepare(graph, ctx, xl->name)) {
+ gf_msg("glusterfsd", GF_LOG_WARNING, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED,
+ "failed to prepare graph for xlator %s", xl->name);
+ ret = -1;
+ goto out;
+ } else if (glusterfs_graph_init(graph)) {
+ gf_msg("glusterfsd", GF_LOG_WARNING, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED,
+ "failed to initialize graph for xlator %s", xl->name);
+ ret = -1;
+ goto out;
+ } else if (glusterfs_graph_parent_up(graph)) {
+ gf_msg("glusterfsd", GF_LOG_WARNING, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED,
+ "failed to link the graphs for xlator %s ", xl->name);
+ ret = -1;
+ goto out;
+ }
+
+ if (!parent_graph) {
+ parent_graph = glusterfs_muxsvc_setup_parent_graph(ctx, "glustershd",
+ "debug/io-stats");
+ if (!parent_graph)
+ goto out;
+ ((xlator_t *)parent_graph->top)->next = xl;
+ clean_graph = parent_graph;
+ } else {
+ last_xl = parent_graph->last_xl;
+ if (last_xl)
+ last_xl->next = xl;
+ xl->prev = last_xl;
+ }
+ parent_graph->last_xl = graph->last_xl;
+
+ ret = glusterfs_xlator_link(parent_graph->top, xl);
+ if (ret) {
+ gf_msg("graph", GF_LOG_ERROR, 0, LG_MSG_EVENT_NOTIFY_FAILED,
+ "parent up notification failed");
+ goto out;
+ }
+ parent_graph->xl_count += graph->xl_count;
+ parent_graph->leaf_count += graph->leaf_count;
+ parent_graph->id++;
+
+ volfile_obj = GF_CALLOC(1, sizeof(gf_volfile_t), gf_common_volfile_t);
+ if (!volfile_obj) {
+ ret = -1;
+ goto out;
+ }
+ volfile_obj->pidfp = NULL;
+ snprintf(volfile_obj->vol_id, sizeof(volfile_obj->vol_id), "%s",
+ volfile_id);
+
+ if (strcmp(ctx->cmd_args.process_name, "glustershd") == 0) {
+ ret = glusterfs_update_mux_pid(dict, volfile_obj);
+ if (ret == -1) {
+ GF_FREE(volfile_obj);
+ goto out;
+ }
+ }
+
+ graph->used = 1;
+ parent_graph->id++;
+ list_add(&graph->list, &ctx->graphs);
+ INIT_LIST_HEAD(&volfile_obj->volfile_list);
+ volfile_obj->graph = graph;
+ memcpy(volfile_obj->volfile_checksum, checksum,
+ sizeof(volfile_obj->volfile_checksum));
+ list_add_tail(&volfile_obj->volfile_list, &ctx->volfile_list);
+ gf_log_dump_graph(fp, graph);
+ graph = NULL;
+
+ ret = 0;
+out:
+ if (ret) {
+ if (graph) {
+ gluster_graph_take_reference(graph->first);
+ ret = gf_thread_create_detached(&thread_id, glusterfs_graph_cleanup,
+ graph, "graph_clean");
+ if (ret) {
+ gf_msg("glusterfs", GF_LOG_ERROR, EINVAL,
+ LG_MSG_GRAPH_CLEANUP_FAILED,
+ "%s failed to create clean "
+ "up thread",
+ volfile_id);
+ ret = 0;
+ }
+ }
+ if (clean_graph)
+ glusterfs_muxsvc_cleanup_parent(ctx, clean_graph);
+ }
+ return ret;
+}
+
+int
+glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx,
+ gf_volfile_t *volfile_obj, char *checksum,
+ dict_t *dict)
+{
+ glusterfs_graph_t *oldvolfile_graph = NULL;
+ glusterfs_graph_t *newvolfile_graph = NULL;
+ char vol_id[NAME_MAX + 1];
+
+ int ret = -1;
+
+ if (!ctx) {
+ gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, 0, LG_MSG_CTX_NULL,
+ "ctx is NULL");
+ goto out;
+ }
+
+ /* Change the message id */
+ if (!volfile_obj) {
+ gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, 0, LG_MSG_CTX_NULL,
+ "failed to get volfile object");
+ goto out;
+ }
+
+ oldvolfile_graph = volfile_obj->graph;
+ if (!oldvolfile_graph) {
+ goto out;
+ }
+
+ newvolfile_graph = glusterfs_graph_construct(newvolfile_fp);
+
+ if (!newvolfile_graph) {
+ goto out;
+ }
+ newvolfile_graph->last_xl = glusterfs_get_last_xlator(newvolfile_graph);
+
+ glusterfs_graph_prepare(newvolfile_graph, ctx, newvolfile_graph->first);
+
+ if (!is_graph_topology_equal(oldvolfile_graph, newvolfile_graph)) {
+ ret = snprintf(vol_id, sizeof(vol_id), "%s", volfile_obj->vol_id);
+ if (ret < 0)
+ goto out;
+ ret = glusterfs_process_svc_detach(ctx, volfile_obj);
+ if (ret) {
+ gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, EINVAL,
+ LG_MSG_GRAPH_CLEANUP_FAILED,
+ "Could not detach "
+ "old graph. Aborting the reconfiguration operation");
+ goto out;
+ }
+ volfile_obj = NULL;
+ ret = glusterfs_process_svc_attach_volfp(ctx, newvolfile_fp, vol_id,
+ checksum, dict);
+ goto out;
+ }
+
+ gf_msg_debug("glusterfsd-mgmt", 0,
+ "Only options have changed in the"
+ " new graph");
+
+ ret = glusterfs_graph_reconfigure(oldvolfile_graph, newvolfile_graph);
+ if (ret) {
+ gf_msg_debug("glusterfsd-mgmt", 0,
+ "Could not reconfigure "
+ "new options in old graph");
+ goto out;
+ }
+ memcpy(volfile_obj->volfile_checksum, checksum,
+ sizeof(volfile_obj->volfile_checksum));
+
+ ret = 0;
+out:
+
+ if (newvolfile_graph)
+ glusterfs_graph_destroy(newvolfile_graph);
+
+ return ret;
+}
diff --git a/libglusterfs/src/graph.l b/libglusterfs/src/graph.l
index 8af28a43539..b9d4b2b6828 100644
--- a/libglusterfs/src/graph.l
+++ b/libglusterfs/src/graph.l
@@ -14,35 +14,27 @@
%{
#define YYSTYPE char *
-#include "xlator.h"
+#include "glusterfs/xlator.h"
#include "y.tab.h"
#include <string.h>
-#define START_STRSIZE 32
static char *text;
-static int text_asize;
static int text_size;
void append_string(const char *str, int size)
{
- int new_size = text_size + size + 1;
- if (new_size > text_asize) {
- new_size += START_STRSIZE - 1;
- new_size &= -START_STRSIZE;
- if (!text) {
- text = GF_CALLOC (1, new_size,
- gf_common_mt_char);
- } else {
- text = GF_REALLOC (text, new_size);
- }
- if (!text) {
- return;
- }
- text_asize = new_size;
- }
- memcpy(text + text_size, str, size);
- text_size += size;
- text[text_size] = 0;
+ int new_size = text_size + size + 1;
+ if (!text) {
+ text = GF_CALLOC (1, new_size, gf_common_mt_char);
+ } else {
+ text = GF_REALLOC (text, new_size);
+ }
+ if (!text) {
+ return;
+ }
+ memcpy(text + text_size, str, size);
+ text_size += size;
+ text[text_size] = 0;
}
%}
@@ -65,12 +57,14 @@ TYPE [t][y][p][e]
\\. { append_string (yytext + 1, yyleng - 1); }
\" {
if (0) {
- yyunput (0, NULL);
+ yyunput (0, NULL);
}
BEGIN (INITIAL);
graphyylval = text;
+ text = NULL;
+ text_size = 0;
return STRING_TOK;
- }
+ }
}
[^ \t\r\n\"\\]+ { graphyylval = gf_strdup (yytext) ; return ID; }
[ \t\r\n]+ ;
diff --git a/libglusterfs/src/graph.y b/libglusterfs/src/graph.y
index 3c3f7b5bb82..e63febdc08b 100644
--- a/libglusterfs/src/graph.y
+++ b/libglusterfs/src/graph.y
@@ -22,11 +22,11 @@
#define RELAX_POISONING
-#include "xlator.h"
-#include "graph-utils.h"
-#include "logging.h"
-#include "syscall.h"
-#include "libglusterfs-messages.h"
+#include "glusterfs/xlator.h"
+#include "glusterfs/graph-utils.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/syscall.h"
+#include "glusterfs/libglusterfs-messages.h"
static int new_volume (char *name);
static int volume_type (char *type);
@@ -123,7 +123,7 @@ new_volume (char *name)
int ret = 0;
if (!name) {
- gf_msg_debug ("parser", 0,"Invalid argument name: '%s'", name);
+ gf_msg_debug ("parser", 0,"Invalid argument name");
ret = -1;
goto out;
}
@@ -164,7 +164,8 @@ new_volume (char *name)
goto out;
}
- curr->options = get_new_dict ();
+ INIT_LIST_HEAD(&curr->volume_options);
+ curr->options = dict_new ();
if (!curr->options) {
GF_FREE (curr->name);
@@ -542,6 +543,9 @@ glusterfs_graph_new ()
INIT_LIST_HEAD (&graph->list);
+ pthread_mutex_init(&graph->mutex, NULL);
+ pthread_cond_init(&graph->child_down_cond, NULL);
+
gettimeofday (&graph->dob, NULL);
return graph;
diff --git a/libglusterfs/src/hashfn.c b/libglusterfs/src/hashfn.c
index 5c4561f1ee8..d2237e99f83 100644
--- a/libglusterfs/src/hashfn.c
+++ b/libglusterfs/src/hashfn.c
@@ -11,24 +11,12 @@
#include <stdint.h>
#include <stdlib.h>
-#include "hashfn.h"
-
#define get16bits(d) (*((const uint16_t *)(d)))
#define DM_DELTA 0x9E3779B9
#define DM_FULLROUNDS 10 /* 32 is overkill, 16 is strong crypto */
#define DM_PARTROUNDS 6 /* 6 gets complete mixing */
-uint32_t
-ReallySimpleHash(char *path, int len)
-{
- uint32_t hash = 0;
- for (; len > 0; len--)
- hash ^= (char)path[len];
-
- return hash;
-}
-
/*
This is apparently the "fastest hash function for strings".
Written by Paul Hsieh <http://www.azillionmonkeys.com/qed/hash.html>
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
index a5dcac7d7e1..dbadf77442d 100644
--- a/libglusterfs/src/inode.c
+++ b/libglusterfs/src/inode.c
@@ -8,21 +8,114 @@
cases as published by the Free Software Foundation.
*/
-#include "inode.h"
-#include "common-utils.h"
-#include "statedump.h"
+#include "glusterfs/inode.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/statedump.h"
#include <pthread.h>
#include <sys/types.h>
#include <stdint.h>
-#include "list.h"
-#include <time.h>
+#include "glusterfs/list.h"
#include <assert.h>
-#include "libglusterfs-messages.h"
+#include "glusterfs/libglusterfs-messages.h"
/* TODO:
move latest accessed dentry to list_head of inode
*/
+// clang-format off
+/*
+
+Details as per Xavi:
+
+ I think we should have 3 lists: active, lru and invalidate.
+
+We'll need 3 things: refs, nlookups and invalidate_sent flag. Any change of
+refs, invalidate_sent flag and moving from one list to another must be done
+atomically.
+
+With this information, these are the states that cause a transition:
+
+ refs nlookups inv_sent op
+ 1 0 0 unref -> refs = 0, active--->destroy
+ 1 1 0 unref -> refs = 0, active--->lru
+ 1 1 0 forget -> nlookups = 0, active--->active
+ *0 1 0 forget -> nlookups = 0, lru--->destroy
+ *0 1 1 forget -> nlookups = 0, invalidate--->destroy
+ 0 1 0 ref -> refs = 1, lru--->active
+ 0 1 1 ref -> refs = 1, inv_sent = 0, invalidate--->active
+ 0 1 0 overflow -> refs = 1, inv_sent = 1, lru--->invalidate
+ 1 1 1 unref -> refs = 0, invalidate--->invalidate
+ 1 1 1 forget -> nlookups = 0, inv_sent = 0, invalidate--->active
+
+(*) technically these combinations cannot happen because a forget sent by the
+kernel first calls ref() and then unref(). However it's equivalent.
+
+overflow means that lru list has grown beyond the limit and the inode needs to
+be invalidated. All other combinations do not cause a change in state or are not
+possible.
+
+Based on this, the code could be similar to this:
+
+ ref(inode, inv)
+ {
+ if (refs == 0) {
+ if (inv_sent) {
+ invalidate_count--;
+ inv_sent = 0;
+ } else {
+ lru_count--;
+ }
+ if (inv) {
+ inv_sent = 1;
+ invalidate_count++;
+ list_move(inode, invalidate);
+ } else {
+ active_count++;
+ list_move(inode, active);
+ }
+ }
+ refs++;
+ }
+
+ unref(inode, clear)
+ {
+ if (clear && inv_sent) {
+ // there is a case of fuse itself sending forget, without
+ // invalidate, after entry delete, like unlink(), rmdir().
+ inv_sent = 0;
+ invalidate_count--;
+ active_count++;
+ list_move(inode, active);
+ }
+ refs--;
+ if ((refs == 0) && !inv_sent) {
+ active_count--;
+ if (nlookups == 0) {
+ destroy(inode);
+ } else {
+ lru_count++;
+ list_move(inode, lru);
+ }
+ }
+ }
+
+ forget(inode)
+ {
+ ref(inode, false);
+ nlookups--;
+ unref(inode, true);
+ }
+
+ overflow(inode)
+ {
+ ref(inode, true);
+ invalidator(inode);
+ unref(inode, false);
+ }
+
+*/
+// clang-format on
+
#define INODE_DUMP_LIST(head, key_buf, key_prefix, list_type) \
{ \
int i = 1; \
@@ -37,7 +130,7 @@
}
static inode_t *
-__inode_unref(inode_t *inode);
+__inode_unref(inode_t *inode, bool clear);
static int
inode_table_prune(inode_table_t *table);
@@ -65,27 +158,15 @@ hash_dentry(inode_t *parent, const char *name, int mod)
static int
hash_gfid(uuid_t uuid, int mod)
{
- int ret = 0;
-
- ret = uuid[15] + (uuid[14] << 8);
-
- return ret;
+ return ((uuid[15] + (uuid[14] << 8)) % mod);
}
static void
-__dentry_hash(dentry_t *dentry)
+__dentry_hash(dentry_t *dentry, const int hash)
{
inode_table_t *table = NULL;
- int hash = 0;
-
- if (!dentry) {
- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_DENTRY_NOT_FOUND,
- "dentry not found");
- return;
- }
table = dentry->inode->table;
- hash = hash_dentry(dentry->parent, dentry->name, table->hashsize);
list_del_init(&dentry->hash);
list_add(&dentry->hash, &table->name_hash[hash]);
@@ -94,49 +175,44 @@ __dentry_hash(dentry_t *dentry)
static int
__is_dentry_hashed(dentry_t *dentry)
{
- if (!dentry) {
- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_DENTRY_NOT_FOUND,
- "dentry not found");
- return 0;
- }
-
return !list_empty(&dentry->hash);
}
static void
__dentry_unhash(dentry_t *dentry)
{
- if (!dentry) {
- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_DENTRY_NOT_FOUND,
- "dentry not found");
- return;
- }
-
list_del_init(&dentry->hash);
}
static void
-__dentry_unset(dentry_t *dentry)
+dentry_destroy(dentry_t *dentry)
{
- if (!dentry) {
- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_DENTRY_NOT_FOUND,
- "dentry not found");
+ if (!dentry)
return;
- }
+
+ GF_FREE(dentry->name);
+ dentry->name = NULL;
+ mem_put(dentry);
+
+ return;
+}
+
+static dentry_t *
+__dentry_unset(dentry_t *dentry)
+{
+ if (!dentry)
+ return NULL;
__dentry_unhash(dentry);
list_del_init(&dentry->inode_list);
- GF_FREE(dentry->name);
- dentry->name = NULL;
-
if (dentry->parent) {
- __inode_unref(dentry->parent);
+ __inode_unref(dentry->parent, false);
dentry->parent = NULL;
}
- mem_put(dentry);
+ return dentry;
}
static int
@@ -156,15 +232,15 @@ __foreach_ancestor_dentry(dentry_t *dentry,
ret = per_dentry_fn(dentry, data);
if (ret) {
- gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_PER_DENTRY_FAILED,
- "per dentry fn returned %d", ret);
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_PER_DENTRY_FAILED,
+ "ret=%d", ret, NULL);
goto out;
}
parent = dentry->parent;
if (!parent) {
- gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_PARENT_DENTRY_NOT_FOUND,
- "parent not found");
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_PARENT_DENTRY_NOT_FOUND,
+ NULL);
goto out;
}
@@ -195,22 +271,12 @@ static int
__is_dentry_cyclic(dentry_t *dentry)
{
int ret = 0;
- inode_t *inode = NULL;
- char *name = "<nul>";
ret = __foreach_ancestor_dentry(dentry, __check_cycle, dentry->inode);
if (ret) {
- inode = dentry->inode;
-
- if (dentry->name)
- name = dentry->name;
-
- gf_msg(dentry->inode->table->name, GF_LOG_CRITICAL, 0,
- LG_MSG_DENTRY_CYCLIC_LOOP,
- "detected cyclic loop "
- "formation during inode linkage. inode (%s) linking "
- "under itself as %s",
- uuid_utoa(inode->gfid), name);
+ gf_smsg(dentry->inode->table->name, GF_LOG_CRITICAL, 0,
+ LG_MSG_DENTRY_CYCLIC_LOOP, "gfid=%s name=-%s",
+ uuid_utoa(dentry->inode->gfid), dentry->name, NULL);
}
return ret;
@@ -219,41 +285,19 @@ __is_dentry_cyclic(dentry_t *dentry)
static void
__inode_unhash(inode_t *inode)
{
- if (!inode) {
- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
- "inode not found");
- return;
- }
-
list_del_init(&inode->hash);
}
static int
__is_inode_hashed(inode_t *inode)
{
- if (!inode) {
- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
- "inode not found");
- return 0;
- }
-
return !list_empty(&inode->hash);
}
static void
-__inode_hash(inode_t *inode)
+__inode_hash(inode_t *inode, const int hash)
{
- inode_table_t *table = NULL;
- int hash = 0;
-
- if (!inode) {
- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
- "inode not found");
- return;
- }
-
- table = inode->table;
- hash = hash_gfid(inode->gfid, 65536);
+ inode_table_t *table = inode->table;
list_del_init(&inode->hash);
list_add(&inode->hash, &table->inode_hash[hash]);
@@ -265,12 +309,6 @@ __dentry_search_for_inode(inode_t *inode, uuid_t pargfid, const char *name)
dentry_t *dentry = NULL;
dentry_t *tmp = NULL;
- if (!inode || !name) {
- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
- "inode || name not found");
- return NULL;
- }
-
/* earlier, just the ino was sent, which could have been 0, now
we deal with gfid, and if sent gfid is null or 0, no need to
continue with the check */
@@ -296,26 +334,20 @@ __inode_ctx_free(inode_t *inode)
xlator_t *xl = NULL;
xlator_t *old_THIS = NULL;
- if (!inode) {
- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
- "inode not found");
- return;
- }
-
if (!inode->_ctx) {
- gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_CTX_NULL,
- "_ctx not found");
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_CTX_NULL, NULL);
goto noctx;
}
- for (index = 0; index < inode->table->xl->graph->xl_count; index++) {
+ for (index = 0; index < inode->table->ctxcount; index++) {
if (inode->_ctx[index].value1 || inode->_ctx[index].value2) {
xl = (xlator_t *)(long)inode->_ctx[index].xl_key;
- old_THIS = THIS;
- THIS = xl;
- if (!xl->call_cleanup && xl->cbks->forget)
+ if (xl && !xl->call_cleanup && xl->cbks->forget) {
+ old_THIS = THIS;
+ THIS = xl;
xl->cbks->forget(xl, inode);
- THIS = old_THIS;
+ THIS = old_THIS;
+ }
}
}
@@ -329,12 +361,6 @@ noctx:
static void
__inode_destroy(inode_t *inode)
{
- if (!inode) {
- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
- "inode not found");
- return;
- }
-
__inode_ctx_free(inode);
LOCK_DESTROY(&inode->lock);
@@ -377,9 +403,6 @@ inode_ctx_merge(fd_t *fd, inode_t *inode, inode_t *linked_inode)
static void
__inode_activate(inode_t *inode)
{
- if (!inode)
- return;
-
list_move(&inode->list, &inode->table->active);
inode->table->active_size++;
}
@@ -390,19 +413,13 @@ __inode_passivate(inode_t *inode)
dentry_t *dentry = NULL;
dentry_t *t = NULL;
- if (!inode) {
- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
- "inode not found");
- return;
- }
-
list_move_tail(&inode->list, &inode->table->lru);
inode->table->lru_size++;
list_for_each_entry_safe(dentry, t, &inode->dentry_list, inode_list)
{
if (!__is_dentry_hashed(dentry))
- __dentry_unset(dentry);
+ dentry_destroy(__dentry_unset(dentry));
}
}
@@ -412,12 +429,6 @@ __inode_retire(inode_t *inode)
dentry_t *dentry = NULL;
dentry_t *t = NULL;
- if (!inode) {
- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
- "inode not found");
- return;
- }
-
list_move_tail(&inode->list, &inode->table->purge);
inode->table->purge_size++;
@@ -425,7 +436,7 @@ __inode_retire(inode_t *inode)
list_for_each_entry_safe(dentry, t, &inode->dentry_list, inode_list)
{
- __dentry_unset(dentry);
+ dentry_destroy(__dentry_unset(dentry));
}
}
@@ -446,17 +457,12 @@ out:
}
static inode_t *
-__inode_unref(inode_t *inode)
+__inode_unref(inode_t *inode, bool clear)
{
int index = 0;
xlator_t *this = NULL;
uint64_t nlookup = 0;
- if (!inode)
- return NULL;
-
- this = THIS;
-
/*
* Root inode should always be in active list of inode table. So unrefs
* on root inode are no-ops.
@@ -464,6 +470,48 @@ __inode_unref(inode_t *inode)
if (__is_root_gfid(inode->gfid))
return inode;
+ /*
+ * No need to acquire inode table's lock
+ * as __inode_unref is called after acquiding
+ * the inode table's lock.
+ */
+ if (inode->table->cleanup_started && !inode->ref)
+ /*
+ * There is a good chance that, the inode
+ * on which unref came has already been
+ * zero refed and added to the purge list.
+ * This can happen when inode table is
+ * being destroyed (glfs_fini is something
+ * which destroys the inode table).
+ *
+ * Consider a directory 'a' which has a file
+ * 'b'. Now as part of inode table destruction
+ * zero refing of inodes does not happen from
+ * leaf to the root. It happens in the order
+ * inodes are present in the list. So, in this
+ * example, the dentry of 'b' would have its
+ * parent set to the inode of 'a'. So if
+ * 'a' gets zero refed first (as part of
+ * inode table cleanup) and then 'b' has to
+ * zero refed, then dentry_unset is called on
+ * the dentry of 'b' and it further goes on to
+ * call inode_unref on b's parent which is 'a'.
+ * In this situation, GF_ASSERT would be called
+ * below as the refcount of 'a' has been already set
+ * to zero.
+ *
+ * So return the inode if the inode table cleanup
+ * has already started and inode refcount is 0.
+ */
+ return inode;
+
+ this = THIS;
+
+ if (clear && inode->in_invalidate_list) {
+ inode->in_invalidate_list = false;
+ inode->table->invalidate_size--;
+ __inode_activate(inode);
+ }
GF_ASSERT(inode->ref);
--inode->ref;
@@ -474,7 +522,7 @@ __inode_unref(inode_t *inode)
inode->_ctx[index].ref--;
}
- if (!inode->ref) {
+ if (!inode->ref && !inode->in_invalidate_list) {
inode->table->active_size--;
nlookup = GF_ATOMIC_GET(inode->nlookup);
@@ -488,7 +536,7 @@ __inode_unref(inode_t *inode)
}
static inode_t *
-__inode_ref(inode_t *inode)
+__inode_ref(inode_t *inode, bool is_invalidate)
{
int index = 0;
xlator_t *this = NULL;
@@ -498,11 +546,6 @@ __inode_ref(inode_t *inode)
this = THIS;
- if (!inode->ref) {
- inode->table->lru_size--;
- __inode_activate(inode);
- }
-
/*
* Root inode should always be in active list of inode table. So unrefs
* on root inode are no-ops. If we do not allow unrefs but allow refs,
@@ -514,6 +557,22 @@ __inode_ref(inode_t *inode)
if (__is_root_gfid(inode->gfid) && inode->ref)
return inode;
+ if (!inode->ref) {
+ if (inode->in_invalidate_list) {
+ inode->in_invalidate_list = false;
+ inode->table->invalidate_size--;
+ } else {
+ inode->table->lru_size--;
+ }
+ if (is_invalidate) {
+ inode->in_invalidate_list = true;
+ inode->table->invalidate_size++;
+ list_move_tail(&inode->list, &inode->table->invalidate);
+ } else {
+ __inode_activate(inode);
+ }
+ }
+
inode->ref++;
index = __inode_get_xl_index(inode, this);
@@ -537,7 +596,7 @@ inode_unref(inode_t *inode)
pthread_mutex_lock(&table->lock);
{
- inode = __inode_unref(inode);
+ inode = __inode_unref(inode, false);
}
pthread_mutex_unlock(&table->lock);
@@ -558,7 +617,7 @@ inode_ref(inode_t *inode)
pthread_mutex_lock(&table->lock);
{
- inode = __inode_ref(inode);
+ inode = __inode_ref(inode, false);
}
pthread_mutex_unlock(&table->lock);
@@ -566,16 +625,10 @@ inode_ref(inode_t *inode)
}
static dentry_t *
-__dentry_create(inode_t *inode, inode_t *parent, const char *name)
+dentry_create(inode_t *inode, inode_t *parent, const char *name)
{
dentry_t *newd = NULL;
- if (!inode || !parent || !name) {
- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
- "inode || parent || name not found");
- return NULL;
- }
-
newd = mem_get0(parent->table->dentry_pool);
if (newd == NULL) {
goto out;
@@ -591,10 +644,6 @@ __dentry_create(inode_t *inode, inode_t *parent, const char *name)
goto out;
}
- if (parent)
- newd->parent = __inode_ref(parent);
-
- list_add(&newd->inode_list, &inode->dentry_list);
newd->inode = inode;
out:
@@ -602,18 +651,10 @@ out:
}
static inode_t *
-__inode_create(inode_table_t *table)
+inode_create(inode_table_t *table)
{
inode_t *newi = NULL;
- if (!table) {
- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0,
- LG_MSG_INODE_TABLE_NOT_FOUND,
- "table not "
- "found");
- return NULL;
- }
-
newi = mem_get0(table->inode_pool);
if (!newi) {
goto out;
@@ -637,11 +678,7 @@ __inode_create(inode_table_t *table)
goto out;
}
- list_add(&newi->list, &table->lru);
- table->lru_size++;
-
out:
-
return newi;
}
@@ -658,14 +695,16 @@ inode_new(inode_table_t *table)
return NULL;
}
- pthread_mutex_lock(&table->lock);
- {
- inode = __inode_create(table);
- if (inode != NULL) {
- __inode_ref(inode);
+ inode = inode_create(table);
+ if (inode) {
+ pthread_mutex_lock(&table->lock);
+ {
+ list_add(&inode->list, &table->lru);
+ table->lru_size++;
+ __inode_ref(inode, false);
}
+ pthread_mutex_unlock(&table->lock);
}
- pthread_mutex_unlock(&table->lock);
return inode;
}
@@ -684,9 +723,6 @@ __inode_ref_reduce_by_n(inode_t *inode, uint64_t nref)
{
uint64_t nlookup = 0;
- if (!inode)
- return NULL;
-
GF_ASSERT(inode->ref >= nref);
inode->ref -= nref;
@@ -726,17 +762,12 @@ inode_forget_atomic(inode_t *inode, uint64_t nlookup)
}
dentry_t *
-__dentry_grep(inode_table_t *table, inode_t *parent, const char *name)
+__dentry_grep(inode_table_t *table, inode_t *parent, const char *name,
+ const int hash)
{
- int hash = 0;
dentry_t *dentry = NULL;
dentry_t *tmp = NULL;
- if (!table || !name || !parent)
- return NULL;
-
- hash = hash_dentry(parent, name, table->hashsize);
-
list_for_each_entry(tmp, &table->name_hash[hash], hash)
{
if (tmp->parent == parent && !strcmp(tmp->name, name)) {
@@ -761,15 +792,16 @@ inode_grep(inode_table_t *table, inode_t *parent, const char *name)
return NULL;
}
+ int hash = hash_dentry(parent, name, table->hashsize);
+
pthread_mutex_lock(&table->lock);
{
- dentry = __dentry_grep(table, parent, name);
-
- if (dentry)
+ dentry = __dentry_grep(table, parent, name, hash);
+ if (dentry) {
inode = dentry->inode;
-
- if (inode)
- __inode_ref(inode);
+ if (inode)
+ __inode_ref(inode, false);
+ }
}
pthread_mutex_unlock(&table->lock);
@@ -788,6 +820,9 @@ inode_resolve(inode_table_t *table, char *path)
parent = inode_ref(table->root);
str = tmp = gf_strdup(path);
+ if (str == NULL) {
+ goto out;
+ }
while (1) {
bname = strtok_r(str, "/", &saveptr);
@@ -833,17 +868,18 @@ inode_grep_for_gfid(inode_table_t *table, inode_t *parent, const char *name,
return ret;
}
+ int hash = hash_dentry(parent, name, table->hashsize);
+
pthread_mutex_lock(&table->lock);
{
- dentry = __dentry_grep(table, parent, name);
-
- if (dentry)
+ dentry = __dentry_grep(table, parent, name, hash);
+ if (dentry) {
inode = dentry->inode;
-
- if (inode) {
- gf_uuid_copy(gfid, inode->gfid);
- *type = inode->ia_type;
- ret = 0;
+ if (inode) {
+ gf_uuid_copy(gfid, inode->gfid);
+ *type = inode->ia_type;
+ ret = 0;
+ }
}
}
pthread_mutex_unlock(&table->lock);
@@ -864,25 +900,14 @@ __is_root_gfid(uuid_t gfid)
}
inode_t *
-__inode_find(inode_table_t *table, uuid_t gfid)
+__inode_find(inode_table_t *table, uuid_t gfid, const int hash)
{
inode_t *inode = NULL;
inode_t *tmp = NULL;
- int hash = 0;
-
- if (!table) {
- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0,
- LG_MSG_INODE_TABLE_NOT_FOUND,
- "table not "
- "found");
- goto out;
- }
if (__is_root_gfid(gfid))
return table->root;
- hash = hash_gfid(gfid, 65536);
-
list_for_each_entry(tmp, &table->inode_hash[hash], hash)
{
if (gf_uuid_compare(tmp->gfid, gfid) == 0) {
@@ -891,7 +916,6 @@ __inode_find(inode_table_t *table, uuid_t gfid)
}
}
-out:
return inode;
}
@@ -908,11 +932,13 @@ inode_find(inode_table_t *table, uuid_t gfid)
return NULL;
}
+ int hash = hash_gfid(gfid, 65536);
+
pthread_mutex_lock(&table->lock);
{
- inode = __inode_find(table, gfid);
+ inode = __inode_find(table, gfid, hash);
if (inode)
- __inode_ref(inode);
+ __inode_ref(inode, false);
}
pthread_mutex_unlock(&table->lock);
@@ -921,24 +947,16 @@ inode_find(inode_table_t *table, uuid_t gfid)
static inode_t *
__inode_link(inode_t *inode, inode_t *parent, const char *name,
- struct iatt *iatt)
+ struct iatt *iatt, const int dhash)
{
dentry_t *dentry = NULL;
dentry_t *old_dentry = NULL;
inode_t *old_inode = NULL;
inode_table_t *table = NULL;
inode_t *link_inode = NULL;
-
- if (!inode) {
- errno = EINVAL;
- return NULL;
- }
+ char link_uuid_str[64] = {0}, parent_uuid_str[64] = {0};
table = inode->table;
- if (!table) {
- errno = EINVAL;
- return NULL;
- }
if (parent) {
/* We should prevent inode linking between different
@@ -976,14 +994,16 @@ __inode_link(inode_t *inode, inode_t *parent, const char *name,
return NULL;
}
- old_inode = __inode_find(table, iatt->ia_gfid);
+ int ihash = hash_gfid(iatt->ia_gfid, 65536);
+
+ old_inode = __inode_find(table, iatt->ia_gfid, ihash);
if (old_inode) {
link_inode = old_inode;
} else {
gf_uuid_copy(inode->gfid, iatt->ia_gfid);
inode->ia_type = iatt->ia_type;
- __inode_hash(inode);
+ __inode_hash(inode, ihash);
}
} else {
/* @old_inode serves another important purpose - it indicates
@@ -998,40 +1018,40 @@ __inode_link(inode_t *inode, inode_t *parent, const char *name,
old_inode = inode;
}
- if (name) {
- if (!strcmp(name, ".") || !strcmp(name, ".."))
- return link_inode;
-
- if (strchr(name, '/')) {
- GF_ASSERT(!"inode link attempted with '/' in name");
- return NULL;
- }
+ if (name && (!strcmp(name, ".") || !strcmp(name, ".."))) {
+ return link_inode;
}
/* use only link_inode beyond this point */
if (parent) {
- old_dentry = __dentry_grep(table, parent, name);
+ old_dentry = __dentry_grep(table, parent, name, dhash);
if (!old_dentry || old_dentry->inode != link_inode) {
- dentry = __dentry_create(link_inode, parent, name);
+ dentry = dentry_create(link_inode, parent, name);
if (!dentry) {
- gf_msg_callingfn(
- THIS->name, GF_LOG_ERROR, 0, LG_MSG_DENTRY_CREATE_FAILED,
- "dentry create failed on "
- "inode %s with parent %s",
- uuid_utoa(link_inode->gfid), uuid_utoa(parent->gfid));
+ gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0,
+ LG_MSG_DENTRY_CREATE_FAILED,
+ "dentry create failed on "
+ "inode %s with parent %s",
+ uuid_utoa_r(link_inode->gfid, link_uuid_str),
+ uuid_utoa_r(parent->gfid, parent_uuid_str));
errno = ENOMEM;
return NULL;
}
+
+ /* dentry linking needs to happen inside lock */
+ dentry->parent = __inode_ref(parent, false);
+ list_add(&dentry->inode_list, &link_inode->dentry_list);
+
if (old_inode && __is_dentry_cyclic(dentry)) {
errno = ELOOP;
- __dentry_unset(dentry);
+ dentry_destroy(__dentry_unset(dentry));
return NULL;
}
- __dentry_hash(dentry);
+ __dentry_hash(dentry, dhash);
if (old_dentry)
- __dentry_unset(old_dentry);
+ dentry_destroy(__dentry_unset(old_dentry));
}
}
@@ -1041,6 +1061,7 @@ __inode_link(inode_t *inode, inode_t *parent, const char *name,
inode_t *
inode_link(inode_t *inode, inode_t *parent, const char *name, struct iatt *iatt)
{
+ int hash = 0;
inode_table_t *table = NULL;
inode_t *linked_inode = NULL;
@@ -1052,12 +1073,20 @@ inode_link(inode_t *inode, inode_t *parent, const char *name, struct iatt *iatt)
table = inode->table;
+ if (parent && name) {
+ hash = hash_dentry(parent, name, table->hashsize);
+ }
+
+ if (name && strchr(name, '/')) {
+ GF_ASSERT(!"inode link attempted with '/' in name");
+ return NULL;
+ }
+
pthread_mutex_lock(&table->lock);
{
- linked_inode = __inode_link(inode, parent, name, iatt);
-
+ linked_inode = __inode_link(inode, parent, name, iatt, hash);
if (linked_inode)
- __inode_ref(linked_inode);
+ __inode_ref(linked_inode, false);
}
pthread_mutex_unlock(&table->lock);
@@ -1124,6 +1153,31 @@ inode_forget(inode_t *inode, uint64_t nlookup)
return 0;
}
+int
+inode_forget_with_unref(inode_t *inode, uint64_t nlookup)
+{
+ inode_table_t *table = NULL;
+
+ if (!inode) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
+ "inode not found");
+ return -1;
+ }
+
+ table = inode->table;
+
+ pthread_mutex_lock(&table->lock);
+ {
+ inode_forget_atomic(inode, nlookup);
+ __inode_unref(inode, true);
+ }
+ pthread_mutex_unlock(&table->lock);
+
+ inode_table_prune(table);
+
+ return 0;
+}
+
/*
* Invalidate an inode. This is invoked when a translator decides that an
* inode's cache is no longer valid. Any translator interested in taking action
@@ -1173,48 +1227,47 @@ inode_invalidate(inode_t *inode)
return ret;
}
-static void
+static dentry_t *
__inode_unlink(inode_t *inode, inode_t *parent, const char *name)
{
dentry_t *dentry = NULL;
char pgfid[64] = {0};
char gfid[64] = {0};
- if (!inode || !parent || !name)
- return;
-
dentry = __dentry_search_for_inode(inode, parent->gfid, name);
/* dentry NULL for corrupted backend */
if (dentry) {
- __dentry_unset(dentry);
+ dentry = __dentry_unset(dentry);
} else {
- gf_msg("inode", GF_LOG_WARNING, 0, LG_MSG_DENTRY_NOT_FOUND,
- "%s/%s: dentry not found in %s",
- uuid_utoa_r(parent->gfid, pgfid), name,
- uuid_utoa_r(inode->gfid, gfid));
+ gf_smsg("inode", GF_LOG_WARNING, 0, LG_MSG_DENTRY_NOT_FOUND,
+ "parent-gfid=%s name=%s gfid%s",
+ uuid_utoa_r(parent->gfid, pgfid), name,
+ uuid_utoa_r(inode->gfid, gfid), NULL);
}
+
+ return dentry;
}
void
inode_unlink(inode_t *inode, inode_t *parent, const char *name)
{
- inode_table_t *table = NULL;
+ inode_table_t *table;
+ dentry_t *dentry;
- if (!inode) {
- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
- "inode not found");
+ if (!inode || !parent || !name)
return;
- }
table = inode->table;
pthread_mutex_lock(&table->lock);
{
- __inode_unlink(inode, parent, name);
+ dentry = __inode_unlink(inode, parent, name);
}
pthread_mutex_unlock(&table->lock);
+ dentry_destroy(dentry);
+
inode_table_prune(table);
}
@@ -1223,6 +1276,9 @@ inode_rename(inode_table_t *table, inode_t *srcdir, const char *srcname,
inode_t *dstdir, const char *dstname, inode_t *inode,
struct iatt *iatt)
{
+ int hash = 0;
+ dentry_t *dentry = NULL;
+
if (!inode) {
gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
"inode not found");
@@ -1231,13 +1287,26 @@ inode_rename(inode_table_t *table, inode_t *srcdir, const char *srcname,
table = inode->table;
+ if (dstname && strchr(dstname, '/')) {
+ GF_ASSERT(!"inode link attempted with '/' in name");
+ return -1;
+ }
+
+ if (dstdir && dstname) {
+ hash = hash_dentry(dstdir, dstname, table->hashsize);
+ }
+
pthread_mutex_lock(&table->lock);
{
- __inode_link(inode, dstdir, dstname, iatt);
- __inode_unlink(inode, srcdir, srcname);
+ __inode_link(inode, dstdir, dstname, iatt, hash);
+ /* pick the old dentry */
+ dentry = __inode_unlink(inode, srcdir, srcname);
}
pthread_mutex_unlock(&table->lock);
+ /* free the old dentry */
+ dentry_destroy(dentry);
+
inode_table_prune(table);
return 0;
@@ -1298,7 +1367,7 @@ inode_parent(inode_t *inode, uuid_t pargfid, const char *name)
parent = dentry->parent;
if (parent)
- __inode_ref(parent);
+ __inode_ref(parent, false);
}
pthread_mutex_unlock(&table->lock);
@@ -1308,12 +1377,6 @@ inode_parent(inode_t *inode, uuid_t pargfid, const char *name)
static int
__inode_has_dentry(inode_t *inode)
{
- if (!inode) {
- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
- "inode not found");
- return 0;
- }
-
return !list_empty(&inode->dentry_list);
}
@@ -1322,6 +1385,12 @@ inode_has_dentry(inode_t *inode)
{
int dentry_present = 0;
+ if (!inode) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
+ "inode not found");
+ return 0;
+ }
+
LOCK(&inode->lock);
{
dentry_present = __inode_has_dentry(inode);
@@ -1358,10 +1427,8 @@ __inode_path(inode_t *inode, const char *name, char **bufp)
i++; /* "/" */
i += strlen(trav->name);
if (i > PATH_MAX) {
- gf_msg(table->name, GF_LOG_CRITICAL, 0, LG_MSG_DENTRY_CYCLIC_LOOP,
- "possible infinite "
- "loop detected, forcing break. name=(%s)",
- name);
+ gf_smsg(table->name, GF_LOG_CRITICAL, 0, LG_MSG_DENTRY_CYCLIC_LOOP,
+ "name=%s", name, NULL);
ret = -ENOENT;
goto out;
}
@@ -1474,12 +1541,15 @@ static int
inode_table_prune(inode_table_t *table)
{
int ret = 0;
+ int ret1 = 0;
struct list_head purge = {
0,
};
inode_t *del = NULL;
inode_t *tmp = NULL;
inode_t *entry = NULL;
+ uint64_t nlookup = 0;
+ int64_t lru_size = 0;
if (!table)
return -1;
@@ -1488,7 +1558,11 @@ inode_table_prune(inode_table_t *table)
pthread_mutex_lock(&table->lock);
{
- while (table->lru_limit && table->lru_size > (table->lru_limit)) {
+ if (!table->lru_limit)
+ goto purge_list;
+
+ lru_size = table->lru_size;
+ while (lru_size > (table->lru_limit)) {
if (list_empty(&table->lru)) {
gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0,
LG_MSG_INVALID_INODE_LIST,
@@ -1498,26 +1572,60 @@ inode_table_prune(inode_table_t *table)
break;
}
+ lru_size--;
entry = list_entry(table->lru.next, inode_t, list);
+ /* The logic of invalidation is required only if invalidator_fn
+ is present */
+ if (table->invalidator_fn) {
+ /* check for valid inode with 'nlookup' */
+ nlookup = GF_ATOMIC_GET(entry->nlookup);
+ if (nlookup) {
+ if (entry->invalidate_sent) {
+ list_move_tail(&entry->list, &table->lru);
+ continue;
+ }
+ __inode_ref(entry, true);
+ tmp = entry;
+ break;
+ }
+ }
table->lru_size--;
__inode_retire(entry);
-
ret++;
}
+ purge_list:
list_splice_init(&table->purge, &purge);
table->purge_size = 0;
}
pthread_mutex_unlock(&table->lock);
- {
- list_for_each_entry_safe(del, tmp, &purge, list)
+ /* Pick 1 inode for invalidation */
+ if (tmp) {
+ xlator_t *old_THIS = THIS;
+ THIS = table->invalidator_xl;
+ ret1 = table->invalidator_fn(table->invalidator_xl, tmp);
+ THIS = old_THIS;
+ pthread_mutex_lock(&table->lock);
{
- list_del_init(&del->list);
- inode_forget_atomic(del, 0);
- __inode_destroy(del);
+ if (!ret1) {
+ tmp->invalidate_sent = true;
+ __inode_unref(tmp, false);
+ } else {
+ /* Move this back to the lru list*/
+ __inode_unref(tmp, true);
+ }
}
+ pthread_mutex_unlock(&table->lock);
+ }
+
+ /* Just so that if purge list is handled too, then clear it off */
+ list_for_each_entry_safe(del, tmp, &purge, list)
+ {
+ list_del_init(&del->list);
+ inode_forget_atomic(del, 0);
+ __inode_destroy(del);
}
return ret;
@@ -1534,20 +1642,26 @@ __inode_table_init_root(inode_table_t *table)
if (!table)
return;
- root = __inode_create(table);
+ root = inode_create(table);
+
+ list_add(&root->list, &table->lru);
+ table->lru_size++;
iatt.ia_gfid[15] = 1;
iatt.ia_ino = 1;
iatt.ia_type = IA_IFDIR;
- __inode_link(root, NULL, NULL, &iatt);
+ __inode_link(root, NULL, NULL, &iatt, 0);
table->root = root;
}
inode_table_t *
-inode_table_new(size_t lru_limit, xlator_t *xl)
+inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
+ int32_t (*invalidator_fn)(xlator_t *, inode_t *),
+ xlator_t *invalidator_xl)
{
inode_table_t *new = NULL;
+ uint32_t mem_pool_size = lru_limit;
int ret = -1;
int i = 0;
@@ -1559,20 +1673,20 @@ inode_table_new(size_t lru_limit, xlator_t *xl)
new->ctxcount = xl->graph->xl_count + 1;
new->lru_limit = lru_limit;
+ new->invalidator_fn = invalidator_fn;
+ new->invalidator_xl = invalidator_xl;
new->hashsize = 14057; /* TODO: Random Number?? */
/* In case FUSE is initing the inode table. */
- if (lru_limit == 0)
- lru_limit = DEFAULT_INODE_MEMPOOL_ENTRIES;
-
- new->inode_pool = mem_pool_new(inode_t, lru_limit);
+ if (!mem_pool_size || (mem_pool_size > DEFAULT_INODE_MEMPOOL_ENTRIES))
+ mem_pool_size = DEFAULT_INODE_MEMPOOL_ENTRIES;
+ new->inode_pool = mem_pool_new(inode_t, mem_pool_size);
if (!new->inode_pool)
goto out;
- new->dentry_pool = mem_pool_new(dentry_t, lru_limit);
-
+ new->dentry_pool = mem_pool_new(dentry_t, mem_pool_size);
if (!new->dentry_pool)
goto out;
@@ -1604,6 +1718,7 @@ inode_table_new(size_t lru_limit, xlator_t *xl)
INIT_LIST_HEAD(&new->active);
INIT_LIST_HEAD(&new->lru);
INIT_LIST_HEAD(&new->purge);
+ INIT_LIST_HEAD(&new->invalidate);
ret = gf_asprintf(&new->name, "%s/inode", xl->name);
if (-1 == ret) {
@@ -1611,6 +1726,8 @@ inode_table_new(size_t lru_limit, xlator_t *xl)
;
}
+ new->cleanup_started = _gf_false;
+
__inode_table_init_root(new);
pthread_mutex_init(&new->lock, NULL);
@@ -1633,6 +1750,13 @@ out:
return new;
}
+inode_table_t *
+inode_table_new(uint32_t lru_limit, xlator_t *xl)
+{
+ /* Only fuse for now requires the inode table with invalidator */
+ return inode_table_with_invalidator(lru_limit, xl, NULL, NULL);
+}
+
int
inode_table_ctx_free(inode_table_t *table)
{
@@ -1754,6 +1878,7 @@ inode_table_destroy(inode_table_t *inode_table)
*/
pthread_mutex_lock(&inode_table->lock);
{
+ inode_table->cleanup_started = _gf_true;
/* Process lru list first as we need to unset their dentry
* entries (the ones which may not be unset during
* '__inode_passivate' as they were hashed) which in turn
@@ -1771,6 +1896,14 @@ inode_table_destroy(inode_table_t *inode_table)
inode_table->lru_size--;
}
+ /* Same logic for invalidate list */
+ while (!list_empty(&inode_table->invalidate)) {
+ trav = list_first_entry(&inode_table->invalidate, inode_t, list);
+ inode_forget_atomic(trav, 0);
+ __inode_retire(trav);
+ inode_table->invalidate_size--;
+ }
+
while (!list_empty(&inode_table->active)) {
trav = list_first_entry(&inode_table->active, inode_t, list);
/* forget and unref the inode to retire and add it to
@@ -2276,10 +2409,11 @@ inode_dump(inode_t *inode, char *prefix)
{
nlookup = GF_ATOMIC_GET(inode->nlookup);
gf_proc_dump_write("gfid", "%s", uuid_utoa(inode->gfid));
- gf_proc_dump_write("nlookup", "%ld", nlookup);
+ gf_proc_dump_write("nlookup", "%" PRIu64, nlookup);
gf_proc_dump_write("fd-count", "%u", inode->fd_count);
gf_proc_dump_write("active-fd-count", "%u", inode->active_fd_count);
gf_proc_dump_write("ref", "%u", inode->ref);
+ gf_proc_dump_write("invalidate-sent", "%d", inode->invalidate_sent);
gf_proc_dump_write("ia_type", "%d", inode->ia_type);
if (inode->_ctx) {
inode_ctx = GF_CALLOC(inode->table->ctxcount, sizeof(*inode_ctx),
@@ -2353,10 +2487,13 @@ inode_table_dump(inode_table_t *itable, char *prefix)
gf_proc_dump_write(key, "%d", itable->lru_size);
gf_proc_dump_build_key(key, prefix, "purge_size");
gf_proc_dump_write(key, "%d", itable->purge_size);
+ gf_proc_dump_build_key(key, prefix, "invalidate_size");
+ gf_proc_dump_write(key, "%d", itable->invalidate_size);
INODE_DUMP_LIST(&itable->active, key, prefix, "active");
INODE_DUMP_LIST(&itable->lru, key, prefix, "lru");
INODE_DUMP_LIST(&itable->purge, key, prefix, "purge");
+ INODE_DUMP_LIST(&itable->invalidate, key, prefix, "invalidate");
pthread_mutex_unlock(&itable->lock);
}
@@ -2407,13 +2544,19 @@ inode_table_dump_to_dict(inode_table_t *itable, char *prefix, dict_t *dict)
0,
};
int ret = 0;
+#ifdef DEBUG
inode_t *inode = NULL;
int count = 0;
-
+#endif
ret = pthread_mutex_trylock(&itable->lock);
if (ret)
return;
+ snprintf(key, sizeof(key), "%s.itable.lru_limit", prefix);
+ ret = dict_set_uint32(dict, key, itable->lru_limit);
+ if (ret)
+ goto out;
+
snprintf(key, sizeof(key), "%s.itable.active_size", prefix);
ret = dict_set_uint32(dict, key, itable->active_size);
if (ret)
@@ -2429,6 +2572,13 @@ inode_table_dump_to_dict(inode_table_t *itable, char *prefix, dict_t *dict)
if (ret)
goto out;
+#ifdef DEBUG
+ /* Dumping inode details in dictionary and sending it to CLI is not
+ required as when a developer (or support team) asks for this command
+ output, they just want to get top level detail of inode table.
+ If one wants to debug, let them take statedump and debug, this
+ wouldn't be available in CLI during production setup.
+ */
list_for_each_entry(inode, &itable->active, list)
{
snprintf(key, sizeof(key), "%s.itable.active%d", prefix, count++);
@@ -2448,6 +2598,7 @@ inode_table_dump_to_dict(inode_table_t *itable, char *prefix, dict_t *dict)
snprintf(key, sizeof(key), "%s.itable.purge%d", prefix, count++);
inode_dump_to_dict(inode, key, dict);
}
+#endif
out:
pthread_mutex_unlock(&itable->lock);
diff --git a/libglusterfs/src/iobuf.c b/libglusterfs/src/iobuf.c
index 46efab692fc..4e7d2958764 100644
--- a/libglusterfs/src/iobuf.c
+++ b/libglusterfs/src/iobuf.c
@@ -8,10 +8,10 @@
cases as published by the Free Software Foundation.
*/
-#include "iobuf.h"
-#include "statedump.h"
+#include "glusterfs/iobuf.h"
+#include "glusterfs/statedump.h"
#include <stdio.h>
-#include "libglusterfs-messages.h"
+#include "glusterfs/libglusterfs-messages.h"
/*
TODO: implement destroy margins and prefetching of arenas
@@ -21,7 +21,7 @@
(sizeof(gf_iobuf_init_config) / (sizeof(struct iobuf_init_config)))
/* Make sure this array is sorted based on pagesize */
-struct iobuf_init_config gf_iobuf_init_config[] = {
+static const struct iobuf_init_config gf_iobuf_init_config[] = {
/* { pagesize, num_pages }, */
{128, 1024}, {512, 512}, {2 * 1024, 512}, {8 * 1024, 128},
{32 * 1024, 64}, {128 * 1024, 32}, {256 * 1024, 8}, {1 * 1024 * 1024, 2},
@@ -41,32 +41,31 @@ gf_iobuf_get_arena_index(const size_t page_size)
}
static size_t
-gf_iobuf_get_pagesize(const size_t page_size)
+gf_iobuf_get_pagesize(const size_t page_size, int *index)
{
int i;
size_t size = 0;
for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) {
size = gf_iobuf_init_config[i].pagesize;
- if (page_size <= size)
+ if (page_size <= size) {
+ if (index != NULL)
+ *index = i;
return size;
+ }
}
return -1;
}
-void
+static void
__iobuf_arena_init_iobufs(struct iobuf_arena *iobuf_arena)
{
- int iobuf_cnt = 0;
+ const int iobuf_cnt = iobuf_arena->page_count;
struct iobuf *iobuf = NULL;
int offset = 0;
int i = 0;
- GF_VALIDATE_OR_GOTO("iobuf", iobuf_arena, out);
-
- iobuf_cnt = iobuf_arena->page_count;
-
iobuf_arena->iobufs = GF_CALLOC(sizeof(*iobuf), iobuf_cnt,
gf_common_mt_iobuf);
if (!iobuf_arena->iobufs)
@@ -88,27 +87,23 @@ __iobuf_arena_init_iobufs(struct iobuf_arena *iobuf_arena)
iobuf++;
}
-out:
return;
}
-void
+static void
__iobuf_arena_destroy_iobufs(struct iobuf_arena *iobuf_arena)
{
int iobuf_cnt = 0;
struct iobuf *iobuf = NULL;
int i = 0;
- GF_VALIDATE_OR_GOTO("iobuf", iobuf_arena, out);
-
- iobuf_cnt = iobuf_arena->page_count;
-
if (!iobuf_arena->iobufs) {
gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0, LG_MSG_IOBUFS_NOT_FOUND,
"iobufs not found");
return;
}
+ iobuf_cnt = iobuf_arena->page_count;
iobuf = iobuf_arena->iobufs;
for (i = 0; i < iobuf_cnt; i++) {
GF_ASSERT(GF_ATOMIC_GET(iobuf->ref) == 0);
@@ -120,11 +115,10 @@ __iobuf_arena_destroy_iobufs(struct iobuf_arena *iobuf_arena)
GF_FREE(iobuf_arena->iobufs);
-out:
return;
}
-void
+static void
__iobuf_arena_destroy(struct iobuf_pool *iobuf_pool,
struct iobuf_arena *iobuf_arena)
{
@@ -143,12 +137,13 @@ out:
return;
}
-struct iobuf_arena *
+static struct iobuf_arena *
__iobuf_arena_alloc(struct iobuf_pool *iobuf_pool, size_t page_size,
int32_t num_iobufs)
{
struct iobuf_arena *iobuf_arena = NULL;
size_t rounded_size = 0;
+ int index = 0; /* unused */
GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out);
@@ -162,7 +157,7 @@ __iobuf_arena_alloc(struct iobuf_pool *iobuf_pool, size_t page_size,
INIT_LIST_HEAD(&iobuf_arena->passive.list);
iobuf_arena->iobuf_pool = iobuf_pool;
- rounded_size = gf_iobuf_get_pagesize(page_size);
+ rounded_size = gf_iobuf_get_pagesize(page_size, &index);
iobuf_arena->page_size = rounded_size;
iobuf_arena->page_count = num_iobufs;
@@ -173,8 +168,7 @@ __iobuf_arena_alloc(struct iobuf_pool *iobuf_pool, size_t page_size,
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (iobuf_arena->mem_base == MAP_FAILED) {
- gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_MAPPING_FAILED,
- "mapping failed");
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_MAPPING_FAILED, NULL);
goto err;
}
@@ -186,8 +180,7 @@ __iobuf_arena_alloc(struct iobuf_pool *iobuf_pool, size_t page_size,
__iobuf_arena_init_iobufs(iobuf_arena);
if (!iobuf_arena->iobufs) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_INIT_IOBUF_FAILED,
- "init failed");
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_INIT_IOBUF_FAILED, NULL);
goto err;
}
@@ -232,8 +225,8 @@ __iobuf_pool_add_arena(struct iobuf_pool *iobuf_pool, const size_t page_size,
if (!iobuf_arena) {
iobuf_arena = __iobuf_arena_alloc(iobuf_pool, page_size, num_pages);
if (!iobuf_arena) {
- gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_ARENA_NOT_FOUND,
- "arena not found");
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_ARENA_NOT_FOUND,
+ NULL);
return NULL;
}
}
@@ -332,7 +325,6 @@ iobuf_pool_new(void)
size_t page_size = 0;
size_t arena_size = 0;
int32_t num_pages = 0;
- int index;
iobuf_pool = GF_CALLOC(sizeof(*iobuf_pool), 1, gf_common_mt_iobuf_pool);
if (!iobuf_pool)
@@ -355,28 +347,16 @@ iobuf_pool_new(void)
iobuf_pool->mr_list[i] = NULL;
}
- pthread_mutex_lock(&iobuf_pool->mutex);
- {
- for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) {
- page_size = gf_iobuf_init_config[i].pagesize;
- num_pages = gf_iobuf_init_config[i].num_pages;
-
- index = gf_iobuf_get_arena_index(page_size);
- if (index == -1) {
- pthread_mutex_unlock(&iobuf_pool->mutex);
- gf_msg("iobuf", GF_LOG_ERROR, 0, LG_MSG_PAGE_SIZE_EXCEEDED,
- "page_size (%zu) of iobufs in arena being added is "
- "greater than max available",
- page_size);
- return NULL;
- }
-
- __iobuf_pool_add_arena(iobuf_pool, page_size, num_pages, index);
+ /* No locking required here
+ * as no one else can use this pool yet
+ */
+ for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) {
+ page_size = gf_iobuf_init_config[i].pagesize;
+ num_pages = gf_iobuf_init_config[i].num_pages;
+ if (__iobuf_pool_add_arena(iobuf_pool, page_size, num_pages, i) != NULL)
arena_size += page_size * num_pages;
- }
}
- pthread_mutex_unlock(&iobuf_pool->mutex);
/* Need an arena to handle all the bigger iobuf requests */
iobuf_create_stdalloc_arena(iobuf_pool);
@@ -466,15 +446,18 @@ __iobuf_select_arena(struct iobuf_pool *iobuf_pool, const size_t page_size,
return iobuf_arena;
}
-/* iobuf_arena variable is validaed to be non-NULL by all callers */
+/* Always called under the iobuf_pool mutex lock */
static struct iobuf *
-__iobuf_get(struct iobuf_arena *iobuf_arena, const size_t page_size,
+__iobuf_get(struct iobuf_pool *iobuf_pool, const size_t page_size,
const int index)
{
struct iobuf *iobuf = NULL;
- struct iobuf_pool *iobuf_pool = NULL;
+ struct iobuf_arena *iobuf_arena = NULL;
- iobuf_pool = iobuf_arena->iobuf_pool;
+ /* most eligible arena for picking an iobuf */
+ iobuf_arena = __iobuf_select_arena(iobuf_pool, page_size, index);
+ if (!iobuf_arena)
+ return NULL;
list_for_each_entry(iobuf, &iobuf_arena->passive.list, list) break;
@@ -498,8 +481,8 @@ __iobuf_get(struct iobuf_arena *iobuf_arena, const size_t page_size,
return iobuf;
}
-struct iobuf *
-iobuf_get_from_stdalloc(struct iobuf_pool *iobuf_pool, size_t page_size)
+static struct iobuf *
+iobuf_get_from_stdalloc(struct iobuf_pool *iobuf_pool, const size_t page_size)
{
struct iobuf *iobuf = NULL;
struct iobuf_arena *iobuf_arena = NULL;
@@ -545,7 +528,6 @@ struct iobuf *
iobuf_get2(struct iobuf_pool *iobuf_pool, size_t page_size)
{
struct iobuf *iobuf = NULL;
- struct iobuf_arena *iobuf_arena = NULL;
size_t rounded_size = 0;
int index = 0;
@@ -553,7 +535,7 @@ iobuf_get2(struct iobuf_pool *iobuf_pool, size_t page_size)
page_size = iobuf_pool->default_page_size;
}
- rounded_size = gf_iobuf_get_pagesize(page_size);
+ rounded_size = gf_iobuf_get_pagesize(page_size, &index);
if (rounded_size == -1) {
/* make sure to provide the requested buffer with standard
memory allocations */
@@ -567,33 +549,26 @@ iobuf_get2(struct iobuf_pool *iobuf_pool, size_t page_size)
iobuf_pool->request_misses++;
return iobuf;
- }
-
- index = gf_iobuf_get_arena_index(page_size);
- if (index == -1) {
- gf_msg("iobuf", GF_LOG_ERROR, 0, LG_MSG_PAGE_SIZE_EXCEEDED,
- "page_size (%zu) of iobufs in arena being added is "
- "greater than max available",
- page_size);
+ } else if (index == -1) {
+ gf_smsg("iobuf", GF_LOG_ERROR, 0, LG_MSG_PAGE_SIZE_EXCEEDED,
+ "page_size=%zu", page_size, NULL);
return NULL;
}
pthread_mutex_lock(&iobuf_pool->mutex);
{
- /* most eligible arena for picking an iobuf */
- iobuf_arena = __iobuf_select_arena(iobuf_pool, rounded_size, index);
- if (!iobuf_arena)
- goto unlock;
-
- iobuf = __iobuf_get(iobuf_arena, rounded_size, index);
- if (!iobuf)
- goto unlock;
+ iobuf = __iobuf_get(iobuf_pool, rounded_size, index);
+ if (!iobuf) {
+ pthread_mutex_unlock(&iobuf_pool->mutex);
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_IOBUF_NOT_FOUND,
+ NULL);
+ goto post_unlock;
+ }
iobuf_ref(iobuf);
}
-unlock:
pthread_mutex_unlock(&iobuf_pool->mutex);
-
+post_unlock:
return iobuf;
}
@@ -633,56 +608,41 @@ struct iobuf *
iobuf_get(struct iobuf_pool *iobuf_pool)
{
struct iobuf *iobuf = NULL;
- struct iobuf_arena *iobuf_arena = NULL;
int index = 0;
GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out);
index = gf_iobuf_get_arena_index(iobuf_pool->default_page_size);
if (index == -1) {
- gf_msg("iobuf", GF_LOG_ERROR, 0, LG_MSG_PAGE_SIZE_EXCEEDED,
- "page_size (%zu) of iobufs in arena being added is "
- "greater than max available",
- iobuf_pool->default_page_size);
+ gf_smsg("iobuf", GF_LOG_ERROR, 0, LG_MSG_PAGE_SIZE_EXCEEDED,
+ "page_size=%zu", iobuf_pool->default_page_size, NULL);
return NULL;
}
pthread_mutex_lock(&iobuf_pool->mutex);
{
- /* most eligible arena for picking an iobuf */
- iobuf_arena = __iobuf_select_arena(
- iobuf_pool, iobuf_pool->default_page_size, index);
- if (!iobuf_arena) {
- gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_ARENA_NOT_FOUND,
- "arena not found");
- goto unlock;
- }
-
- iobuf = __iobuf_get(iobuf_arena, iobuf_pool->default_page_size, index);
+ iobuf = __iobuf_get(iobuf_pool, iobuf_pool->default_page_size, index);
if (!iobuf) {
- gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_IOBUF_NOT_FOUND,
- "iobuf not found");
- goto unlock;
+ pthread_mutex_unlock(&iobuf_pool->mutex);
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_IOBUF_NOT_FOUND,
+ NULL);
+ goto out;
}
iobuf_ref(iobuf);
}
-unlock:
pthread_mutex_unlock(&iobuf_pool->mutex);
out:
return iobuf;
}
-void
+static void
__iobuf_put(struct iobuf *iobuf, struct iobuf_arena *iobuf_arena)
{
struct iobuf_pool *iobuf_pool = NULL;
int index = 0;
- GF_VALIDATE_OR_GOTO("iobuf", iobuf_arena, out);
- GF_VALIDATE_OR_GOTO("iobuf", iobuf, out);
-
iobuf_pool = iobuf_arena->iobuf_pool;
index = gf_iobuf_get_arena_index(iobuf_arena->page_size);
@@ -735,15 +695,14 @@ iobuf_put(struct iobuf *iobuf)
iobuf_arena = iobuf->iobuf_arena;
if (!iobuf_arena) {
- gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_ARENA_NOT_FOUND,
- "arena not found");
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_ARENA_NOT_FOUND, NULL);
return;
}
iobuf_pool = iobuf_arena->iobuf_pool;
if (!iobuf_pool) {
- gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_POOL_NOT_FOUND,
- "iobuf pool not found");
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_POOL_NOT_FOUND, "iobuf",
+ NULL);
return;
}
@@ -982,14 +941,12 @@ iobuf_size(struct iobuf *iobuf)
GF_VALIDATE_OR_GOTO("iobuf", iobuf, out);
if (!iobuf->iobuf_arena) {
- gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_ARENA_NOT_FOUND,
- "arena not found");
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_ARENA_NOT_FOUND, NULL);
goto out;
}
if (!iobuf->iobuf_arena->iobuf_pool) {
- gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_POOL_NOT_FOUND,
- "pool not found");
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_POOL_NOT_FOUND, NULL);
goto out;
}
diff --git a/libglusterfs/src/latency.c b/libglusterfs/src/latency.c
index afbb6dcad80..ce4b0e8255d 100644
--- a/libglusterfs/src/latency.c
+++ b/libglusterfs/src/latency.c
@@ -13,41 +13,35 @@
* latencies of FOPs broken down by subvolumes.
*/
-#include "glusterfs.h"
-#include "xlator.h"
-#include "common-utils.h"
-#include "statedump.h"
-#include "libglusterfs-messages.h"
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/statedump.h"
-void
-gf_update_latency(call_frame_t *frame)
+gf_latency_t *
+gf_latency_new(size_t n)
{
- double elapsed;
- struct timespec *begin, *end;
-
- fop_latency_t *lat;
-
- begin = &frame->begin;
- end = &frame->end;
+ int i = 0;
+ gf_latency_t *lat = NULL;
- if (!(begin->tv_sec && end->tv_sec))
- goto out;
+ lat = GF_MALLOC(n * sizeof(*lat), gf_common_mt_latency_t);
+ if (!lat)
+ return NULL;
- elapsed = (end->tv_sec - begin->tv_sec) * 1e9 +
- (end->tv_nsec - begin->tv_nsec);
+ for (i = 0; i < n; i++) {
+ gf_latency_reset(lat + i);
+ }
+ return lat;
+}
- if (frame->op < 0 || frame->op >= GF_FOP_MAXVALUE) {
- gf_log("[core]", GF_LOG_WARNING, "Invalid frame op value: %d",
- frame->op);
+void
+gf_latency_update(gf_latency_t *lat, struct timespec *begin,
+ struct timespec *end)
+{
+ if (!(begin->tv_sec && end->tv_sec)) {
+ /*Measure latency might have been enabled/disabled during the op*/
return;
}
- /* Can happen mostly at initiator xlator, as STACK_WIND/UNWIND macros
- set it right anyways for those frames */
- if (!frame->op)
- frame->op = frame->root->op;
-
- lat = &frame->this->stats.interval.latencies[frame->op];
+ double elapsed = gf_tsdiff(begin, end);
if (lat->max < elapsed)
lat->max = elapsed;
@@ -57,40 +51,34 @@ gf_update_latency(call_frame_t *frame)
lat->total += elapsed;
lat->count++;
-out:
- return;
}
void
-gf_proc_dump_latency_info(xlator_t *xl)
+gf_latency_reset(gf_latency_t *lat)
{
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
- char key[GF_DUMP_MAX_BUF_LEN];
- int i;
-
- snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.latency", xl->name);
- gf_proc_dump_add_section("%s", key_prefix);
-
- for (i = 0; i < GF_FOP_MAXVALUE; i++) {
- gf_proc_dump_build_key(key, key_prefix, "%s", (char *)gf_fop_list[i]);
-
- fop_latency_t *lat = &xl->stats.interval.latencies[i];
+ if (!lat)
+ return;
+ memset(lat, 0, sizeof(*lat));
+ lat->min = ULLONG_MAX;
+ /* make sure 'min' is set to high value, so it would be
+ properly set later */
+}
- /* Doesn't make sense to continue if there are no fops
- came in the given interval */
- if (!lat->count)
- continue;
+void
+gf_frame_latency_update(call_frame_t *frame)
+{
+ gf_latency_t *lat;
+ /* Can happen mostly at initiator xlator, as STACK_WIND/UNWIND macros
+ set it right anyways for those frames */
+ if (!frame->op)
+ frame->op = frame->root->op;
- gf_proc_dump_write(key, "%.03f,%" PRId64 ",%.03f",
- (lat->total / lat->count), lat->count, lat->total);
+ if (frame->op < 0 || frame->op >= GF_FOP_MAXVALUE) {
+ gf_log("[core]", GF_LOG_WARNING, "Invalid frame op value: %d",
+ frame->op);
+ return;
}
- memset(xl->stats.interval.latencies, 0,
- sizeof(xl->stats.interval.latencies));
-
- /* make sure 'min' is set to high value, so it would be
- properly set later */
- for (i = 0; i < GF_FOP_MAXVALUE; i++) {
- xl->stats.interval.latencies[i].min = 0xffffffff;
- }
+ lat = &frame->this->stats.interval.latencies[frame->op];
+ gf_latency_update(lat, &frame->begin, &frame->end);
}
diff --git a/libglusterfs/src/libglusterfs-messages.h b/libglusterfs/src/libglusterfs-messages.h
deleted file mode 100644
index d40d644fad9..00000000000
--- a/libglusterfs/src/libglusterfs-messages.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
- */
-
-#ifndef _LG_MESSAGES_H_
-#define _LG_MESSAGES_H_
-
-#include "glfs-message-id.h"
-
-/* To add new message IDs, append new identifiers at the end of the list.
- *
- * Never remove a message ID. If it's not used anymore, you can rename it or
- * leave it as it is, but not delete it. This is to prevent reutilization of
- * IDs by other messages.
- *
- * The component name must match one of the entries defined in
- * glfs-message-id.h.
- */
-
-GLFS_MSGID(
- LIBGLUSTERFS, LG_MSG_ASPRINTF_FAILED, LG_MSG_INVALID_ENTRY,
- LG_MSG_COUNT_LESS_THAN_ZERO, LG_MSG_COUNT_LESS_THAN_DATA_PAIRS,
- LG_MSG_VALUE_LENGTH_LESS_THAN_ZERO, LG_MSG_PAIRS_LESS_THAN_COUNT,
- LG_MSG_KEY_OR_VALUE_NULL, LG_MSG_FAILED_TO_LOG_DICT,
- LG_MSG_NULL_VALUE_IN_DICT, LG_MSG_DIR_OP_FAILED,
- LG_MSG_STORE_HANDLE_CREATE_FAILED, LG_MSG_FILE_OP_FAILED,
- LG_MSG_FILE_STAT_FAILED, LG_MSG_LOCK_FAILED, LG_MSG_UNLOCK_FAILED,
- LG_MSG_DICT_SERIAL_FAILED, LG_MSG_DICT_UNSERIAL_FAILED, LG_MSG_NO_MEMORY,
- LG_MSG_VOLUME_ERROR, LG_MSG_SUB_VOLUME_ERROR, LG_MSG_SYNTAX_ERROR,
- LG_MSG_BACKTICK_PARSE_FAILED, LG_MSG_BUFFER_ERROR, LG_MSG_STRDUP_ERROR,
- LG_MSG_HASH_FUNC_ERROR, LG_MSG_GET_BUCKET_FAILED, LG_MSG_INSERT_FAILED,
- LG_MSG_OUT_OF_RANGE, LG_MSG_VALIDATE_RETURNS, LG_MSG_VALIDATE_REC_FAILED,
- LG_MSG_RB_TABLE_CREATE_FAILED, LG_MSG_PATH_NOT_FOUND,
- LG_MSG_EXPAND_FD_TABLE_FAILED, LG_MSG_MAPPING_FAILED,
- LG_MSG_INIT_IOBUF_FAILED, LG_MSG_PAGE_SIZE_EXCEEDED, LG_MSG_ARENA_NOT_FOUND,
- LG_MSG_IOBUF_NOT_FOUND, LG_MSG_POOL_NOT_FOUND, LG_MSG_SET_ATTRIBUTE_FAILED,
- LG_MSG_READ_ATTRIBUTE_FAILED, LG_MSG_UNMOUNT_FAILED,
- LG_MSG_LATENCY_MEASUREMENT_STATE, LG_MSG_NO_PERM, LG_MSG_NO_KEY,
- LG_MSG_DICT_NULL, LG_MSG_INIT_TIMER_FAILED, LG_MSG_FD_ANONYMOUS_FAILED,
- LG_MSG_FD_CREATE_FAILED, LG_MSG_BUFFER_FULL, LG_MSG_FWRITE_FAILED,
- LG_MSG_PRINT_FAILED, LG_MSG_MEM_POOL_DESTROY,
- LG_MSG_EXPAND_CLIENT_TABLE_FAILED, LG_MSG_DISCONNECT_CLIENT,
- LG_MSG_PIPE_CREATE_FAILED, LG_MSG_SET_PIPE_FAILED,
- LG_MSG_REGISTER_PIPE_FAILED, LG_MSG_POLL_IGNORE_MULTIPLE_THREADS,
- LG_MSG_INDEX_NOT_FOUND, LG_MSG_EPOLL_FD_CREATE_FAILED,
- LG_MSG_SLOT_NOT_FOUND, LG_MSG_STALE_FD_FOUND, LG_MSG_GENERATION_MISMATCH,
- LG_MSG_PTHREAD_KEY_CREATE_FAILED, LG_MSG_TRANSLATOR_INIT_FAILED,
- LG_MSG_UUID_BUF_INIT_FAILED, LG_MSG_LKOWNER_BUF_INIT_FAILED,
- LG_MSG_SYNCTASK_INIT_FAILED, LG_MSG_SYNCOPCTX_INIT_FAILED,
- LG_MSG_GLOBAL_INIT_FAILED, LG_MSG_PTHREAD_FAILED, LG_MSG_DIR_IS_SYMLINK,
- LG_MSG_RESOLVE_HOSTNAME_FAILED, LG_MSG_GETADDRINFO_FAILED,
- LG_MSG_GETNAMEINFO_FAILED, LG_MSG_PATH_ERROR, LG_MSG_INET_PTON_FAILED,
- LG_MSG_NEGATIVE_NUM_PASSED, LG_MSG_GETHOSTNAME_FAILED,
- LG_MSG_RESERVED_PORTS_ERROR, LG_MSG_INVALID_PORT, LG_MSG_INVALID_FAMILY,
- LG_MSG_CONVERSION_FAILED, LG_MSG_SKIP_HEADER_FAILED, LG_MSG_INVALID_LOG,
- LG_MSG_UTIMES_FAILED, LG_MSG_BACKTRACE_SAVE_FAILED, LG_MSG_INIT_FAILED,
- LG_MSG_VALIDATION_FAILED, LG_MSG_GRAPH_ERROR, LG_MSG_UNKNOWN_OPTIONS_FAILED,
- LG_MSG_CTX_NULL, LG_MSG_TMPFILE_CREATE_FAILED, LG_MSG_DLOPEN_FAILED,
- LG_MSG_LOAD_FAILED, LG_MSG_DLSYM_ERROR, LG_MSG_TREE_NOT_FOUND,
- LG_MSG_PER_DENTRY, LG_MSG_DENTRY, LG_MSG_GETIFADDRS_FAILED,
- LG_MSG_REGEX_OP_FAILED, LG_MSG_FRAME_ERROR, LG_MSG_SET_PARAM_FAILED,
- LG_MSG_GET_PARAM_FAILED, LG_MSG_PREPARE_FAILED, LG_MSG_EXEC_FAILED,
- LG_MSG_BINDING_FAILED, LG_MSG_DELETE_FAILED, LG_MSG_GET_ID_FAILED,
- LG_MSG_CREATE_FAILED, LG_MSG_PARSE_FAILED, LG_MSG_GETCONTEXT_FAILED,
- LG_MSG_UPDATE_FAILED, LG_MSG_QUERY_CALL_BACK_FAILED,
- LG_MSG_GET_RECORD_FAILED, LG_MSG_DB_ERROR, LG_MSG_CONNECTION_ERROR,
- LG_MSG_NOT_MULTITHREAD_MODE, LG_MSG_SKIP_PATH, LG_MSG_INVALID_FOP,
- LG_MSG_QUERY_FAILED, LG_MSG_CLEAR_COUNTER_FAILED, LG_MSG_LOCK_LIST_FAILED,
- LG_MSG_UNLOCK_LIST_FAILED, LG_MSG_ADD_TO_LIST_FAILED, LG_MSG_INIT_DB_FAILED,
- LG_MSG_DELETE_FROM_LIST_FAILED, LG_MSG_CLOSE_CONNECTION_FAILED,
- LG_MSG_INSERT_OR_UPDATE_FAILED, LG_MSG_FIND_OP_FAILED,
- LG_MSG_CONNECTION_INIT_FAILED, LG_MSG_COMPLETED_TASK, LG_MSG_WAKE_UP_ZOMBIE,
- LG_MSG_REWAITING_TASK, LG_MSG_SLEEP_ZOMBIE, LG_MSG_SWAPCONTEXT_FAILED,
- LG_MSG_UNSUPPORTED_PLUGIN, LG_MSG_INVALID_DB_TYPE, LG_MSG_UNDERSIZED_BUF,
- LG_MSG_DATA_CONVERSION_ERROR, LG_MSG_DICT_ERROR, LG_MSG_IOBUFS_NOT_FOUND,
- LG_MSG_ENTRIES_NULL, LG_MSG_FD_NOT_FOUND_IN_FDTABLE,
- LG_MSG_REALLOC_FOR_FD_PTR_FAILED, LG_MSG_DICT_SET_FAILED, LG_MSG_NULL_PTR,
- LG_MSG_RBTHASH_INIT_BUCKET_FAILED, LG_MSG_ASSERTION_FAILED,
- LG_MSG_HOSTNAME_NULL, LG_MSG_INVALID_IPV4_FORMAT,
- LG_MSG_CTX_CLEANUP_STARTED, LG_MSG_TIMER_REGISTER_ERROR,
- LG_MSG_PTR_HEADER_CORRUPTED, LG_MSG_INVALID_UPLINK, LG_MSG_CLIENT_NULL,
- LG_MSG_XLATOR_DOES_NOT_IMPLEMENT, LG_MSG_DENTRY_NOT_FOUND,
- LG_MSG_INODE_NOT_FOUND, LG_MSG_INODE_TABLE_NOT_FOUND,
- LG_MSG_DENTRY_CREATE_FAILED, LG_MSG_INODE_CONTEXT_FREED,
- LG_MSG_UNKNOWN_LOCK_TYPE, LG_MSG_UNLOCK_BEFORE_LOCK,
- LG_MSG_LOCK_OWNER_ERROR, LG_MSG_MEMPOOL_PTR_NULL,
- LG_MSG_QUOTA_XATTRS_MISSING, LG_MSG_INVALID_STRING, LG_MSG_BIND_REF,
- LG_MSG_REF_COUNT, LG_MSG_INVALID_ARG, LG_MSG_VOL_OPTION_ADD,
- LG_MSG_XLATOR_OPTION_INVALID, LG_MSG_GETTIMEOFDAY_FAILED,
- LG_MSG_GRAPH_INIT_FAILED, LG_MSG_EVENT_NOTIFY_FAILED,
- LG_MSG_ACTIVE_GRAPH_NULL, LG_MSG_VOLFILE_PARSE_ERROR, LG_MSG_FD_INODE_NULL,
- LG_MSG_INVALID_VOLFILE_ENTRY, LG_MSG_PER_DENTRY_FAILED,
- LG_MSG_PARENT_DENTRY_NOT_FOUND, LG_MSG_DENTRY_CYCLIC_LOOP,
- LG_MSG_INVALID_POLL_IN, LG_MSG_INVALID_POLL_OUT, LG_MSG_EPOLL_FD_ADD_FAILED,
- LG_MSG_EPOLL_FD_DEL_FAILED, LG_MSG_EPOLL_FD_MODIFY_FAILED,
- LG_MSG_STARTED_EPOLL_THREAD, LG_MSG_EXITED_EPOLL_THREAD,
- LG_MSG_START_EPOLL_THREAD_FAILED, LG_MSG_FALLBACK_TO_POLL,
- LG_MSG_QUOTA_CONF_ERROR, LG_MSG_RBTHASH_GET_ENTRY_FAILED,
- LG_MSG_RBTHASH_GET_BUCKET_FAILED, LG_MSG_RBTHASH_INSERT_FAILED,
- LG_MSG_RBTHASH_INIT_ENTRY_FAILED, LG_MSG_TMPFILE_DELETE_FAILED,
- LG_MSG_MEMPOOL_INVALID_FREE, LG_MSG_LOCK_FAILURE, LG_MSG_SET_LOG_LEVEL,
- LG_MSG_VERIFY_PLATFORM, LG_MSG_RUNNER_LOG, LG_MSG_LEASEID_BUF_INIT_FAILED,
- LG_MSG_PTHREAD_ATTR_INIT_FAILED, LG_MSG_INVALID_INODE_LIST,
- LG_MSG_COMPACT_FAILED, LG_MSG_COMPACT_STATUS, LG_MSG_UTIMENSAT_FAILED,
- LG_MSG_PTHREAD_NAMING_FAILED, LG_MSG_SYSCALL_RETURNS_WRONG,
- LG_MSG_XXH64_TO_GFID_FAILED);
-
-#endif /* !_LG_MESSAGES_H_ */
diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym
index c8c42311c80..5f18cd56cbe 100644
--- a/libglusterfs/src/libglusterfs.sym
+++ b/libglusterfs/src/libglusterfs.sym
@@ -92,6 +92,8 @@ args_xattrop_cbk_store
args_xattrop_store
args_zerofill_cbk_store
args_zerofill_store
+args_copy_file_range_cbk_store
+args_copy_file_range_store
bin_to_data
call_resume
call_resume_keep_stub
@@ -127,10 +129,6 @@ cluster_uninodelk
cluster_unlink
cluster_xattrop
cluster_xattrop_cbk
-compound_args_cbk_alloc
-compound_args_cbk_cleanup
-compound_args_cleanup
-compound_fop_alloc
copy_opts_to_child
create_frame
data_copy
@@ -351,7 +349,10 @@ default_put
default_put_cbk
default_put_failure_cbk
default_put_resume
-__dentry_grep
+default_copy_file_range
+default_copy_file_range_cbk
+default_copy_file_range_failure_cbk
+default_copy_file_range_resume
dht_is_linkfile
dict_add
dict_addn
@@ -374,6 +375,7 @@ dict_get_bin
dict_get_double
dict_get_gfuuid
dict_get_iatt
+dict_get_mdata
dict_get_int16
dict_get_int32
dict_get_int32n
@@ -399,6 +401,7 @@ dict_rename_key
dict_reset
dict_serialize
dict_serialized_length
+dict_serialized_length_lk
dict_serialize_value_with_delim
dict_set
dict_setn
@@ -410,6 +413,7 @@ dict_set_dynstrn
dict_set_dynstr_with_alloc
dict_set_gfuuid
dict_set_iatt
+dict_set_mdata
dict_set_int16
dict_set_int32
dict_set_int32n
@@ -434,19 +438,20 @@ eh_dump
eh_new
eh_save_history
entry_copy
-event_dispatch
-event_dispatch_destroy
-event_handled
-event_pool_destroy
-event_pool_new
-event_reconfigure_threads
-event_register
-event_select_on
-event_unregister
-event_unregister_close
+gf_event_dispatch
+gf_event_dispatch_destroy
+gf_event_handled
+gf_event_pool_destroy
+gf_event_pool_new
+gf_event_reconfigure_threads
+gf_event_register
+gf_event_select_on
+gf_event_unregister
+gf_event_unregister_close
fd_anonymous
fd_anonymous_with_flags
fd_bind
+fd_close
fd_create
fd_create_uint64
__fd_ctx_del
@@ -471,6 +476,8 @@ fd_unref
_fini
fop_access_stub
fop_create_stub
+fop_copy_file_range_stub
+fop_copy_file_range_cbk_stub
fop_discard_stub
fop_entrylk_stub
fop_enum_to_pri_string
@@ -500,6 +507,7 @@ fop_lease_stub
fop_link_stub
fop_lk_stub
fop_log_level
+fop_lookup_cbk_stub
fop_lookup_stub
fop_mkdir_stub
fop_mknod_stub
@@ -539,14 +547,16 @@ get_checksum_for_path
get_file_mtime
get_host_name
get_mem_size
-get_new_dict
get_path_name
-get_struct_variable
-get_vol_type
get_xlator_by_name
get_xlator_by_type
gf_array_insertionsort
gf_asprintf
+gf_async
+gf_async_adjust_threads
+gf_async_ctrl
+gf_async_init
+gf_async_fini
gf_backtrace_save
gf_bits_count
gf_bits_index
@@ -575,6 +585,7 @@ gf_dirent_free
gf_dirent_orig_offset
gf_dm_hashfn
gf_dnscache_init
+gf_dnscache_deinit
gf_errno_to_error
gf_error_to_errno
_gf_event
@@ -683,6 +694,7 @@ gf_store_read_and_tokenize
gf_store_rename_tmppath
gf_store_retrieve_value
gf_store_save_value
+gf_store_save_items
gf_store_unlink_tmppath
gf_store_unlock
gf_string2boolean
@@ -703,7 +715,10 @@ gf_strTrim
gf_strstr
gf_thread_cleanup_xint
gf_thread_create
+gf_thread_vcreate
gf_thread_create_detached
+gf_thread_set_name
+gf_thread_set_vname
gf_timer_call_after
gf_timer_call_cancel
gf_timer_registry_destroy
@@ -783,6 +798,7 @@ __inode_find
inode_find
inode_find_directory_name
inode_forget
+inode_forget_with_unref
inode_from_path
inode_grep
inode_grep_for_gfid
@@ -807,6 +823,7 @@ inode_table_destroy_all
inode_table_dump
inode_table_dump_to_dict
inode_table_new
+inode_table_with_invalidator
__inode_table_set_lru_limit
inode_table_set_lru_limit
inode_unlink
@@ -859,8 +876,7 @@ mem_get0
mem_pool_destroy
mem_pool_new_fn
mem_pools_fini
-mem_pools_init_early
-mem_pools_init_late
+mem_pools_init
mem_put
mkdir_p
next_token
@@ -923,6 +939,12 @@ syncbarrier_destroy
syncbarrier_init
syncbarrier_wait
syncbarrier_wake
+synccond_init
+synccond_destroy
+synccond_wait
+synccond_timedwait
+synccond_signal
+synccond_broadcast
syncenv_destroy
syncenv_new
synclock_destroy
@@ -933,6 +955,7 @@ synclock_unlock
syncop_access
syncop_close
syncop_create
+syncop_copy_file_range
syncopctx_getctx
syncopctx_setfsgid
syncopctx_setfsgroups
@@ -999,6 +1022,7 @@ synctask_new
synctask_new1
synctask_set
synctask_setid
+synctask_sleep
synctask_wake
synctask_yield
sys_access
@@ -1006,6 +1030,7 @@ sys_chmod
sys_chown
sys_close
sys_closedir
+sys_copy_file_range
sys_creat
sys_fallocate
sys_fchmod
@@ -1023,6 +1048,7 @@ sys_futimes
sys_lchown
sys_lgetxattr
sys_link
+sys_linkat
sys_llistxattr
sys_lremovexattr
sys_lseek
@@ -1046,16 +1072,21 @@ sys_rmdir
sys_stat
sys_statvfs
sys_symlink
+sys_symlinkat
sys_truncate
sys_unlink
+sys_unlinkat
sys_utimensat
sys_write
sys_writev
sys_socket
sys_accept
+sys_kill
+sys_sysctl
tbf_init
tbf_throttle
timespec_now
+timespec_now_realtime
timespec_sub
timespec_adjust_delta
timespec_cmp
@@ -1079,6 +1110,7 @@ xlator_foreach
xlator_foreach_depth_first
xlator_init
xlator_mem_acct_init
+xlator_mem_acct_unref
xlator_notify
xlator_option_info_list
xlator_option_init_bool
@@ -1089,10 +1121,12 @@ xlator_option_init_percent
xlator_option_init_percent_or_size
xlator_option_init_size
xlator_option_init_size_uint64
+xlator_option_init_size_int64
xlator_option_init_str
xlator_option_init_time
xlator_option_init_uint32
xlator_option_init_uint64
+xlator_option_init_int64
xlator_option_init_xlator
xlator_option_reconf_bool
xlator_option_reconf_int32
@@ -1101,10 +1135,12 @@ xlator_option_reconf_percent
xlator_option_reconf_percent_or_size
xlator_option_reconf_size
xlator_option_reconf_size_uint64
+xlator_option_reconf_size_int64
xlator_option_reconf_str
xlator_option_reconf_time
xlator_option_reconf_uint32
xlator_option_reconf_uint64
+xlator_option_reconf_int64
xlator_option_reconf_xlator
xlator_options_validate
xlator_options_validate_list
@@ -1121,6 +1157,7 @@ xlator_volume_option_get
xlator_volume_option_get_list
xlator_memrec_free
xlator_mem_cleanup
+gluster_graph_take_reference
default_fops
gf_fop_list
gf_upcall_list
@@ -1137,3 +1174,20 @@ gf_replace_new_iatt_in_dict
gf_changelog_init
gf_changelog_register_generic
gf_gfid_generate_from_xxh64
+find_xlator_option_in_cmd_args_t
+gf_d_type_from_ia_type
+glusterfs_graph_fini
+glusterfs_process_svc_attach_volfp
+glusterfs_mux_volfile_reconfigure
+glusterfs_process_svc_detach
+mgmt_is_multiplexed_daemon
+xlator_is_cleanup_starting
+gf_nanosleep
+gf_syncfs
+graph_total_client_xlator
+get_xattrs_to_heal
+gf_latency_statedump_and_reset
+gf_latency_new
+gf_latency_reset
+gf_latency_update
+gf_frame_latency_update
diff --git a/libglusterfs/src/locking.c b/libglusterfs/src/locking.c
index 89cbaa00c8b..7577054e33a 100644
--- a/libglusterfs/src/locking.c
+++ b/libglusterfs/src/locking.c
@@ -15,7 +15,7 @@
#include <unistd.h>
#define LOCKING_IMPL
-#include "locking.h"
+#include "glusterfs/locking.h"
int use_spinlocks = 0;
diff --git a/libglusterfs/src/logging.c b/libglusterfs/src/logging.c
index b1a167f7997..a930d3e3b63 100644
--- a/libglusterfs/src/logging.c
+++ b/libglusterfs/src/logging.c
@@ -17,6 +17,7 @@
#include <string.h>
#include <stdlib.h>
#include <syslog.h>
+#include <sys/resource.h>
#ifdef HAVE_BACKTRACE
#include <execinfo.h>
@@ -26,7 +27,7 @@
#include <sys/stat.h>
-#include "syscall.h"
+#include "glusterfs/syscall.h"
#define GF_JSON_MSG_LENGTH 8192
#define GF_SYSLOG_CEE_FORMAT \
@@ -34,22 +35,19 @@
#define GF_LOG_CONTROL_FILE "/etc/glusterfs/logger.conf"
#define GF_LOG_BACKTRACE_DEPTH 5
#define GF_LOG_BACKTRACE_SIZE 4096
-#define GF_LOG_TIMESTR_SIZE 256
#define GF_MAX_SLOG_PAIR_COUNT 100
-#include "xlator.h"
-#include "logging.h"
-#include "defaults.h"
-#include "glusterfs.h"
-#include "timer.h"
-#include "libglusterfs-messages.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/timer.h"
+#include "glusterfs/libglusterfs-messages.h"
/* Do not replace gf_log in TEST_LOG with gf_msg, as there is a slight chance
* that it could lead to an infinite recursion.*/
#define TEST_LOG(__msg, __args...) \
gf_log("logging-infra", GF_LOG_DEBUG, __msg, ##__args);
-void
+static void
gf_log_flush_timeout_cbk(void *data);
int
@@ -58,72 +56,54 @@ gf_log_inject_timer_event(glusterfs_ctx_t *ctx);
static void
gf_log_flush_extra_msgs(glusterfs_ctx_t *ctx, uint32_t new);
-static char *gf_level_strings[] = {"", /* NONE */
- "M", /* EMERGENCY */
- "A", /* ALERT */
- "C", /* CRITICAL */
- "E", /* ERROR */
- "W", /* WARNING */
- "N", /* NOTICE */
- "I", /* INFO */
- "D", /* DEBUG */
- "T", /* TRACE */
- ""};
-
-/* Ideally this should get moved to logging.h */
-struct _msg_queue {
- struct list_head msgs;
-};
-
-struct _log_msg {
- const char *msg;
- struct list_head queue;
+static int
+log_buf_init(log_buf_t *buf, const char *domain, const char *file,
+ const char *function, int32_t line, gf_loglevel_t level,
+ int errnum, uint64_t msgid, char **appmsgstr, int graph_id);
+static void
+gf_log_rotate(glusterfs_ctx_t *ctx);
+
+static char gf_level_strings[] = {
+ ' ', /* NONE */
+ 'M', /* EMERGENCY */
+ 'A', /* ALERT */
+ 'C', /* CRITICAL */
+ 'E', /* ERROR */
+ 'W', /* WARNING */
+ 'N', /* NOTICE */
+ 'I', /* INFO */
+ 'D', /* DEBUG */
+ 'T', /* TRACE */
};
void
gf_log_logrotate(int signum)
{
- glusterfs_ctx_t *ctx = NULL;
-
- ctx = THIS->ctx;
-
- if (ctx) {
- ctx->log.logrotate = 1;
- ctx->log.cmd_history_logrotate = 1;
+ if (THIS->ctx) {
+ THIS->ctx->log.logrotate = 1;
+ THIS->ctx->log.cmd_history_logrotate = 1;
}
}
void
gf_log_enable_syslog(void)
{
- glusterfs_ctx_t *ctx = NULL;
-
- ctx = THIS->ctx;
-
- if (ctx)
- ctx->log.gf_log_syslog = 1;
+ if (THIS->ctx)
+ THIS->ctx->log.gf_log_syslog = 1;
}
void
gf_log_disable_syslog(void)
{
- glusterfs_ctx_t *ctx = NULL;
-
- ctx = THIS->ctx;
-
- if (ctx)
- ctx->log.gf_log_syslog = 0;
+ if (THIS->ctx)
+ THIS->ctx->log.gf_log_syslog = 0;
}
gf_loglevel_t
gf_log_get_loglevel(void)
{
- glusterfs_ctx_t *ctx = NULL;
-
- ctx = THIS->ctx;
-
- if (ctx)
- return ctx->log.loglevel;
+ if (THIS->ctx)
+ return THIS->ctx->log.loglevel;
else
/* return global defaults (see gf_log_globals_init) */
return GF_LOG_INFO;
@@ -139,12 +119,8 @@ gf_log_set_loglevel(glusterfs_ctx_t *ctx, gf_loglevel_t level)
int
gf_log_get_localtime(void)
{
- glusterfs_ctx_t *ctx = NULL;
-
- ctx = THIS->ctx;
-
- if (ctx)
- return ctx->log.localtime;
+ if (THIS->ctx)
+ return THIS->ctx->log.localtime;
else
/* return global defaults (see gf_log_globals_init) */
return 0;
@@ -153,22 +129,15 @@ gf_log_get_localtime(void)
void
gf_log_set_localtime(int on_off)
{
- glusterfs_ctx_t *ctx = NULL;
-
- ctx = THIS->ctx;
-
- if (ctx)
- ctx->log.localtime = on_off;
+ if (THIS->ctx)
+ THIS->ctx->log.localtime = on_off;
}
void
gf_log_flush(void)
{
- xlator_t *this = NULL;
- glusterfs_ctx_t *ctx = NULL;
-
- this = THIS;
- ctx = this->ctx;
+ xlator_t *this = THIS;
+ glusterfs_ctx_t *ctx = this->ctx;
if (ctx && ctx->log.logger == gf_logger_glusterlog) {
pthread_mutex_lock(&ctx->log.logfile_mutex);
@@ -205,54 +174,19 @@ gf_log_set_xl_loglevel(void *this, gf_loglevel_t level)
*
* care needs to be taken to configure and start daemons based on the versions
* that supports these features */
-gf_log_format_t
-gf_log_get_logformat(void)
-{
- glusterfs_ctx_t *ctx = NULL;
-
- ctx = THIS->ctx;
-
- if (ctx)
- return ctx->log.logformat;
- else
- /* return global defaluts (see gf_log_globals_init) */
- return gf_logformat_withmsgid;
-}
void
gf_log_set_logformat(gf_log_format_t format)
{
- glusterfs_ctx_t *ctx = NULL;
-
- ctx = THIS->ctx;
-
- if (ctx)
- ctx->log.logformat = format;
-}
-
-gf_log_logger_t
-gf_log_get_logger(void)
-{
- glusterfs_ctx_t *ctx = NULL;
-
- ctx = THIS->ctx;
-
- if (ctx)
- return ctx->log.logger;
- else
- /* return global defaluts (see gf_log_globals_init) */
- return gf_logger_glusterlog;
+ if (THIS->ctx)
+ THIS->ctx->log.logformat = format;
}
void
gf_log_set_logger(gf_log_logger_t logger)
{
- glusterfs_ctx_t *ctx = NULL;
-
- ctx = THIS->ctx;
-
- if (ctx)
- ctx->log.logger = logger;
+ if (THIS->ctx)
+ THIS->ctx->log.logger = logger;
}
gf_loglevel_t
@@ -296,21 +230,11 @@ gf_log_set_log_flush_timeout(uint32_t timeout)
THIS->ctx->log.timeout = timeout;
}
-log_buf_t *
-log_buf_new()
-{
- log_buf_t *buf = NULL;
-
- buf = mem_get0(THIS->ctx->logbuf_pool);
-
- return buf;
-}
-
/* If log_buf_init() fails (indicated by a return value of -1),
* call log_buf_destroy() to clean up memory allocated in heap and to return
* the log_buf_t object back to its memory pool.
*/
-int
+static int
log_buf_init(log_buf_t *buf, const char *domain, const char *file,
const char *function, int32_t line, gf_loglevel_t level,
int errnum, uint64_t msgid, char **appmsgstr, int graph_id)
@@ -349,7 +273,7 @@ out:
return ret;
}
-int
+static int
log_buf_destroy(log_buf_t *buf)
{
if (!buf)
@@ -387,18 +311,16 @@ gf_log_rotate(glusterfs_ctx_t *ctx)
fd = sys_open(ctx->log.filename, O_CREAT | O_WRONLY | O_APPEND,
S_IRUSR | S_IWUSR);
if (fd < 0) {
- gf_msg("logrotate", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED,
- "failed to open "
- "logfile");
+ gf_smsg("logrotate", GF_LOG_ERROR, errno,
+ LG_MSG_OPEN_LOGFILE_FAILED, NULL);
return;
}
new_logfile = fdopen(fd, "a");
if (!new_logfile) {
- gf_msg("logrotate", GF_LOG_CRITICAL, errno, LG_MSG_FILE_OP_FAILED,
- "failed to open logfile"
- " %s",
- ctx->log.filename);
+ gf_smsg("logrotate", GF_LOG_CRITICAL, errno,
+ LG_MSG_OPEN_LOGFILE_FAILED, "filename=%s",
+ ctx->log.filename, NULL);
sys_close(fd);
return;
}
@@ -507,7 +429,7 @@ out:
*
* @return: void
*/
-void
+static void
gf_openlog(const char *ident, int option, int facility)
{
int _option = option;
@@ -551,7 +473,7 @@ gf_openlog(const char *ident, int option, int facility)
* buf = "I/O error\u001bon /tmp/bar file"
*
*/
-char *
+static char *
_json_escape(const char *str, char *buf, size_t len)
{
static const unsigned char json_exceptions[UCHAR_MAX + 1] = {
@@ -630,7 +552,7 @@ _json_escape(const char *str, char *buf, size_t len)
*
* @return: void
*/
-void
+static void
gf_syslog(int facility_priority, char *format, ...)
{
char *msg = NULL;
@@ -680,12 +602,10 @@ gf_log_globals_init(void *data, gf_loglevel_t level)
int
gf_log_init(void *data, const char *file, const char *ident)
{
- glusterfs_ctx_t *ctx = NULL;
+ glusterfs_ctx_t *ctx = data;
int fd = -1;
struct stat buf;
- ctx = data;
-
if (ctx == NULL) {
fprintf(stderr, "ERROR: ctx is NULL\n");
return -1;
@@ -762,9 +682,8 @@ gf_log_init(void *data, const char *file, const char *ident)
}
if (mkdir_p(logdir, 0755, true)) {
/* EEXIST is handled in mkdir_p() itself */
- gf_msg("logging", GF_LOG_ERROR, 0, LG_MSG_STRDUP_ERROR,
- "failed to create metrics dir %s (%s)", logdir,
- strerror(errno));
+ gf_smsg("logging", GF_LOG_ERROR, 0, LG_MSG_STRDUP_ERROR,
+ "logdir=%s", logdir, "errno=%s", strerror(errno), NULL);
GF_FREE(logdir);
return -1;
}
@@ -808,12 +727,8 @@ gf_log_init(void *data, const char *file, const char *ident)
void
set_sys_log_level(gf_loglevel_t level)
{
- glusterfs_ctx_t *ctx = NULL;
-
- ctx = THIS->ctx;
-
- if (ctx)
- ctx->log.sys_log_level = level;
+ if (THIS->ctx)
+ THIS->ctx->log.sys_log_level = level;
}
/* Check if we should be logging
@@ -823,21 +738,17 @@ set_sys_log_level(gf_loglevel_t level)
static gf_boolean_t
skip_logging(xlator_t *this, gf_loglevel_t level)
{
- gf_boolean_t ret = _gf_false;
- gf_loglevel_t existing_level = GF_LOG_NONE;
+ gf_loglevel_t existing_level = this->loglevel ? this->loglevel
+ : this->ctx->log.loglevel;
+ if (level > existing_level) {
+ return _gf_true;
+ }
if (level == GF_LOG_NONE) {
- ret = _gf_true;
- goto out;
+ return _gf_true;
}
- existing_level = this->loglevel ? this->loglevel : this->ctx->log.loglevel;
- if (level > existing_level) {
- ret = _gf_true;
- goto out;
- }
-out:
- return ret;
+ return _gf_false;
}
int
@@ -845,10 +756,10 @@ _gf_log_callingfn(const char *domain, const char *file, const char *function,
int line, gf_loglevel_t level, const char *fmt, ...)
{
const char *basename = NULL;
- xlator_t *this = NULL;
+ xlator_t *this = THIS;
char *logline = NULL;
char *msg = NULL;
- char timestr[256] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
char *callstr = NULL;
@@ -857,10 +768,7 @@ _gf_log_callingfn(const char *domain, const char *file, const char *function,
};
int ret = 0;
va_list ap;
- glusterfs_ctx_t *ctx = NULL;
-
- this = THIS;
- ctx = this->ctx;
+ glusterfs_ctx_t *ctx = this->ctx;
if (!ctx)
goto out;
@@ -868,18 +776,6 @@ _gf_log_callingfn(const char *domain, const char *file, const char *function,
if (skip_logging(this, level))
goto out;
- static char *level_strings[] = {"", /* NONE */
- "M", /* EMERGENCY */
- "A", /* ALERT */
- "C", /* CRITICAL */
- "E", /* ERROR */
- "W", /* WARNING */
- "N", /* NOTICE */
- "I", /* INFO */
- "D", /* DEBUG */
- "T", /* TRACE */
- ""};
-
if (!domain || !file || !function || !fmt) {
fprintf(stderr, "logging: %s:%s():%d: invalid argument\n", __FILE__,
__PRETTY_FUNCTION__, __LINE__);
@@ -923,13 +819,12 @@ _gf_log_callingfn(const char *domain, const char *file, const char *function,
if (-1 == ret)
goto out;
- gf_time_fmt(timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT);
- snprintf(timestr + strlen(timestr), sizeof timestr - strlen(timestr),
- ".%" GF_PRI_SUSECONDS, tv.tv_usec);
+ gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT);
- ret = gf_asprintf(&logline, "[%s] %s [%s:%d:%s] %s %d-%s: %s", timestr,
- level_strings[level], basename, line, function, callstr,
- ((this->graph) ? this->graph->id : 0), domain, msg);
+ ret = gf_asprintf(&logline, "[%s] %c [%s:%d:%s] %s %d-%s: %s\n", timestr,
+ gf_level_strings[level], basename, line, function,
+ callstr, ((this->graph) ? this->graph->id : 0), domain,
+ msg);
if (-1 == ret) {
goto out;
}
@@ -937,10 +832,10 @@ _gf_log_callingfn(const char *domain, const char *file, const char *function,
pthread_mutex_lock(&ctx->log.logfile_mutex);
{
if (ctx->log.logfile) {
- fprintf(ctx->log.logfile, "%s\n", logline);
+ fputs(logline, ctx->log.logfile);
fflush(ctx->log.logfile);
} else if (ctx->log.loglevel >= level) {
- fprintf(stderr, "%s\n", logline);
+ fputs(logline, stderr);
fflush(stderr);
}
@@ -949,7 +844,7 @@ _gf_log_callingfn(const char *domain, const char *file, const char *function,
and trace logs */
if (ctx->log.gf_log_syslog && level &&
(level <= ctx->log.sys_log_level))
- syslog((level - 1), "%s\n", logline);
+ syslog((level - 1), "%s", logline);
#endif
}
@@ -964,7 +859,7 @@ out:
return ret;
}
-int
+static int
_gf_msg_plain_internal(gf_loglevel_t level, const char *msg)
{
xlator_t *this = NULL;
@@ -1121,10 +1016,12 @@ _gf_msg_backtrace_nomem(gf_loglevel_t level, int stacksize)
goto out;
bt_size = backtrace(array, ((stacksize <= 200) ? stacksize : 200));
+ if (!bt_size)
+ goto out;
pthread_mutex_lock(&ctx->log.logfile_mutex);
{
fd = ctx->log.logfile ? fileno(ctx->log.logfile) : fileno(stderr);
- if (bt_size && (fd != -1)) {
+ if (fd != -1) {
/* print to the file fd, to prevent any
allocations from backtrace_symbols
*/
@@ -1190,12 +1087,13 @@ _gf_msg_nomem(const char *domain, const char *file, const char *function,
char msg[2048] = {
0,
};
- char timestr[GF_LOG_TIMESTR_SIZE] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
glusterfs_ctx_t *ctx = NULL;
int wlen = 0;
int priority;
+ struct rusage r_usage;
this = THIS;
ctx = this->ctx;
@@ -1217,25 +1115,23 @@ _gf_msg_nomem(const char *domain, const char *file, const char *function,
ret = gettimeofday(&tv, NULL);
if (-1 == ret)
goto out;
- gf_time_fmt(timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT);
- ret = snprintf(timestr + strlen(timestr), sizeof timestr - strlen(timestr),
- ".%" GF_PRI_SUSECONDS, tv.tv_usec);
- if (-1 == ret) {
- goto out;
- }
+ gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT);
/* TODO: Currently we print in the enhanced format, with a message ID
* of 0. Need to enhance this to support format as configured */
- ret = snprintf(msg, sizeof msg,
- "[%s] %s [MSGID: %" PRIu64
- "]"
- " [%s:%d:%s] %s: no memory "
- "available for size (%" GF_PRI_SIZET
- ")"
- " [call stack follows]\n",
- timestr, gf_level_strings[level], (uint64_t)0, basename,
- line, function, domain, size);
- if (-1 == ret) {
+ wlen = snprintf(
+ msg, sizeof msg,
+ "[%s] %c [MSGID: %" PRIu64
+ "]"
+ " [%s:%d:%s] %s: no memory "
+ "available for size (%" GF_PRI_SIZET
+ ") current memory usage in kilobytes %ld"
+ " [call stack follows]\n",
+ timestr, gf_level_strings[level], (uint64_t)0, basename, line, function,
+ domain, size,
+ (!getrusage(RUSAGE_SELF, &r_usage) ? r_usage.ru_maxrss : 0));
+ if (-1 == wlen) {
+ ret = -1;
goto out;
}
@@ -1265,8 +1161,6 @@ _gf_msg_nomem(const char *domain, const char *file, const char *function,
goto out;
}
- wlen = strlen(msg);
-
/* write directly to the fd to prevent out of order
* message and stack */
ret = sys_write(fd, msg, wlen);
@@ -1380,93 +1274,77 @@ gf_log_glusterlog(glusterfs_ctx_t *ctx, const char *domain, const char *file,
int errnum, uint64_t msgid, char **appmsgstr, char *callstr,
struct timeval tv, int graph_id, gf_log_format_t fmt)
{
- char timestr[GF_LOG_TIMESTR_SIZE] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
char *header = NULL;
char *footer = NULL;
- char *msg = NULL;
- size_t hlen = 0, flen = 0, mlen = 0;
int ret = 0;
/* rotate if required */
gf_log_rotate(ctx);
/* format the time stamp */
- gf_time_fmt(timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT);
- snprintf(timestr + strlen(timestr), sizeof timestr - strlen(timestr),
- ".%" GF_PRI_SUSECONDS, tv.tv_usec);
+ gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT);
+
+ /* generate footer */
+ if (errnum) {
+ ret = gf_asprintf(&footer, " [%s]\n", strerror(errnum));
+ } else {
+ ret = gf_asprintf(&footer, " \n");
+ }
+ if (-1 == ret) {
+ goto err;
+ }
- /* generate header and footer */
+ /* generate message, inc. the header */
if (fmt == gf_logformat_traditional) {
if (!callstr) {
ret = gf_asprintf(&header,
- "[%s] %s [%s:%d:%s]"
- " %d-%s: ",
+ "[%s] %c [%s:%d:%s]"
+ " %d-%s: %s",
timestr, gf_level_strings[level], file, line,
- function, graph_id, domain);
+ function, graph_id, domain, *appmsgstr);
} else {
ret = gf_asprintf(&header,
- "[%s] %s [%s:%d:%s] %s"
- " %d-%s: ",
+ "[%s] %c [%s:%d:%s] %s"
+ " %d-%s: %s",
timestr, gf_level_strings[level], file, line,
- function, callstr, graph_id, domain);
- }
- if (-1 == ret) {
- goto err;
+ function, callstr, graph_id, domain, *appmsgstr);
}
} else { /* gf_logformat_withmsgid */
/* CEE log format unsupported in logger_glusterlog, so just
* print enhanced log format */
if (!callstr) {
ret = gf_asprintf(&header,
- "[%s] %s [MSGID: %" PRIu64
+ "[%s] %c [MSGID: %" PRIu64
"]"
- " [%s:%d:%s] %d-%s: ",
+ " [%s:%d:%s] %d-%s: %s",
timestr, gf_level_strings[level], msgid, file,
- line, function, graph_id, domain);
+ line, function, graph_id, domain, *appmsgstr);
} else {
ret = gf_asprintf(&header,
- "[%s] %s [MSGID: %" PRIu64
+ "[%s] %c [MSGID: %" PRIu64
"]"
- " [%s:%d:%s] %s %d-%s: ",
+ " [%s:%d:%s] %s %d-%s: %s",
timestr, gf_level_strings[level], msgid, file,
- line, function, callstr, graph_id, domain);
- }
- if (-1 == ret) {
- goto err;
+ line, function, callstr, graph_id, domain,
+ *appmsgstr);
}
}
-
- if (errnum) {
- ret = gf_asprintf(&footer, " [%s]", strerror(errnum));
- if (-1 == ret) {
- goto err;
- }
- }
-
- /* generate the full message to log */
- hlen = strlen(header);
- flen = footer ? strlen(footer) : 0;
- mlen = strlen(*appmsgstr);
- msg = GF_MALLOC(hlen + flen + mlen + 1, gf_common_mt_char);
- if (!msg) {
- ret = -1;
+ if (-1 == ret) {
goto err;
}
- strcpy(msg, header);
- strcpy(msg + hlen, *appmsgstr);
- if (footer)
- strcpy(msg + hlen + mlen, footer);
+ /* send the full message to log */
pthread_mutex_lock(&ctx->log.logfile_mutex);
{
if (ctx->log.logfile) {
- fprintf(ctx->log.logfile, "%s\n", msg);
+ fprintf(ctx->log.logfile, "%s%s", header, footer);
fflush(ctx->log.logfile);
} else if (ctx->log.loglevel >= level) {
- fprintf(stderr, "%s\n", msg);
+ fprintf(stderr, "%s%s", header, footer);
fflush(stderr);
}
@@ -1474,8 +1352,9 @@ gf_log_glusterlog(glusterfs_ctx_t *ctx, const char *domain, const char *file,
/* We want only serious logs in 'syslog', not our debug
* and trace logs */
if (ctx->log.gf_log_syslog && level &&
- (level <= ctx->log.sys_log_level))
- syslog((level - 1), "%s\n", msg);
+ (level <= ctx->log.sys_log_level)) {
+ syslog((level - 1), "%s%s", header, footer);
+ }
#endif
}
@@ -1486,7 +1365,6 @@ gf_log_glusterlog(glusterfs_ctx_t *ctx, const char *domain, const char *file,
ret = 0;
err:
- GF_FREE(msg);
GF_FREE(header);
GF_FREE(footer);
@@ -1502,39 +1380,34 @@ gf_syslog_log_repetitions(const char *domain, const char *file,
int graph_id)
{
int priority;
- char timestr_latest[256] = {
+ char timestr_latest[GF_TIMESTR_SIZE] = {
0,
};
- char timestr_oldest[256] = {
+ char timestr_oldest[GF_TIMESTR_SIZE] = {
0,
};
SET_LOG_PRIO(level, priority);
- gf_time_fmt(timestr_latest, sizeof timestr_latest, latest.tv_sec,
- gf_timefmt_FT);
- snprintf(timestr_latest + strlen(timestr_latest),
- sizeof(timestr_latest) - strlen(timestr_latest),
- ".%" GF_PRI_SUSECONDS, latest.tv_usec);
-
- gf_time_fmt(timestr_oldest, sizeof timestr_oldest, oldest.tv_sec,
- gf_timefmt_FT);
- snprintf(timestr_oldest + strlen(timestr_oldest),
- sizeof(timestr_oldest) - strlen(timestr_oldest),
- ".%" GF_PRI_SUSECONDS, oldest.tv_usec);
+ gf_time_fmt_tv(timestr_latest, sizeof timestr_latest, &latest,
+ gf_timefmt_FT);
+ gf_time_fmt_tv(timestr_oldest, sizeof timestr_oldest, &oldest,
+ gf_timefmt_FT);
if (errnum) {
syslog(priority,
"The message \"[MSGID: %" PRIu64
"] [%s:%d:%s] "
- "%d-%s: %s [%s] \" repeated %d times between %s and %s",
+ "%d-%s: %s [%s] \" repeated %d times between %s"
+ " and %s",
msgid, file, line, function, graph_id, domain, *appmsgstr,
strerror(errnum), refcount, timestr_oldest, timestr_latest);
} else {
syslog(priority,
"The message \"[MSGID: %" PRIu64
"] [%s:%d:%s] "
- "%d-%s: %s \" repeated %d times between %s and %s",
+ "%d-%s: %s \" repeated %d times between %s"
+ " and %s",
msgid, file, line, function, graph_id, domain, *appmsgstr,
refcount, timestr_oldest, timestr_latest);
}
@@ -1550,13 +1423,10 @@ gf_glusterlog_log_repetitions(glusterfs_ctx_t *ctx, const char *domain,
struct timeval latest, int graph_id)
{
int ret = 0;
- size_t hlen = 0;
- size_t flen = 0;
- size_t mlen = 0;
- char timestr_latest[256] = {
+ char timestr_latest[GF_TIMESTR_SIZE] = {
0,
};
- char timestr_oldest[256] = {
+ char timestr_oldest[GF_TIMESTR_SIZE] = {
0,
};
char errstr[256] = {
@@ -1564,65 +1434,45 @@ gf_glusterlog_log_repetitions(glusterfs_ctx_t *ctx, const char *domain,
};
char *header = NULL;
char *footer = NULL;
- char *msg = NULL;
if (!ctx)
goto err;
gf_log_rotate(ctx);
- gf_time_fmt(timestr_latest, sizeof timestr_latest, latest.tv_sec,
- gf_timefmt_FT);
- snprintf(timestr_latest + strlen(timestr_latest),
- sizeof(timestr_latest) - strlen(timestr_latest),
- ".%" GF_PRI_SUSECONDS, latest.tv_usec);
-
- gf_time_fmt(timestr_oldest, sizeof timestr_oldest, oldest.tv_sec,
- gf_timefmt_FT);
- snprintf(timestr_oldest + strlen(timestr_oldest),
- sizeof(timestr_oldest) - strlen(timestr_oldest),
- ".%" GF_PRI_SUSECONDS, oldest.tv_usec);
-
ret = gf_asprintf(&header,
- "The message \"%s [MSGID: %" PRIu64
+ "The message \"%c [MSGID: %" PRIu64
"]"
- " [%s:%d:%s] %d-%s: ",
+ " [%s:%d:%s] %d-%s: %s",
gf_level_strings[level], msgid, file, line, function,
- graph_id, domain);
- if (-1 == ret)
+ graph_id, domain, *appmsgstr);
+ if (-1 == ret) {
goto err;
+ }
+
+ gf_time_fmt_tv(timestr_latest, sizeof timestr_latest, &latest,
+ gf_timefmt_FT);
+
+ gf_time_fmt_tv(timestr_oldest, sizeof timestr_oldest, &oldest,
+ gf_timefmt_FT);
if (errnum)
snprintf(errstr, sizeof(errstr) - 1, " [%s]", strerror(errnum));
- ret = gf_asprintf(&footer,
- "%s\" repeated %d times between"
- " [%s] and [%s]",
+ ret = gf_asprintf(&footer, "%s\" repeated %d times between [%s] and [%s]",
errstr, refcount, timestr_oldest, timestr_latest);
- if (-1 == ret)
- goto err;
-
- /* generate the full message to log */
- hlen = strlen(header);
- flen = strlen(footer);
- mlen = strlen(*appmsgstr);
- msg = GF_MALLOC(hlen + flen + mlen + 1, gf_common_mt_char);
- if (!msg) {
+ if (-1 == ret) {
ret = -1;
goto err;
}
- strcpy(msg, header);
- strcpy(msg + hlen, *appmsgstr);
- strcpy(msg + hlen + mlen, footer);
-
pthread_mutex_lock(&ctx->log.logfile_mutex);
{
if (ctx->log.logfile) {
- fprintf(ctx->log.logfile, "%s\n", msg);
+ fprintf(ctx->log.logfile, "%s%s\n", header, footer);
fflush(ctx->log.logfile);
} else if (ctx->log.loglevel >= level) {
- fprintf(stderr, "%s\n", msg);
+ fprintf(stderr, "%s%s\n", header, footer);
fflush(stderr);
}
@@ -1631,7 +1481,7 @@ gf_glusterlog_log_repetitions(glusterfs_ctx_t *ctx, const char *domain,
* and trace logs */
if (ctx->log.gf_log_syslog && level &&
(level <= ctx->log.sys_log_level))
- syslog((level - 1), "%s\n", msg);
+ syslog((level - 1), "%s%s\n", header, footer);
#endif
}
@@ -1642,7 +1492,6 @@ gf_glusterlog_log_repetitions(glusterfs_ctx_t *ctx, const char *domain,
ret = 0;
err:
- GF_FREE(msg);
GF_FREE(header);
GF_FREE(footer);
@@ -1658,9 +1507,7 @@ gf_log_print_with_repetitions(glusterfs_ctx_t *ctx, const char *domain,
struct timeval latest, int graph_id)
{
int ret = -1;
- gf_log_logger_t logger = 0;
-
- logger = ctx->log.logger;
+ gf_log_logger_t logger = ctx->log.logger;
switch (logger) {
case gf_logger_syslog:
@@ -1670,6 +1517,11 @@ gf_log_print_with_repetitions(glusterfs_ctx_t *ctx, const char *domain,
appmsgstr, callstr, refcount, oldest, latest, graph_id);
break;
}
+ /* NOTE: If syslog control file is absent, which is another
+ * way to control logging to syslog, then we will fall through
+ * to the gluster log. The ideal way to do things would be to
+ * not have the extra control file check */
+
case gf_logger_glusterlog:
ret = gf_glusterlog_log_repetitions(
ctx, domain, file, function, line, level, errnum, msgid,
@@ -1994,7 +1846,7 @@ _gf_msg_internal(const char *domain, const char *file, const char *function,
if (size == 0) {
flush_logged_msg = _gf_true;
goto unlock;
- } else if ((ctx->log.lru_cur_size + 1) > size) {
+ } else if (((ctx->log.lru_cur_size + 1) > size) && (first)) {
/* If the list is full, flush the lru msg to disk and also
* release it after unlock, and ...
* */
@@ -2011,7 +1863,7 @@ _gf_msg_internal(const char *domain, const char *file, const char *function,
/* create a new list element, initialise and enqueue it.
* Additionally, this being the first occurrence of the msg,
* log it directly to disk after unlock. */
- buf_new = log_buf_new();
+ buf_new = mem_get0(THIS->ctx->logbuf_pool);
if (!buf_new) {
ret = -1;
goto unlock;
@@ -2063,23 +1915,11 @@ _gf_msg(const char *domain, const char *file, const char *function,
int ret = 0;
char *msgstr = NULL;
va_list ap;
- xlator_t *this = NULL;
+ xlator_t *this = THIS;
glusterfs_ctx_t *ctx = NULL;
- char callstr[GF_LOG_BACKTRACE_SIZE] = {
- 0,
- };
- int passcallstr = 0;
+ char *callstr = NULL;
int log_inited = 0;
- /* in args check */
- if (!domain || !file || !function || !fmt) {
- fprintf(stderr, "logging: %s:%s():%d: invalid argument\n", __FILE__,
- __PRETTY_FUNCTION__, __LINE__);
- return -1;
- }
-
- this = THIS;
-
if (this == NULL)
return -1;
@@ -2093,20 +1933,12 @@ _gf_msg(const char *domain, const char *file, const char *function,
if (skip_logging(this, level))
goto out;
- if (trace) {
- ret = _gf_msg_backtrace(GF_LOG_BACKTRACE_DEPTH, callstr,
- GF_LOG_BACKTRACE_SIZE);
- if (ret >= 0)
- passcallstr = 1;
- }
-
- pthread_mutex_lock(&ctx->log.logfile_mutex);
- {
- if (ctx->log.logfile) {
- log_inited = 1;
- }
+ /* in args check */
+ if (!domain || !file || !function || !fmt) {
+ fprintf(stderr, "logging: %s:%s():%d: invalid argument\n", __FILE__,
+ __PRETTY_FUNCTION__, __LINE__);
+ return -1;
}
- pthread_mutex_unlock(&ctx->log.logfile_mutex);
/* form the message */
va_start(ap, fmt);
@@ -2115,15 +1947,35 @@ _gf_msg(const char *domain, const char *file, const char *function,
/* log */
if (ret != -1) {
+ if (trace) {
+ callstr = GF_MALLOC(GF_LOG_BACKTRACE_SIZE, gf_common_mt_char);
+ if (callstr == NULL)
+ return -1;
+
+ ret = _gf_msg_backtrace(GF_LOG_BACKTRACE_DEPTH, callstr,
+ GF_LOG_BACKTRACE_SIZE);
+ if (ret < 0) {
+ GF_FREE(callstr);
+ callstr = NULL;
+ }
+ }
+
+ pthread_mutex_lock(&ctx->log.logfile_mutex);
+ {
+ if (ctx->log.logfile) {
+ log_inited = 1;
+ }
+ }
+ pthread_mutex_unlock(&ctx->log.logfile_mutex);
+
if (!log_inited && ctx->log.gf_log_syslog) {
ret = gf_log_syslog(
ctx, domain, file, function, line, level, errnum, msgid,
- &msgstr, (passcallstr ? callstr : NULL),
+ &msgstr, (callstr ? callstr : NULL),
(this->graph) ? this->graph->id : 0, gf_logformat_traditional);
} else {
ret = _gf_msg_internal(domain, file, function, line, level, errnum,
- msgid, &msgstr,
- (passcallstr ? callstr : NULL),
+ msgid, &msgstr, (callstr ? callstr : NULL),
(this->graph) ? this->graph->id : 0);
}
} else {
@@ -2131,7 +1983,8 @@ _gf_msg(const char *domain, const char *file, const char *function,
* are undefined, be safe */
msgstr = NULL;
}
-
+ if (callstr)
+ GF_FREE(callstr);
FREE(msgstr);
out:
@@ -2147,7 +2000,7 @@ _gf_log(const char *domain, const char *file, const char *function, int line,
const char *basename = NULL;
FILE *new_logfile = NULL;
va_list ap;
- char timestr[GF_LOG_TIMESTR_SIZE] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
struct timeval tv = {
@@ -2157,11 +2010,8 @@ _gf_log(const char *domain, const char *file, const char *function, int line,
char *msg = NULL;
int ret = 0;
int fd = -1;
- xlator_t *this = NULL;
- glusterfs_ctx_t *ctx = NULL;
-
- this = THIS;
- ctx = this->ctx;
+ xlator_t *this = THIS;
+ glusterfs_ctx_t *ctx = this->ctx;
if (!ctx)
goto out;
@@ -2169,18 +2019,6 @@ _gf_log(const char *domain, const char *file, const char *function, int line,
if (skip_logging(this, level))
goto out;
- static char *level_strings[] = {"", /* NONE */
- "M", /* EMERGENCY */
- "A", /* ALERT */
- "C", /* CRITICAL */
- "E", /* ERROR */
- "W", /* WARNING */
- "N", /* NOTICE */
- "I", /* INFO */
- "D", /* DEBUG */
- "T", /* TRACE */
- ""};
-
if (!domain || !file || !function || !fmt) {
fprintf(stderr, "logging: %s:%s():%d: invalid argument\n", __FILE__,
__PRETTY_FUNCTION__, __LINE__);
@@ -2220,16 +2058,17 @@ _gf_log(const char *domain, const char *file, const char *function, int line,
fd = sys_open(ctx->log.filename, O_CREAT | O_RDONLY, S_IRUSR | S_IWUSR);
if (fd < 0) {
- gf_msg("logrotate", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED,
- "failed to open logfile");
+ gf_smsg("logrotate", GF_LOG_ERROR, errno,
+ LG_MSG_OPEN_LOGFILE_FAILED, NULL);
return -1;
}
sys_close(fd);
new_logfile = fopen(ctx->log.filename, "a");
if (!new_logfile) {
- gf_msg("logrotate", GF_LOG_CRITICAL, errno, LG_MSG_FILE_OP_FAILED,
- "failed to open logfile %s", ctx->log.filename);
+ gf_smsg("logrotate", GF_LOG_CRITICAL, errno,
+ LG_MSG_OPEN_LOGFILE_FAILED, "filename=%s",
+ ctx->log.filename, NULL);
goto log;
}
@@ -2248,12 +2087,10 @@ log:
if (-1 == ret)
goto out;
- gf_time_fmt(timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT);
- snprintf(timestr + strlen(timestr), sizeof timestr - strlen(timestr),
- ".%" GF_PRI_SUSECONDS, tv.tv_usec);
+ gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT);
- ret = gf_asprintf(&logline, "[%s] %s [%s:%d:%s] %d-%s: %s", timestr,
- level_strings[level], basename, line, function,
+ ret = gf_asprintf(&logline, "[%s] %c [%s:%d:%s] %d-%s: %s\n", timestr,
+ gf_level_strings[level], basename, line, function,
((this->graph) ? this->graph->id : 0), domain, msg);
if (-1 == ret) {
goto err;
@@ -2262,10 +2099,10 @@ log:
pthread_mutex_lock(&ctx->log.logfile_mutex);
{
if (ctx->log.logfile) {
- fprintf(ctx->log.logfile, "%s\n", logline);
+ fputs(logline, ctx->log.logfile);
fflush(ctx->log.logfile);
} else if (ctx->log.loglevel >= level) {
- fprintf(stderr, "%s\n", logline);
+ fputs(logline, stderr);
fflush(stderr);
}
@@ -2274,7 +2111,7 @@ log:
and trace logs */
if (ctx->log.gf_log_syslog && level &&
(level <= ctx->log.sys_log_level))
- syslog((level - 1), "%s\n", logline);
+ syslog((level - 1), "%s", logline);
#endif
}
@@ -2337,8 +2174,8 @@ gf_cmd_log_init(const char *filename)
return -1;
if (!filename) {
- gf_msg(this->name, GF_LOG_CRITICAL, 0, LG_MSG_INVALID_ENTRY,
- "gf_cmd_log_init: no filename specified\n");
+ gf_smsg(this->name, GF_LOG_CRITICAL, 0, LG_MSG_FILENAME_NOT_SPECIFIED,
+ "gf_cmd_log_init", NULL);
return -1;
}
@@ -2355,17 +2192,15 @@ gf_cmd_log_init(const char *filename)
fd = sys_open(ctx->log.cmd_log_filename, O_CREAT | O_WRONLY | O_APPEND,
S_IRUSR | S_IWUSR);
if (fd < 0) {
- gf_msg(this->name, GF_LOG_CRITICAL, errno, LG_MSG_FILE_OP_FAILED,
- "failed to open cmd_log_file");
+ gf_smsg(this->name, GF_LOG_CRITICAL, errno, LG_MSG_OPEN_LOGFILE_FAILED,
+ "cmd_log_file", NULL);
return -1;
}
ctx->log.cmdlogfile = fdopen(fd, "a");
if (!ctx->log.cmdlogfile) {
- gf_msg(this->name, GF_LOG_CRITICAL, errno, LG_MSG_FILE_OP_FAILED,
- "gf_cmd_log_init: failed to open logfile \"%s\" "
- "\n",
- ctx->log.cmd_log_filename);
+ gf_smsg(this->name, GF_LOG_CRITICAL, errno, LG_MSG_OPEN_LOGFILE_FAILED,
+ "gf_cmd_log_init: %s", ctx->log.cmd_log_filename, NULL);
sys_close(fd);
return -1;
}
@@ -2376,7 +2211,7 @@ int
gf_cmd_log(const char *domain, const char *fmt, ...)
{
va_list ap;
- char timestr[64];
+ char timestr[GF_TIMESTR_SIZE];
struct timeval tv = {
0,
};
@@ -2409,11 +2244,9 @@ gf_cmd_log(const char *domain, const char *fmt, ...)
goto out;
}
- gf_time_fmt(timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT);
- snprintf(timestr + strlen(timestr), GF_LOG_TIMESTR_SIZE - strlen(timestr),
- ".%" GF_PRI_SUSECONDS, tv.tv_usec);
+ gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT);
- ret = gf_asprintf(&logline, "[%s] %s : %s", timestr, domain, msg);
+ ret = gf_asprintf(&logline, "[%s] %s : %s\n", timestr, domain, msg);
if (ret == -1) {
goto out;
}
@@ -2430,27 +2263,25 @@ gf_cmd_log(const char *domain, const char *fmt, ...)
fd = sys_open(ctx->log.cmd_log_filename, O_CREAT | O_WRONLY | O_APPEND,
S_IRUSR | S_IWUSR);
if (fd < 0) {
- gf_msg(THIS->name, GF_LOG_CRITICAL, errno, LG_MSG_FILE_OP_FAILED,
- "failed to open "
- "logfile \"%s\" \n",
- ctx->log.cmd_log_filename);
+ gf_smsg(THIS->name, GF_LOG_CRITICAL, errno,
+ LG_MSG_OPEN_LOGFILE_FAILED, "name=%s",
+ ctx->log.cmd_log_filename, NULL);
ret = -1;
goto out;
}
ctx->log.cmdlogfile = fdopen(fd, "a");
if (!ctx->log.cmdlogfile) {
- gf_msg(THIS->name, GF_LOG_CRITICAL, errno, LG_MSG_FILE_OP_FAILED,
- "failed to open logfile \"%s\""
- " \n",
- ctx->log.cmd_log_filename);
+ gf_smsg(THIS->name, GF_LOG_CRITICAL, errno,
+ LG_MSG_OPEN_LOGFILE_FAILED, "name=%s",
+ ctx->log.cmd_log_filename, NULL);
ret = -1;
sys_close(fd);
goto out;
}
}
- fprintf(ctx->log.cmdlogfile, "%s\n", logline);
+ fputs(logline, ctx->log.cmdlogfile);
fflush(ctx->log.cmdlogfile);
out:
@@ -2462,7 +2293,7 @@ out:
}
static int
-_do_slog_format(const char *event, va_list inp, char **msg)
+_do_slog_format(int errnum, const char *event, va_list inp, char **msg)
{
va_list valist_tmp;
int i = 0;
@@ -2475,10 +2306,13 @@ _do_slog_format(const char *event, va_list inp, char **msg)
char format_char = '%';
char *tmp1 = NULL;
char *tmp2 = NULL;
+ char temp_sep[3] = "";
- ret = gf_asprintf(&tmp2, "%s", event);
- if (ret == -1)
+ tmp2 = gf_strdup("");
+ if (!tmp2) {
+ ret = -1;
goto out;
+ }
/* Hardcoded value for max key value pairs, exits early */
/* from loop if found NULL */
@@ -2526,22 +2360,45 @@ _do_slog_format(const char *event, va_list inp, char **msg)
(void)va_arg(inp, void *);
}
- ret = gf_asprintf(&tmp2, "%s\t%s", tmp1, buffer);
+ ret = gf_asprintf(&tmp2, "%s%s{%s}", tmp1, temp_sep, buffer);
if (ret < 0)
goto out;
GF_FREE(buffer);
buffer = NULL;
} else {
- ret = gf_asprintf(&tmp2, "%s\t%s", tmp1, fmt);
+ ret = gf_asprintf(&tmp2, "%s%s{%s}", tmp1, temp_sep, fmt);
if (ret < 0)
goto out;
}
+ /* Set seperator for next iteration */
+ temp_sep[0] = ',';
+ temp_sep[1] = ' ';
+ temp_sep[2] = 0;
+
GF_FREE(tmp1);
tmp1 = NULL;
}
+ tmp1 = gf_strdup(tmp2);
+ if (!tmp1) {
+ ret = -1;
+ goto out;
+ }
+ GF_FREE(tmp2);
+ tmp2 = NULL;
+
+ if (errnum) {
+ ret = gf_asprintf(&tmp2, "%s [%s%s{errno=%d}, {error=%s}]", event, tmp1,
+ temp_sep, errnum, strerror(errnum));
+ } else {
+ ret = gf_asprintf(&tmp2, "%s [%s]", event, tmp1);
+ }
+
+ if (ret == -1)
+ goto out;
+
*msg = gf_strdup(tmp2);
if (!*msg)
ret = -1;
@@ -2567,36 +2424,19 @@ _gf_smsg(const char *domain, const char *file, const char *function,
va_list valist;
char *msg = NULL;
int ret = 0;
+ xlator_t *this = THIS;
- va_start(valist, event);
- ret = _do_slog_format(event, valist, &msg);
- if (ret == -1)
- goto out;
-
- ret = _gf_msg(domain, file, function, line, level, errnum, trace, msgid,
- "%s", msg);
-
-out:
- va_end(valist);
- if (msg)
- GF_FREE(msg);
- return ret;
-}
-
-int
-_gf_slog(const char *domain, const char *file, const char *function, int line,
- gf_loglevel_t level, const char *event, ...)
-{
- va_list valist;
- char *msg = NULL;
- int ret = 0;
+ if (skip_logging(this, level))
+ return ret;
va_start(valist, event);
- ret = _do_slog_format(event, valist, &msg);
+ ret = _do_slog_format(errnum, event, valist, &msg);
if (ret == -1)
goto out;
- ret = _gf_log(domain, file, function, line, level, "%s", msg);
+ /* Pass errnum as zero since it is already formated as required */
+ ret = _gf_msg(domain, file, function, line, level, 0, trace, msgid, "%s",
+ msg);
out:
va_end(valist);
diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c
index 4ef62b8da48..2d5a12b0a00 100644
--- a/libglusterfs/src/mem-pool.c
+++ b/libglusterfs/src/mem-pool.c
@@ -8,14 +8,14 @@
cases as published by the Free Software Foundation.
*/
-#include "mem-pool.h"
-#include "logging.h"
-#include "xlator.h"
+#include "glusterfs/mem-pool.h"
+#include "glusterfs/common-utils.h" // for GF_ASSERT, gf_thread_cr...
+#include "glusterfs/globals.h" // for xlator_t, THIS
#include <stdlib.h>
#include <stdarg.h>
#include "unittest/unittest.h"
-#include "libglusterfs-messages.h"
+#include "glusterfs/libglusterfs-messages.h"
void
gf_mem_acct_enable_set(void *data)
@@ -35,61 +35,101 @@ gf_mem_acct_enable_set(void *data)
return;
}
-int
-gf_mem_set_acct_info(xlator_t *xl, char **alloc_ptr, size_t size, uint32_t type,
- const char *typestr)
+static void *
+gf_mem_header_prepare(struct mem_header *header, size_t size)
{
- void *ptr = NULL;
- struct mem_header *header = NULL;
+ void *ptr;
- if (!alloc_ptr)
- return -1;
+ header->size = size;
- ptr = *alloc_ptr;
+ ptr = header + 1;
- GF_ASSERT(xl != NULL);
+ /* data follows in this gap of 'size' bytes */
+ *(uint32_t *)(ptr + size) = GF_MEM_TRAILER_MAGIC;
- GF_ASSERT(xl->mem_acct != NULL);
+ return ptr;
+}
- GF_ASSERT(type <= xl->mem_acct->num_types);
+static void *
+gf_mem_set_acct_info(struct mem_acct *mem_acct, struct mem_header *header,
+ size_t size, uint32_t type, const char *typestr)
+{
+ struct mem_acct_rec *rec = NULL;
+ bool new_ref = false;
- LOCK(&xl->mem_acct->rec[type].lock);
- {
- if (!xl->mem_acct->rec[type].typestr)
- xl->mem_acct->rec[type].typestr = typestr;
- xl->mem_acct->rec[type].size += size;
- xl->mem_acct->rec[type].num_allocs++;
- xl->mem_acct->rec[type].total_allocs++;
- xl->mem_acct->rec[type].max_size = max(xl->mem_acct->rec[type].max_size,
- xl->mem_acct->rec[type].size);
- xl->mem_acct->rec[type].max_num_allocs = max(
- xl->mem_acct->rec[type].max_num_allocs,
- xl->mem_acct->rec[type].num_allocs);
- }
- UNLOCK(&xl->mem_acct->rec[type].lock);
+ if (mem_acct != NULL) {
+ GF_ASSERT(type <= mem_acct->num_types);
- GF_ATOMIC_INC(xl->mem_acct->refcnt);
+ rec = &mem_acct->rec[type];
+ LOCK(&rec->lock);
+ {
+ if (!rec->typestr) {
+ rec->typestr = typestr;
+ }
+ rec->size += size;
+ new_ref = (rec->num_allocs == 0);
+ rec->num_allocs++;
+ rec->total_allocs++;
+ rec->max_size = max(rec->max_size, rec->size);
+ rec->max_num_allocs = max(rec->max_num_allocs, rec->num_allocs);
+
+#ifdef DEBUG
+ list_add(&header->acct_list, &rec->obj_list);
+#endif
+ }
+ UNLOCK(&rec->lock);
+
+ /* We only take a reference for each memory type used, not for each
+ * allocation. This minimizes the use of atomic operations. */
+ if (new_ref) {
+ GF_ATOMIC_INC(mem_acct->refcnt);
+ }
+ }
- header = (struct mem_header *)ptr;
header->type = type;
- header->size = size;
- header->mem_acct = xl->mem_acct;
+ header->mem_acct = mem_acct;
header->magic = GF_MEM_HEADER_MAGIC;
+ return gf_mem_header_prepare(header, size);
+}
+
+static void *
+gf_mem_update_acct_info(struct mem_acct *mem_acct, struct mem_header *header,
+ size_t size)
+{
+ struct mem_acct_rec *rec = NULL;
+
+ if (mem_acct != NULL) {
+ rec = &mem_acct->rec[header->type];
+ LOCK(&rec->lock);
+ {
+ rec->size += size - header->size;
+ rec->total_allocs++;
+ rec->max_size = max(rec->max_size, rec->size);
+
#ifdef DEBUG
- INIT_LIST_HEAD(&header->acct_list);
- LOCK(&xl->mem_acct->rec[type].lock);
- {
- list_add(&header->acct_list, &(xl->mem_acct->rec[type].obj_list));
- }
- UNLOCK(&xl->mem_acct->rec[type].lock);
+ /* The old 'header' already was present in 'obj_list', but
+ * realloc() could have changed its address. We need to remove
+ * the old item from the list and add the new one. This can be
+ * done this way because list_move() doesn't use the pointers
+ * to the old location (which are not valid anymore) already
+ * present in the list, it simply overwrites them. */
+ list_move(&header->acct_list, &rec->obj_list);
#endif
- ptr += sizeof(struct mem_header);
- /* data follows in this gap of 'size' bytes */
- *(uint32_t *)(ptr + size) = GF_MEM_TRAILER_MAGIC;
+ }
+ UNLOCK(&rec->lock);
+ }
+
+ return gf_mem_header_prepare(header, size);
+}
- *alloc_ptr = ptr;
- return 0;
+static bool
+gf_mem_acct_enabled(void)
+{
+ xlator_t *x = THIS;
+ /* Low-level __gf_xxx() may be called
+ before ctx is initialized. */
+ return x->ctx && x->ctx->mem_acct_enable;
}
void *
@@ -97,10 +137,10 @@ __gf_calloc(size_t nmemb, size_t size, uint32_t type, const char *typestr)
{
size_t tot_size = 0;
size_t req_size = 0;
- char *ptr = NULL;
+ void *ptr = NULL;
xlator_t *xl = NULL;
- if (!THIS->ctx->mem_acct_enable)
+ if (!gf_mem_acct_enabled())
return CALLOC(nmemb, size);
xl = THIS;
@@ -114,19 +154,18 @@ __gf_calloc(size_t nmemb, size_t size, uint32_t type, const char *typestr)
gf_msg_nomem("", GF_LOG_ALERT, tot_size);
return NULL;
}
- gf_mem_set_acct_info(xl, &ptr, req_size, type, typestr);
- return (void *)ptr;
+ return gf_mem_set_acct_info(xl->mem_acct, ptr, req_size, type, typestr);
}
void *
__gf_malloc(size_t size, uint32_t type, const char *typestr)
{
size_t tot_size = 0;
- char *ptr = NULL;
+ void *ptr = NULL;
xlator_t *xl = NULL;
- if (!THIS->ctx->mem_acct_enable)
+ if (!gf_mem_acct_enabled())
return MALLOC(size);
xl = THIS;
@@ -138,84 +177,32 @@ __gf_malloc(size_t size, uint32_t type, const char *typestr)
gf_msg_nomem("", GF_LOG_ALERT, tot_size);
return NULL;
}
- gf_mem_set_acct_info(xl, &ptr, size, type, typestr);
- return (void *)ptr;
+ return gf_mem_set_acct_info(xl->mem_acct, ptr, size, type, typestr);
}
void *
__gf_realloc(void *ptr, size_t size)
{
size_t tot_size = 0;
- char *new_ptr;
- struct mem_header *old_header = NULL;
- struct mem_header *new_header = NULL;
- struct mem_header tmp_header;
+ struct mem_header *header = NULL;
- if (!THIS->ctx->mem_acct_enable)
+ if (!gf_mem_acct_enabled())
return REALLOC(ptr, size);
REQUIRE(NULL != ptr);
- old_header = (struct mem_header *)(ptr - GF_MEM_HEADER_SIZE);
- GF_ASSERT(old_header->magic == GF_MEM_HEADER_MAGIC);
- tmp_header = *old_header;
-
-#ifdef DEBUG
- int type = 0;
- size_t copy_size = 0;
-
- /* Making these changes for realloc is not straightforward. So
- * I am simulating realloc using calloc and free
- */
-
- type = tmp_header.type;
- new_ptr = __gf_calloc(1, size, type,
- tmp_header.mem_acct->rec[type].typestr);
- if (new_ptr) {
- copy_size = (size > tmp_header.size) ? tmp_header.size : size;
- memcpy(new_ptr, ptr, copy_size);
- __gf_free(ptr);
- }
-
- /* This is not quite what the man page says should happen */
- return new_ptr;
-#endif
+ header = (struct mem_header *)(ptr - GF_MEM_HEADER_SIZE);
+ GF_ASSERT(header->magic == GF_MEM_HEADER_MAGIC);
tot_size = size + GF_MEM_HEADER_SIZE + GF_MEM_TRAILER_SIZE;
- new_ptr = realloc(old_header, tot_size);
- if (!new_ptr) {
+ header = realloc(header, tot_size);
+ if (!header) {
gf_msg_nomem("", GF_LOG_ALERT, tot_size);
return NULL;
}
- /*
- * We used to pass (char **)&ptr as the second
- * argument after the value of realloc was saved
- * in ptr, but the compiler warnings complained
- * about the casting to and forth from void ** to
- * char **.
- * TBD: it would be nice to adjust the memory accounting info here,
- * but calling gf_mem_set_acct_info here is wrong because it bumps
- * up counts as though this is a new allocation - which it's not.
- * The consequence of doing nothing here is only that the sizes will be
- * wrong, but at least the counts won't be.
- uint32_t type = 0;
- xlator_t *xl = NULL;
- type = header->type;
- xl = (xlator_t *) header->xlator;
- gf_mem_set_acct_info (xl, &new_ptr, size, type, NULL);
- */
-
- new_header = (struct mem_header *)new_ptr;
- *new_header = tmp_header;
- new_header->size = size;
-
- new_ptr += sizeof(struct mem_header);
- /* data follows in this gap of 'size' bytes */
- *(uint32_t *)(new_ptr + size) = GF_MEM_TRAILER_MAGIC;
-
- return (void *)new_ptr;
+ return gf_mem_update_acct_info(header->mem_acct, header, size);
}
int
@@ -321,8 +308,9 @@ __gf_free(void *free_ptr)
void *ptr = NULL;
struct mem_acct *mem_acct;
struct mem_header *header = NULL;
+ bool last_ref = false;
- if (!THIS->ctx->mem_acct_enable) {
+ if (!gf_mem_acct_enabled()) {
FREE(free_ptr);
return;
}
@@ -348,21 +336,22 @@ __gf_free(void *free_ptr)
LOCK(&mem_acct->rec[header->type].lock);
{
- GF_ASSERT(mem_acct->rec[header->type].size >= header->size);
mem_acct->rec[header->type].size -= header->size;
mem_acct->rec[header->type].num_allocs--;
/* If all the instances are freed up then ensure typestr is set
* to NULL */
- if (!mem_acct->rec[header->type].num_allocs)
+ if (!mem_acct->rec[header->type].num_allocs) {
+ last_ref = true;
mem_acct->rec[header->type].typestr = NULL;
+ }
#ifdef DEBUG
list_del(&header->acct_list);
#endif
}
UNLOCK(&mem_acct->rec[header->type].lock);
- if (GF_ATOMIC_DEC(mem_acct->refcnt) == 0) {
- FREE(mem_acct);
+ if (last_ref) {
+ xlator_mem_acct_unref(mem_acct);
}
free:
@@ -373,11 +362,30 @@ free:
FREE(ptr);
}
-#define POOL_SMALLEST 7 /* i.e. 128 */
-#define POOL_LARGEST 20 /* i.e. 1048576 */
-#define NPOOLS (POOL_LARGEST - POOL_SMALLEST + 1)
+#if defined(GF_DISABLE_MEMPOOL)
+
+struct mem_pool *
+mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type,
+ unsigned long count, char *name)
+{
+ struct mem_pool *new;
+
+ new = GF_MALLOC(sizeof(struct mem_pool), gf_common_mt_mem_pool);
+ if (!new)
+ return NULL;
+
+ new->sizeof_type = sizeof_type;
+ return new;
+}
+
+void
+mem_pool_destroy(struct mem_pool *pool)
+{
+ GF_FREE(pool);
+}
+
+#else /* !GF_DISABLE_MEMPOOL */
-static pthread_key_t pool_key;
static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static struct list_head pool_threads;
static pthread_mutex_t pool_free_lock = PTHREAD_MUTEX_INITIALIZER;
@@ -385,23 +393,18 @@ static struct list_head pool_free_threads;
static struct mem_pool_shared pools[NPOOLS];
static size_t pool_list_size;
-#if !defined(GF_DISABLE_MEMPOOL)
+static __thread per_thread_pool_list_t *thread_pool_list = NULL;
+
#define N_COLD_LISTS 1024
#define POOL_SWEEP_SECS 30
-static unsigned long sweep_times;
-static unsigned long sweep_usecs;
-static unsigned long frees_to_system;
-
typedef struct {
- struct list_head death_row;
pooled_obj_hdr_t *cold_lists[N_COLD_LISTS];
unsigned int n_cold_lists;
} sweep_state_t;
enum init_state {
GF_MEMPOOL_INIT_NONE = 0,
- GF_MEMPOOL_INIT_PREINIT,
GF_MEMPOOL_INIT_EARLY,
GF_MEMPOOL_INIT_LATE,
GF_MEMPOOL_INIT_DESTROY
@@ -412,36 +415,33 @@ static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER;
static unsigned int init_count = 0;
static pthread_t sweeper_tid;
-gf_boolean_t
+static bool
collect_garbage(sweep_state_t *state, per_thread_pool_list_t *pool_list)
{
unsigned int i;
per_thread_pool_t *pt_pool;
- gf_boolean_t poisoned;
(void)pthread_spin_lock(&pool_list->lock);
- poisoned = pool_list->poison != 0;
- if (!poisoned) {
- for (i = 0; i < NPOOLS; ++i) {
- pt_pool = &pool_list->pools[i];
- if (pt_pool->cold_list) {
- if (state->n_cold_lists >= N_COLD_LISTS) {
- break;
- }
- state->cold_lists[state->n_cold_lists++] = pt_pool->cold_list;
+ for (i = 0; i < NPOOLS; ++i) {
+ pt_pool = &pool_list->pools[i];
+ if (pt_pool->cold_list) {
+ if (state->n_cold_lists >= N_COLD_LISTS) {
+ (void)pthread_spin_unlock(&pool_list->lock);
+ return true;
}
- pt_pool->cold_list = pt_pool->hot_list;
- pt_pool->hot_list = NULL;
+ state->cold_lists[state->n_cold_lists++] = pt_pool->cold_list;
}
+ pt_pool->cold_list = pt_pool->hot_list;
+ pt_pool->hot_list = NULL;
}
(void)pthread_spin_unlock(&pool_list->lock);
- return poisoned;
+ return false;
}
-void
+static void
free_obj_list(pooled_obj_hdr_t *victim)
{
pooled_obj_hdr_t *next;
@@ -450,93 +450,101 @@ free_obj_list(pooled_obj_hdr_t *victim)
next = victim->next;
free(victim);
victim = next;
- ++frees_to_system;
}
}
-void *
+static void *
pool_sweeper(void *arg)
{
sweep_state_t state;
per_thread_pool_list_t *pool_list;
- per_thread_pool_list_t *next_pl;
- per_thread_pool_t *pt_pool;
- unsigned int i;
- struct timeval begin_time;
- struct timeval end_time;
- struct timeval elapsed;
- gf_boolean_t poisoned;
+ uint32_t i;
+ bool pending;
/*
* This is all a bit inelegant, but the point is to avoid doing
* expensive things (like freeing thousands of objects) while holding a
- * global lock. Thus, we split each iteration into three passes, with
+ * global lock. Thus, we split each iteration into two passes, with
* only the first and fastest holding the lock.
*/
+ pending = true;
+
for (;;) {
- sleep(POOL_SWEEP_SECS);
+ /* If we know there's pending work to do (or it's the first run), we
+ * do collect garbage more often. */
+ sleep(pending ? POOL_SWEEP_SECS / 5 : POOL_SWEEP_SECS);
+
(void)pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
- INIT_LIST_HEAD(&state.death_row);
state.n_cold_lists = 0;
+ pending = false;
/* First pass: collect stuff that needs our attention. */
- (void)gettimeofday(&begin_time, NULL);
(void)pthread_mutex_lock(&pool_lock);
- list_for_each_entry_safe(pool_list, next_pl, &pool_threads, thr_list)
+ list_for_each_entry(pool_list, &pool_threads, thr_list)
{
- (void)pthread_mutex_unlock(&pool_lock);
- poisoned = collect_garbage(&state, pool_list);
- (void)pthread_mutex_lock(&pool_lock);
-
- if (poisoned) {
- list_move(&pool_list->thr_list, &state.death_row);
+ if (collect_garbage(&state, pool_list)) {
+ pending = true;
}
}
(void)pthread_mutex_unlock(&pool_lock);
- (void)gettimeofday(&end_time, NULL);
- timersub(&end_time, &begin_time, &elapsed);
- sweep_usecs += elapsed.tv_sec * 1000000 + elapsed.tv_usec;
- sweep_times += 1;
-
- /* Second pass: free dead pools. */
- (void)pthread_mutex_lock(&pool_free_lock);
- list_for_each_entry_safe(pool_list, next_pl, &state.death_row, thr_list)
- {
- for (i = 0; i < NPOOLS; ++i) {
- pt_pool = &pool_list->pools[i];
- free_obj_list(pt_pool->cold_list);
- free_obj_list(pt_pool->hot_list);
- pt_pool->hot_list = pt_pool->cold_list = NULL;
- }
- list_del(&pool_list->thr_list);
- list_add(&pool_list->thr_list, &pool_free_threads);
- }
- (void)pthread_mutex_unlock(&pool_free_lock);
- /* Third pass: free cold objects from live pools. */
+ /* Second pass: free cold objects from live pools. */
for (i = 0; i < state.n_cold_lists; ++i) {
free_obj_list(state.cold_lists[i]);
}
(void)pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
}
+
+ return NULL;
}
void
-pool_destructor(void *arg)
+mem_pool_thread_destructor(per_thread_pool_list_t *pool_list)
{
- per_thread_pool_list_t *pool_list = arg;
+ per_thread_pool_t *pt_pool;
+ uint32_t i;
- /* The pool-sweeper thread will take it from here.
- *
- * We can change 'poison' here without taking locks because the change
- * itself doesn't interact with other parts of the code and a simple write
- * is already atomic from the point of view of the processor.
- *
- * This change can modify what mem_put() does, but both possibilities are
- * fine until the sweeper thread kicks in. The real synchronization must be
- * between mem_put() and the sweeper thread. */
- pool_list->poison = 1;
+ if (pool_list == NULL) {
+ pool_list = thread_pool_list;
+ }
+
+ /* The current thread is terminating. None of the allocated objects will
+ * be used again. We can directly destroy them here instead of delaying
+ * it until the next sweeper loop. */
+ if (pool_list != NULL) {
+ /* Remove pool_list from the global list to avoid that sweeper
+ * could touch it. */
+ pthread_mutex_lock(&pool_lock);
+ list_del(&pool_list->thr_list);
+ pthread_mutex_unlock(&pool_lock);
+
+ /* We need to protect hot/cold changes from potential mem_put() calls
+ * that reference this pool_list. Once poison is set to true, we are
+ * sure that no one else will touch hot/cold lists. The only possible
+ * race is when at the same moment a mem_put() is adding a new item
+ * to the hot list. We protect from that by taking pool_list->lock.
+ * After that we don't need the lock to destroy the hot/cold lists. */
+ pthread_spin_lock(&pool_list->lock);
+ pool_list->poison = true;
+ pthread_spin_unlock(&pool_list->lock);
+
+ for (i = 0; i < NPOOLS; i++) {
+ pt_pool = &pool_list->pools[i];
+
+ free_obj_list(pt_pool->hot_list);
+ pt_pool->hot_list = NULL;
+
+ free_obj_list(pt_pool->cold_list);
+ pt_pool->cold_list = NULL;
+ }
+
+ pthread_mutex_lock(&pool_free_lock);
+ list_add(&pool_list->thr_list, &pool_free_threads);
+ pthread_mutex_unlock(&pool_free_lock);
+
+ thread_pool_list = NULL;
+ }
}
static __attribute__((constructor)) void
@@ -559,46 +567,30 @@ mem_pools_preinit(void)
pool_list_size = sizeof(per_thread_pool_list_t) +
sizeof(per_thread_pool_t) * (NPOOLS - 1);
- init_done = GF_MEMPOOL_INIT_PREINIT;
+ init_done = GF_MEMPOOL_INIT_EARLY;
}
-/* Use mem_pools_init_early() function for basic initialization. There will be
- * no cleanup done by the pool_sweeper thread until mem_pools_init_late() has
- * been called. Calling mem_get() will be possible after this function has
- * setup the basic structures. */
-void
-mem_pools_init_early(void)
+static __attribute__((destructor)) void
+mem_pools_postfini(void)
{
- pthread_mutex_lock(&init_mutex);
- /* Use a pthread_key destructor to clean up when a thread exits.
+ /* TODO: This function should destroy all per thread memory pools that
+ * are still alive, but this is not possible right now because glibc
+ * starts calling destructors as soon as exit() is called, and
+ * gluster doesn't ensure that all threads have been stopped before
+ * calling exit(). Existing threads would crash when they try to use
+ * memory or they terminate if we destroy things here.
*
- * We won't increase init_count here, that is only done when the
- * pool_sweeper thread is started too.
- */
- if (init_done == GF_MEMPOOL_INIT_PREINIT ||
- init_done == GF_MEMPOOL_INIT_DESTROY) {
- /* key has not been created yet */
- if (pthread_key_create(&pool_key, pool_destructor) != 0) {
- gf_log("mem-pool", GF_LOG_CRITICAL,
- "failed to initialize mem-pool key");
- }
-
- init_done = GF_MEMPOOL_INIT_EARLY;
- } else {
- gf_log("mem-pool", GF_LOG_CRITICAL,
- "incorrect order of mem-pool initialization "
- "(init_done=%d)",
- init_done);
- }
-
- pthread_mutex_unlock(&init_mutex);
+ * When we propertly terminate all threads, we can add the needed
+ * code here. Till then we need to leave the memory allocated. Most
+ * probably this function will be executed on process termination,
+ * so the memory will be released anyway by the system. */
}
-/* Call mem_pools_init_late() once threading has been configured completely.
- * This prevent the pool_sweeper thread from getting killed once the main()
- * thread exits during deamonizing. */
+/* Call mem_pools_init() once threading has been configured completely. This
+ * prevent the pool_sweeper thread from getting killed once the main() thread
+ * exits during deamonizing. */
void
-mem_pools_init_late(void)
+mem_pools_init(void)
{
pthread_mutex_lock(&init_mutex);
if ((init_count++) == 0) {
@@ -617,56 +609,27 @@ mem_pools_fini(void)
switch (init_count) {
case 0:
/*
- * If init_count is already zero (as e.g. if somebody called
- * this before mem_pools_init_late) then the sweeper was
- * probably never even started so we don't need to stop it.
- * Even if there's some crazy circumstance where there is a
- * sweeper but init_count is still zero, that just means we'll
- * leave it running. Not perfect, but far better than any
- * known alternative.
+ * If init_count is already zero (as e.g. if somebody called this
+ * before mem_pools_init) then the sweeper was probably never even
+ * started so we don't need to stop it. Even if there's some crazy
+ * circumstance where there is a sweeper but init_count is still
+ * zero, that just means we'll leave it running. Not perfect, but
+ * far better than any known alternative.
*/
break;
case 1: {
- per_thread_pool_list_t *pool_list;
- per_thread_pool_list_t *next_pl;
- unsigned int i;
-
- /* if only mem_pools_init_early() was called, sweeper_tid will
- * be invalid and the functions will error out. That is not
- * critical. In all other cases, the sweeper_tid will be valid
- * and the thread gets stopped. */
+ /* if mem_pools_init() was not called, sweeper_tid will be invalid
+ * and the functions will error out. That is not critical. In all
+ * other cases, the sweeper_tid will be valid and the thread gets
+ * stopped. */
(void)pthread_cancel(sweeper_tid);
(void)pthread_join(sweeper_tid, NULL);
- /* Need to clean the pool_key to prevent further usage of the
- * per_thread_pool_list_t structure that is stored for each
- * thread.
- * This also prevents calling pool_destructor() when a thread
- * exits, so there is no chance on a use-after-free of the
- * per_thread_pool_list_t structure. */
- (void)pthread_key_delete(pool_key);
-
- /* free all objects from all pools */
- list_for_each_entry_safe(pool_list, next_pl, &pool_threads,
- thr_list)
- {
- for (i = 0; i < NPOOLS; ++i) {
- free_obj_list(pool_list->pools[i].hot_list);
- free_obj_list(pool_list->pools[i].cold_list);
- pool_list->pools[i].hot_list = NULL;
- pool_list->pools[i].cold_list = NULL;
- }
-
- list_del(&pool_list->thr_list);
- FREE(pool_list);
- }
-
- list_for_each_entry_safe(pool_list, next_pl, &pool_free_threads,
- thr_list)
- {
- list_del(&pool_list->thr_list);
- FREE(pool_list);
- }
+ /* There could be threads still running in some cases, so we can't
+ * destroy pool_lists in use. We can also not destroy unused
+ * pool_lists because some allocated objects may still be pointing
+ * to them. */
+ mem_pool_thread_destructor(NULL);
init_done = GF_MEMPOOL_INIT_DESTROY;
/* Fall through. */
@@ -677,26 +640,36 @@ mem_pools_fini(void)
pthread_mutex_unlock(&init_mutex);
}
-#else
-void
-mem_pools_init_early(void)
-{
-}
void
-mem_pools_init_late(void)
-{
-}
-void
-mem_pools_fini(void)
+mem_pool_destroy(struct mem_pool *pool)
{
+ if (!pool)
+ return;
+
+ /* remove this pool from the owner (glusterfs_ctx_t) */
+ LOCK(&pool->ctx->lock);
+ {
+ list_del(&pool->owner);
+ }
+ UNLOCK(&pool->ctx->lock);
+
+ /* free this pool, but keep the mem_pool_shared */
+ GF_FREE(pool);
+
+ /*
+ * Pools are now permanent, so the mem_pool->pool is kept around. All
+ * of the objects *in* the pool will eventually be freed via the
+ * pool-sweeper thread, and this way we don't have to add a lot of
+ * reference-counting complexity.
+ */
}
-#endif
struct mem_pool *
mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type,
unsigned long count, char *name)
{
- unsigned int i;
+ unsigned long extra_size, size;
+ unsigned int power;
struct mem_pool *new = NULL;
struct mem_pool_shared *pool = NULL;
@@ -706,20 +679,33 @@ mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type,
return NULL;
}
- for (i = 0; i < NPOOLS; ++i) {
- if (sizeof_type <= AVAILABLE_SIZE(pools[i].power_of_two)) {
- pool = &pools[i];
- break;
- }
- }
-
- if (!pool) {
+ /* This is the overhead we'll have because of memory accounting for each
+ * memory block. */
+ extra_size = sizeof(pooled_obj_hdr_t);
+
+ /* We need to compute the total space needed to hold the data type and
+ * the header. Given that the smallest block size we have in the pools
+ * is 2^POOL_SMALLEST, we need to take the MAX(size, 2^POOL_SMALLEST).
+ * However, since this value is only needed to compute its rounded
+ * logarithm in base 2, and this only depends on the highest bit set,
+ * we can simply do a bitwise or with the minimum size. We need to
+ * subtract 1 for correct handling of sizes that are exactly a power
+ * of 2. */
+ size = (sizeof_type + extra_size - 1UL) | ((1UL << POOL_SMALLEST) - 1UL);
+
+ /* We compute the logarithm in base 2 rounded up of the resulting size.
+ * This value will identify which pool we need to use from the pools of
+ * powers of 2. This is equivalent to finding the position of the highest
+ * bit set. */
+ power = sizeof(size) * 8 - __builtin_clzl(size);
+ if (power > POOL_LARGEST) {
gf_msg_callingfn("mem-pool", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
"invalid argument");
return NULL;
}
+ pool = &pools[power - POOL_SMALLEST];
- new = GF_CALLOC(sizeof(struct mem_pool), 1, gf_common_mt_mem_pool);
+ new = GF_MALLOC(sizeof(struct mem_pool), gf_common_mt_mem_pool);
if (!new)
return NULL;
@@ -727,8 +713,13 @@ mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type,
new->sizeof_type = sizeof_type;
new->count = count;
new->name = name;
+ new->xl_name = THIS->name;
new->pool = pool;
GF_ATOMIC_INIT(new->active, 0);
+#ifdef DEBUG
+ GF_ATOMIC_INIT(new->hit, 0);
+ GF_ATOMIC_INIT(new->miss, 0);
+#endif
INIT_LIST_HEAD(&new->owner);
LOCK(&ctx->lock);
@@ -740,36 +731,13 @@ mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type,
return new;
}
-void *
-mem_get0(struct mem_pool *mem_pool)
-{
- void *ptr = NULL;
-
- if (!mem_pool) {
- gf_msg_callingfn("mem-pool", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
- "invalid argument");
- return NULL;
- }
-
- ptr = mem_get(mem_pool);
- if (ptr) {
-#if defined(GF_DISABLE_MEMPOOL)
- memset(ptr, 0, mem_pool->sizeof_type);
-#else
- memset(ptr, 0, AVAILABLE_SIZE(mem_pool->pool->power_of_two));
-#endif
- }
-
- return ptr;
-}
-
per_thread_pool_list_t *
mem_get_pool_list(void)
{
per_thread_pool_list_t *pool_list;
unsigned int i;
- pool_list = pthread_getspecific(pool_key);
+ pool_list = thread_pool_list;
if (pool_list) {
return pool_list;
}
@@ -797,21 +765,33 @@ mem_get_pool_list(void)
}
}
+ /* There's no need to take pool_list->lock, because this is already an
+ * atomic operation and we don't need to synchronize it with any change
+ * in hot/cold lists. */
+ pool_list->poison = false;
+
(void)pthread_mutex_lock(&pool_lock);
- pool_list->poison = 0;
list_add(&pool_list->thr_list, &pool_threads);
(void)pthread_mutex_unlock(&pool_lock);
- (void)pthread_setspecific(pool_key, pool_list);
+ thread_pool_list = pool_list;
+
+ /* Ensure that all memory objects associated to the new pool_list are
+ * destroyed when the thread terminates. */
+ gf_thread_needs_cleanup();
+
return pool_list;
}
-pooled_obj_hdr_t *
+static pooled_obj_hdr_t *
mem_get_from_pool(struct mem_pool *mem_pool)
{
per_thread_pool_list_t *pool_list;
per_thread_pool_t *pt_pool;
pooled_obj_hdr_t *retval;
+#ifdef DEBUG
+ gf_boolean_t hit = _gf_true;
+#endif
pool_list = mem_get_pool_list();
if (!pool_list || pool_list->poison) {
@@ -837,34 +817,58 @@ mem_get_from_pool(struct mem_pool *mem_pool)
(void)pthread_spin_unlock(&pool_list->lock);
GF_ATOMIC_INC(pt_pool->parent->allocs_stdc);
retval = malloc(1 << pt_pool->parent->power_of_two);
+#ifdef DEBUG
+ hit = _gf_false;
+#endif
}
}
if (retval != NULL) {
- retval->magic = GF_MEM_HEADER_MAGIC;
retval->pool = mem_pool;
- retval->pool_list = pool_list;
retval->power_of_two = mem_pool->pool->power_of_two;
+#ifdef DEBUG
+ if (hit == _gf_true)
+ GF_ATOMIC_INC(mem_pool->hit);
+ else
+ GF_ATOMIC_INC(mem_pool->miss);
+#endif
+ retval->magic = GF_MEM_HEADER_MAGIC;
+ retval->pool_list = pool_list;
}
return retval;
}
+#endif /* GF_DISABLE_MEMPOOL */
+
void *
-mem_get(struct mem_pool *mem_pool)
+mem_get0(struct mem_pool *mem_pool)
{
+ void *ptr = mem_get(mem_pool);
+ if (ptr) {
#if defined(GF_DISABLE_MEMPOOL)
- return GF_MALLOC(mem_pool->sizeof_type, gf_common_mt_mem_pool);
+ memset(ptr, 0, mem_pool->sizeof_type);
#else
- pooled_obj_hdr_t *retval;
+ memset(ptr, 0, AVAILABLE_SIZE(mem_pool->pool->power_of_two));
+#endif
+ }
+ return ptr;
+}
+
+void *
+mem_get(struct mem_pool *mem_pool)
+{
if (!mem_pool) {
gf_msg_callingfn("mem-pool", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
"invalid argument");
return NULL;
}
- retval = mem_get_from_pool(mem_pool);
+#if defined(GF_DISABLE_MEMPOOL)
+ return GF_MALLOC(mem_pool->sizeof_type, gf_common_mt_mem_pool);
+#else
+ pooled_obj_hdr_t *retval = mem_get_from_pool(mem_pool);
if (!retval) {
return NULL;
}
@@ -896,10 +900,19 @@ mem_put(void *ptr)
/* Not one of ours; don't touch it. */
return;
}
+
+ if (!hdr->pool_list) {
+ gf_msg_callingfn("mem-pool", GF_LOG_CRITICAL, EINVAL,
+ LG_MSG_INVALID_ARG,
+ "invalid argument hdr->pool_list NULL");
+ return;
+ }
+
pool_list = hdr->pool_list;
pt_pool = &pool_list->pools[hdr->power_of_two - POOL_SMALLEST];
- GF_ATOMIC_DEC(hdr->pool->active);
+ if (hdr->pool)
+ GF_ATOMIC_DEC(hdr->pool->active);
hdr->magic = GF_MEM_INVALID_MAGIC;
@@ -917,27 +930,3 @@ mem_put(void *ptr)
}
#endif /* GF_DISABLE_MEMPOOL */
}
-
-void
-mem_pool_destroy(struct mem_pool *pool)
-{
- if (!pool)
- return;
-
- /* remove this pool from the owner (glusterfs_ctx_t) */
- LOCK(&pool->ctx->lock);
- {
- list_del(&pool->owner);
- }
- UNLOCK(&pool->ctx->lock);
-
- /* free this pool, but keep the mem_pool_shared */
- GF_FREE(pool);
-
- /*
- * Pools are now permanent, so the mem_pool->pool is kept around. All
- * of the objects *in* the pool will eventually be freed via the
- * pool-sweeper thread, and this way we don't have to add a lot of
- * reference-counting complexity.
- */
-}
diff --git a/libglusterfs/src/mem-types.h b/libglusterfs/src/mem-types.h
deleted file mode 100644
index 4f6ca4e0e84..00000000000
--- a/libglusterfs/src/mem-types.h
+++ /dev/null
@@ -1,181 +0,0 @@
-/*
- Copyright (c) 2008-2016 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __MEM_TYPES_H__
-#define __MEM_TYPES_H__
-
-enum gf_common_mem_types_ {
- gf_common_mt_call_stub_t,
- gf_common_mt_dnscache6,
- gf_common_mt_data_pair_t,
- gf_common_mt_data_t,
- gf_common_mt_dict_t,
- gf_common_mt_event_pool,
- gf_common_mt_reg,
- gf_common_mt_pollfd,
- gf_common_mt_epoll_event,
- gf_common_mt_fdentry_t,
- gf_common_mt_fdtable_t,
- gf_common_mt_fd_t,
- gf_common_mt_fd_ctx,
- gf_common_mt_gf_dirent_t,
- gf_common_mt_glusterfs_ctx_t,
- gf_common_mt_dentry_t,
- gf_common_mt_inode_t,
- gf_common_mt_inode_ctx,
- gf_common_mt_list_head,
- gf_common_mt_inode_table_t,
- gf_common_mt_xlator_t,
- gf_common_mt_xlator_list_t,
- gf_common_mt_log_msg,
- gf_common_mt_client_log,
- gf_common_mt_volume_opt_list_t,
- gf_common_mt_gf_hdr_common_t,
- gf_common_mt_call_frame_t,
- gf_common_mt_call_stack_t,
- gf_common_mt_gf_timer_t,
- gf_common_mt_gf_timer_registry_t,
- gf_common_mt_transport,
- gf_common_mt_transport_msg,
- gf_common_mt_auth_handle_t,
- gf_common_mt_iobuf,
- gf_common_mt_iobuf_arena,
- gf_common_mt_iobref,
- gf_common_mt_iobuf_pool,
- gf_common_mt_iovec,
- gf_common_mt_memdup,
- gf_common_mt_asprintf,
- gf_common_mt_strdup,
- gf_common_mt_socket_private_t,
- gf_common_mt_ioq,
- gf_common_mt_transport_t,
- gf_common_mt_socket_local_t,
- gf_common_mt_char,
- gf_common_mt_rbthash_table_t,
- gf_common_mt_rbthash_bucket,
- gf_common_mt_mem_pool,
- gf_common_mt_long,
- gf_common_mt_rpcsvc_auth_list,
- gf_common_mt_rpcsvc_t,
- gf_common_mt_rpcsvc_conn_t,
- gf_common_mt_rpcsvc_program_t,
- gf_common_mt_rpcsvc_listener_t,
- gf_common_mt_rpcsvc_wrapper_t,
- gf_common_mt_rpcsvc_stage_t,
- gf_common_mt_rpcclnt_t,
- gf_common_mt_rpcclnt_savedframe_t,
- gf_common_mt_rpc_trans_t,
- gf_common_mt_rpc_trans_pollin_t,
- gf_common_mt_rpc_trans_handover_t,
- gf_common_mt_rpc_trans_reqinfo_t,
- gf_common_mt_rpc_trans_rsp_t,
- gf_common_mt_glusterfs_graph_t,
- gf_common_mt_rdma_private_t,
- gf_common_mt_rdma_ioq_t,
- gf_common_mt_rpc_transport_t,
- gf_common_mt_rdma_local_t,
- gf_common_mt_rdma_post_t,
- gf_common_mt_qpent,
- gf_common_mt_rdma_device_t,
- gf_common_mt_rdma_context_t,
- gf_common_mt_sge,
- gf_common_mt_rpcclnt_cb_program_t,
- gf_common_mt_libxl_marker_local,
- gf_common_mt_graph_buf,
- gf_common_mt_trie_trie,
- gf_common_mt_trie_data,
- gf_common_mt_trie_node,
- gf_common_mt_trie_buf,
- gf_common_mt_trie_end,
- gf_common_mt_run_argv,
- gf_common_mt_run_logbuf,
- gf_common_mt_fd_lk_ctx_t,
- gf_common_mt_fd_lk_ctx_node_t,
- gf_common_mt_buffer_t,
- gf_common_mt_circular_buffer_t,
- gf_common_mt_eh_t,
- gf_common_mt_store_handle_t,
- gf_common_mt_store_iter_t,
- gf_common_mt_drc_client_t,
- gf_common_mt_drc_globals_t,
- gf_common_mt_drc_rbtree_node_t,
- gf_common_mt_iov_base_t,
- gf_common_mt_groups_t,
- gf_common_mt_cliententry_t,
- gf_common_mt_clienttable_t,
- gf_common_mt_client_t,
- gf_common_mt_client_ctx,
- gf_common_mt_lock_table,
- gf_common_mt_locker,
- gf_common_mt_auxgids,
- gf_common_mt_syncopctx,
- gf_common_mt_iobrefs,
- gf_common_mt_gsync_status_t,
- gf_common_mt_uuid_t,
- gf_common_mt_mgmt_v3_lock_obj_t,
- gf_common_mt_txn_opinfo_obj_t,
- gf_common_mt_strfd_t,
- gf_common_mt_strfd_data_t,
- gf_common_mt_regex_t,
- gf_common_mt_ereg,
- gf_common_mt_wr,
- gf_common_mt_rdma_arena_mr,
- gf_common_mt_dnscache = 115,
- gf_common_mt_dnscache_entry = 116,
- gf_common_mt_parser_t,
- gf_common_quota_meta_t,
- /*related to gfdb library*/
- gfdb_mt_time_t,
- gf_mt_sql_cbk_args_t,
- gf_mt_gfdb_query_record_t,
- gf_mt_gfdb_link_info_t,
- gf_mt_gfdb_db_operations_t,
- gf_mt_sql_connection_t,
- gf_mt_sql_conn_node_t,
- gf_mt_db_conn_node_t,
- gf_mt_db_connection_t,
- gfdb_mt_db_record_t,
- /*related to gfdb library*/
- gf_common_mt_rbuf_t,
- gf_common_mt_rlist_t,
- gf_common_mt_rvec_t,
- /* glusterd can load the nfs-xlator dynamically and needs these two */
- gf_common_mt_nfs_netgroups,
- gf_common_mt_nfs_exports,
- gf_common_mt_gf_brick_spec_t,
- gf_common_mt_gf_timer_entry_t,
- gf_common_mt_int,
- gf_common_mt_pointer,
- gf_common_mt_synctask,
- gf_common_mt_syncstack,
- gf_common_mt_syncenv,
- gf_common_mt_scan_data,
- gf_common_list_node,
- gf_mt_default_args_t,
- gf_mt_default_args_cbk_t,
- /*used for compound fops*/
- gf_mt_compound_req_t,
- gf_mt_compound_rsp_t,
- gf_common_mt_tw_ctx,
- gf_common_mt_tw_timer_list,
- /*lock migration*/
- gf_common_mt_lock_mig,
- /* throttle */
- gf_common_mt_tbf_t,
- gf_common_mt_tbf_bucket_t,
- gf_common_mt_tbf_throttle_t,
- gf_common_mt_pthread_t,
- gf_common_ping_local_t,
- gf_common_volfile_t,
- gf_common_mt_mgmt_v3_lock_timer_t,
- gf_common_mt_server_cmdline_t,
- gf_common_mt_end
-};
-#endif
diff --git a/libglusterfs/src/monitoring.c b/libglusterfs/src/monitoring.c
index 5f11827b187..fbb68dc8622 100644
--- a/libglusterfs/src/monitoring.c
+++ b/libglusterfs/src/monitoring.c
@@ -8,9 +8,9 @@
cases as published by the Free Software Foundation.
*/
-#include "monitoring.h"
-#include "xlator.h"
-#include "syscall.h"
+#include "glusterfs/monitoring.h"
+#include "glusterfs/xlator.h"
+#include "glusterfs/syscall.h"
#include <stdlib.h>
@@ -113,15 +113,15 @@ dump_latency_and_count(xlator_t *xl, int fd)
dprintf(fd, "%s.interval.%s.fail_count %" PRIu64 "\n", xl->name,
gf_fop_list[index], cbk);
}
- if (xl->stats.interval.latencies[index].count != 0.0) {
+ if (xl->stats.interval.latencies[index].count != 0) {
dprintf(fd, "%s.interval.%s.latency %lf\n", xl->name,
gf_fop_list[index],
- (xl->stats.interval.latencies[index].total /
+ (((double)xl->stats.interval.latencies[index].total) /
xl->stats.interval.latencies[index].count));
- dprintf(fd, "%s.interval.%s.max %lf\n", xl->name,
+ dprintf(fd, "%s.interval.%s.max %" PRIu64 "\n", xl->name,
gf_fop_list[index],
xl->stats.interval.latencies[index].max);
- dprintf(fd, "%s.interval.%s.min %lf\n", xl->name,
+ dprintf(fd, "%s.interval.%s.min %" PRIu64 "\n", xl->name,
gf_fop_list[index],
xl->stats.interval.latencies[index].min);
}
@@ -230,6 +230,8 @@ gf_monitor_metrics(glusterfs_ctx_t *ctx)
int fd = 0;
char *filepath = NULL, *dumppath = NULL;
+ gf_msg_trace("monitoring", 0, "received monitoring request (sig:USR2)");
+
dumppath = ctx->config.metrics_dumppath;
if (dumppath == NULL) {
dumppath = GLUSTER_METRICS_DIR;
diff --git a/libglusterfs/src/options.c b/libglusterfs/src/options.c
index 12078327b4f..f6b5aa0ea23 100644
--- a/libglusterfs/src/options.c
+++ b/libglusterfs/src/options.c
@@ -10,9 +10,9 @@
#include <fnmatch.h>
-#include "xlator.h"
-#include "defaults.h"
-#include "libglusterfs-messages.h"
+#include "glusterfs/xlator.h"
+#include "glusterfs/defaults.h"
+#include "glusterfs/libglusterfs-messages.h"
#define GF_OPTION_LIST_EMPTY(_opt) (_opt->value[0] == NULL)
@@ -25,7 +25,8 @@ xlator_option_validate_path(xlator_t *xl, const char *key, const char *value,
if (strstr(value, "../")) {
snprintf(errstr, 256, "invalid path given '%s'", value);
- gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s", errstr);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
goto out;
}
@@ -35,7 +36,8 @@ xlator_option_validate_path(xlator_t *xl, const char *key, const char *value,
"option %s %s: '%s' is not an "
"absolute path name",
key, value, value);
- gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s", errstr);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
goto out;
}
@@ -59,7 +61,8 @@ xlator_option_validate_int(xlator_t *xl, const char *key, const char *value,
if (gf_string2longlong(value, &inputll) != 0) {
snprintf(errstr, 256, "invalid number format \"%s\" in option \"%s\"",
value, key);
- gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s", errstr);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
goto out;
}
@@ -67,7 +70,8 @@ xlator_option_validate_int(xlator_t *xl, const char *key, const char *value,
if ((inputll == 0) && (gf_string2ulonglong(value, &uinputll) != 0)) {
snprintf(errstr, 256, "invalid number format \"%s\" in option \"%s\"",
value, key);
- gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s", errstr);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
goto out;
}
@@ -87,8 +91,8 @@ xlator_option_validate_int(xlator_t *xl, const char *key, const char *value,
"'%lld' in 'option %s %s' is smaller than "
"minimum value '%.0f'",
inputll, key, value, opt->min);
- gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s",
- errstr);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
goto out;
}
} else if (opt->validate == GF_OPT_VALIDATE_MAX) {
@@ -97,8 +101,8 @@ xlator_option_validate_int(xlator_t *xl, const char *key, const char *value,
"'%lld' in 'option %s %s' is greater than "
"maximum value '%.0f'",
inputll, key, value, opt->max);
- gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s",
- errstr);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
goto out;
}
} else if ((inputll < opt->min) || (inputll > opt->max)) {
@@ -106,7 +110,8 @@ xlator_option_validate_int(xlator_t *xl, const char *key, const char *value,
"'%lld' in 'option %s %s' is out of range "
"[%.0f - %.0f]",
inputll, key, value, opt->min, opt->max);
- gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "%s", errstr);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "error=%s",
+ errstr, NULL);
goto out;
}
@@ -129,7 +134,8 @@ xlator_option_validate_sizet(xlator_t *xl, const char *key, const char *value,
if (gf_string2bytesize_uint64(value, &size) != 0) {
snprintf(errstr, 256, "invalid number format \"%s\" in option \"%s\"",
value, key);
- gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s", errstr);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
ret = -1;
goto out;
}
@@ -147,7 +153,8 @@ xlator_option_validate_sizet(xlator_t *xl, const char *key, const char *value,
"'%" PRIu64
"' in 'option %s %s' is out of range [%.0f - %.0f]",
size, key, value, opt->min, opt->max);
- gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "%s", errstr);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "error=%s",
+ errstr, NULL);
ret = -1;
}
@@ -171,7 +178,8 @@ xlator_option_validate_bool(xlator_t *xl, const char *key, const char *value,
if (gf_string2boolean(value, &is_valid) != 0) {
snprintf(errstr, 256, "option %s %s: '%s' is not a valid boolean value",
key, value, value);
- gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s", errstr);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
goto out;
}
@@ -206,7 +214,8 @@ xlator_option_validate_xlator(xlator_t *xl, const char *key, const char *value,
if (!xlopt) {
snprintf(errstr, 256, "option %s %s: '%s' is not a valid volume name",
key, value, value);
- gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s", errstr);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
goto out;
}
@@ -217,7 +226,7 @@ out:
return ret;
}
-void
+static void
set_error_str(char *errstr, size_t len, volume_option_t *opt, const char *key,
const char *value)
{
@@ -239,18 +248,15 @@ set_error_str(char *errstr, size_t len, volume_option_t *opt, const char *key,
return;
}
-int
+static int
is_all_whitespaces(const char *value)
{
int i = 0;
- size_t len = 0;
if (value == NULL)
return -1;
- len = strlen(value);
-
- for (i = 0; i < len; i++) {
+ for (i = 0; value[i] != '\0'; i++) {
if (value[i] == ' ')
continue;
else
@@ -266,9 +272,6 @@ xlator_option_validate_str(xlator_t *xl, const char *key, const char *value,
{
int ret = -1;
int i = 0;
- char errstr[4096] = {
- 0,
- };
/* Check if the '*str' is valid */
if (GF_OPTION_LIST_EMPTY(opt)) {
@@ -308,9 +311,11 @@ xlator_option_validate_str(xlator_t *xl, const char *key, const char *value,
out:
if (ret) {
+ char errstr[4096];
set_error_str(errstr, sizeof(errstr), opt, key, value);
- gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s", errstr);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
if (op_errstr)
*op_errstr = gf_strdup(errstr);
}
@@ -329,7 +334,8 @@ xlator_option_validate_percent(xlator_t *xl, const char *key, const char *value,
if (gf_string2percent(value, &percent) != 0) {
snprintf(errstr, 256, "invalid percent format \"%s\" in \"option %s\"",
value, key);
- gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s", errstr);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
goto out;
}
@@ -337,7 +343,8 @@ xlator_option_validate_percent(xlator_t *xl, const char *key, const char *value,
snprintf(errstr, 256,
"'%lf' in 'option %s %s' is out of range [0 - 100]", percent,
key, value);
- gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "%s", errstr);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "error=%s",
+ errstr, NULL);
goto out;
}
@@ -383,8 +390,8 @@ xlator_option_validate_percent_or_sizet(xlator_t *xl, const char *key,
"'%lf' in 'option %s %s' is out"
" of range [0 - 100]",
size, key, value);
- gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "%s",
- errstr);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE,
+ "error=%s", errstr, NULL);
goto out;
}
ret = 0;
@@ -399,8 +406,8 @@ xlator_option_validate_percent_or_sizet(xlator_t *xl, const char *key,
" %s' should not be fractional value. Use "
"valid unsigned integer value.",
size, key, value);
- gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s",
- errstr);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
goto out;
}
@@ -418,8 +425,8 @@ xlator_option_validate_percent_or_sizet(xlator_t *xl, const char *key,
"'%lf' in 'option %s %s'"
" is out of range [%.0f - %.0f]",
size, key, value, opt->min, opt->max);
- gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "%s",
- errstr);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "error=%s",
+ errstr, NULL);
goto out;
}
ret = 0;
@@ -430,7 +437,8 @@ xlator_option_validate_percent_or_sizet(xlator_t *xl, const char *key,
snprintf(errstr, 256, "invalid number format \"%s\" in \"option %s\"",
value, key);
- gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s", errstr);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s", errstr,
+ NULL);
out:
if (ret && op_errstr)
@@ -452,7 +460,8 @@ xlator_option_validate_time(xlator_t *xl, const char *key, const char *value,
"invalid time format \"%s\" in "
"\"option %s\"",
value, key);
- gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s", errstr);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
goto out;
}
@@ -471,7 +480,8 @@ xlator_option_validate_time(xlator_t *xl, const char *key, const char *value,
"' in 'option %s %s' is "
"out of range [%.0f - %.0f]",
input_time, key, value, opt->min, opt->max);
- gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "%s", errstr);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "error=%s",
+ errstr, NULL);
goto out;
}
@@ -494,7 +504,8 @@ xlator_option_validate_double(xlator_t *xl, const char *key, const char *value,
if (gf_string2double(value, &input) != 0) {
snprintf(errstr, 256, "invalid number format \"%s\" in option \"%s\"",
value, key);
- gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s", errstr);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
goto out;
}
@@ -514,8 +525,8 @@ xlator_option_validate_double(xlator_t *xl, const char *key, const char *value,
"'%f' in 'option %s %s' is smaller than "
"minimum value '%f'",
input, key, value, opt->min);
- gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s",
- errstr);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
goto out;
}
} else if (opt->validate == GF_OPT_VALIDATE_MAX) {
@@ -524,8 +535,8 @@ xlator_option_validate_double(xlator_t *xl, const char *key, const char *value,
"'%f' in 'option %s %s' is greater than "
"maximum value '%f'",
input, key, value, opt->max);
- gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s",
- errstr);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
goto out;
}
} else if ((input < opt->min) || (input > opt->max)) {
@@ -533,7 +544,8 @@ xlator_option_validate_double(xlator_t *xl, const char *key, const char *value,
"'%f' in 'option %s %s' is out of range "
"[%f - %f]",
input, key, value, opt->min, opt->max);
- gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "%s", errstr);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "error=%s",
+ errstr, NULL);
goto out;
}
@@ -551,10 +563,11 @@ xlator_option_validate_addr(xlator_t *xl, const char *key, const char *value,
int ret = -1;
char errstr[256];
- if (!valid_internet_address((char *)value, _gf_false)) {
+ if (!valid_internet_address((char *)value, _gf_false, _gf_false)) {
snprintf(errstr, 256, "option %s %s: Can not parse %s address", key,
value, value);
- gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s", errstr);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
if (op_errstr)
*op_errstr = gf_strdup(errstr);
}
@@ -580,9 +593,6 @@ xlator_option_validate_addr_list(xlator_t *xl, const char *key,
char *addr_list = NULL;
char *addr = NULL;
char *dir = NULL;
- char errstr[4096] = {
- 0,
- };
dup_val = gf_strdup(value);
if (!dup_val)
@@ -592,7 +602,7 @@ xlator_option_validate_addr_list(xlator_t *xl, const char *key,
/* Possible old format, handle it for back-ward compatibility */
addr_tok = strtok_r(dup_val, ",", &save_ptr);
while (addr_tok) {
- if (!valid_internet_address(addr_tok, _gf_true))
+ if (!valid_internet_address(addr_tok, _gf_true, _gf_true))
goto out;
addr_tok = strtok_r(NULL, ",", &save_ptr);
@@ -627,7 +637,7 @@ xlator_option_validate_addr_list(xlator_t *xl, const char *key,
if (addr_tok == NULL)
goto out;
while (addr_tok) {
- if (!valid_internet_address(addr_tok, _gf_true))
+ if (!valid_internet_address(addr_tok, _gf_true, _gf_true))
goto out;
addr_tok = strtok_r(NULL, "|", &save_ptr);
@@ -643,11 +653,13 @@ xlator_option_validate_addr_list(xlator_t *xl, const char *key,
out:
if (ret) {
+ char errstr[4096];
snprintf(errstr, sizeof(errstr),
"option %s %s: '%s' is not "
"a valid internet-address-list",
key, value, value);
- gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s", errstr);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
if (op_errstr)
*op_errstr = gf_strdup(errstr);
}
@@ -665,9 +677,6 @@ xlator_option_validate_mntauth(xlator_t *xl, const char *key, const char *value,
char *dup_val = NULL;
char *addr_tok = NULL;
char *save_ptr = NULL;
- char errstr[4096] = {
- 0,
- };
dup_val = gf_strdup(value);
if (!dup_val)
@@ -686,11 +695,13 @@ xlator_option_validate_mntauth(xlator_t *xl, const char *key, const char *value,
out:
if (ret) {
+ char errstr[4096];
snprintf(errstr, sizeof(errstr),
"option %s %s: '%s' is not "
"a valid mount-auth-address",
key, value, value);
- gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "%s", errstr);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
if (op_errstr)
*op_errstr = gf_strdup(errstr);
}
@@ -756,20 +767,16 @@ validate_list_elements(const char *string, volume_option_t *opt,
key = strtok_r(str_ptr, ":", &substr_sav);
if (!key || (key_validator && key_validator(key))) {
ret = -1;
- gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INVALID_ENTRY,
- "invalid list '%s', key "
- "'%s' not valid.",
- string, key);
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INVALID_ENTRY,
+ "list=%s", string, "key=%s", key ? key : "", NULL);
goto out;
}
value = strtok_r(NULL, ":", &substr_sav);
if (!value || (value_validator && value_validator(value, opt))) {
ret = -1;
- gf_msg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INVALID_ENTRY,
- "invalid list '%s', "
- "value '%s' not valid.",
- string, key);
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INVALID_ENTRY,
+ "list=%s", string, "value=%s", key, NULL);
goto out;
}
@@ -874,8 +881,8 @@ xlator_option_validate(xlator_t *xl, char *key, char *value,
};
if (opt->type > GF_OPTION_TYPE_MAX) {
- gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY,
- "unknown option type '%d'", opt->type);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_UNKNOWN_OPTION_TYPE,
+ "type=%d", opt->type, NULL);
goto out;
}
@@ -891,7 +898,6 @@ xlator_volume_option_get_list(volume_opt_list_t *vol_list, const char *key)
{
volume_option_t *opt = NULL;
volume_opt_list_t *opt_list = NULL;
- volume_option_t *found = NULL;
int index = 0;
int i = 0;
char *cmp_key = NULL;
@@ -908,13 +914,12 @@ xlator_volume_option_get_list(volume_opt_list_t *vol_list, const char *key)
if (!cmp_key)
break;
if (fnmatch(cmp_key, key, FNM_NOESCAPE) == 0) {
- found = &opt[index];
- goto out;
+ return &opt[index];
}
}
}
-out:
- return found;
+
+ return NULL;
}
volume_option_t *
@@ -958,18 +963,16 @@ xl_opt_validate(dict_t *dict, char *key, data_t *value, void *data)
ret = xlator_option_validate(xl, key, value->data, opt, &errstr);
if (ret)
- gf_msg(xl->name, GF_LOG_WARNING, 0, LG_MSG_VALIDATE_RETURNS,
- "validate of %s returned %d", key, ret);
+ gf_smsg(xl->name, GF_LOG_WARNING, 0, LG_MSG_VALIDATE_RETURNS, "key=%s",
+ key, "ret=%d", ret, NULL);
if (errstr)
/* possible small leak of previously set stub->errstr */
stub->errstr = errstr;
if (fnmatch(opt->key[0], key, FNM_NOESCAPE) != 0) {
- gf_msg(xl->name, GF_LOG_WARNING, 0, LG_MSG_INVALID_ENTRY,
- "option '%s' is deprecated, preferred is '%s', "
- "continuing with correction",
- key, opt->key[0]);
+ gf_smsg(xl->name, GF_LOG_DEBUG, 0, LG_MSG_OPTION_DEPRECATED, "key=%s",
+ key, "preferred=%s", opt->key[0], NULL);
dict_set(dict, opt->key[0], value);
dict_del(dict, key);
}
@@ -1037,9 +1040,8 @@ xlator_validate_rec(xlator_t *xlator, char **op_errstr)
while (trav) {
if (xlator_validate_rec(trav->xlator, op_errstr)) {
- gf_msg("xlator", GF_LOG_WARNING, 0, LG_MSG_VALIDATE_REC_FAILED,
- "validate_rec "
- "failed");
+ gf_smsg("xlator", GF_LOG_WARNING, 0, LG_MSG_VALIDATE_REC_FAILED,
+ NULL);
goto out;
}
@@ -1063,8 +1065,8 @@ xlator_validate_rec(xlator_t *xlator, char **op_errstr)
THIS = old_THIS;
if (ret) {
- gf_msg(xlator->name, GF_LOG_INFO, 0, LG_MSG_INVALID_ENTRY, "%s",
- *op_errstr);
+ gf_smsg(xlator->name, GF_LOG_INFO, 0, LG_MSG_INVALID_ENTRY, "%s",
+ *op_errstr, NULL);
goto out;
}
diff --git a/libglusterfs/src/parse-utils.c b/libglusterfs/src/parse-utils.c
index d3fd0963507..4531d5f0170 100644
--- a/libglusterfs/src/parse-utils.c
+++ b/libglusterfs/src/parse-utils.c
@@ -14,13 +14,11 @@
#include <regex.h>
#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include "parse-utils.h"
-#include "mem-pool.h"
-#include "common-utils.h"
-#include "libglusterfs-messages.h"
+#include "glusterfs/parse-utils.h"
+#include "glusterfs/mem-pool.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/libglusterfs-messages.h"
/**
* parser_init: Initialize a parser with the a string to parse and
diff --git a/libglusterfs/src/quota-common-utils.c b/libglusterfs/src/quota-common-utils.c
index fe6736493c6..804e2f0ad4b 100644
--- a/libglusterfs/src/quota-common-utils.c
+++ b/libglusterfs/src/quota-common-utils.c
@@ -8,12 +8,12 @@
cases as published by the Free Software Foundation.
*/
-#include "dict.h"
-#include "logging.h"
-#include "byte-order.h"
-#include "quota-common-utils.h"
-#include "common-utils.h"
-#include "libglusterfs-messages.h"
+#include "glusterfs/dict.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/byte-order.h"
+#include "glusterfs/quota-common-utils.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/libglusterfs-messages.h"
gf_boolean_t
quota_meta_is_null(const quota_meta_t *meta)
diff --git a/libglusterfs/src/rbthash.c b/libglusterfs/src/rbthash.c
index 0068b7b881e..c90b5a21f44 100644
--- a/libglusterfs/src/rbthash.c
+++ b/libglusterfs/src/rbthash.c
@@ -8,12 +8,12 @@
cases as published by the Free Software Foundation.
*/
-#include "rbthash.h"
+#include "glusterfs/rbthash.h"
#include "rb.h"
-#include "locking.h"
-#include "mem-pool.h"
-#include "logging.h"
-#include "libglusterfs-messages.h"
+#include "glusterfs/locking.h"
+#include "glusterfs/mem-pool.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/libglusterfs-messages.h"
#include <pthread.h>
#include <string.h>
@@ -56,9 +56,8 @@ __rbthash_init_buckets(rbthash_table_t *tbl, int buckets)
tbl->buckets[i].bucket = rb_create(
(rb_comparison_func *)rbthash_comparator, tbl, NULL);
if (!tbl->buckets[i].bucket) {
- gf_msg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RB_TABLE_CREATE_FAILED,
- "Failed to "
- "create rb table bucket");
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RB_TABLE_CREATE_FAILED,
+ NULL);
ret = -1;
goto err;
}
@@ -88,20 +87,17 @@ rbthash_table_init(glusterfs_ctx_t *ctx, int buckets, rbt_hasher_t hfunc,
int ret = -1;
if (!hfunc) {
- gf_msg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_HASH_FUNC_ERROR,
- "Hash function not given");
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_HASH_FUNC_ERROR, NULL);
return NULL;
}
if (!entrypool && !expected_entries) {
- gf_msg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY,
- "Both mem-pool and expected entries not provided");
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_ENTRIES_NOT_PROVIDED, NULL);
return NULL;
}
if (entrypool && expected_entries) {
- gf_msg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY,
- "Both mem-pool and expected entries are provided");
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_ENTRIES_PROVIDED, NULL);
return NULL;
}
@@ -132,8 +128,8 @@ rbthash_table_init(glusterfs_ctx_t *ctx, int buckets, rbt_hasher_t hfunc,
ret = __rbthash_init_buckets(newtab, buckets);
if (ret == -1) {
- gf_msg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_INIT_BUCKET_FAILED,
- "Failed to init buckets");
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_INIT_BUCKET_FAILED,
+ NULL);
if (newtab->pool_alloced)
mem_pool_destroy(newtab->entrypool);
} else {
@@ -170,8 +166,8 @@ rbthash_init_entry(rbthash_table_t *tbl, void *data, void *key, int keylen)
entry = mem_get(tbl->entrypool);
if (!entry) {
- gf_msg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_GET_ENTRY_FAILED,
- "Failed to get entry from mem-pool");
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_GET_ENTRY_FAILED,
+ NULL);
goto ret;
}
@@ -243,8 +239,8 @@ rbthash_insert_entry(rbthash_table_t *tbl, rbthash_entry_t *entry)
bucket = rbthash_entry_bucket(tbl, entry);
if (!bucket) {
- gf_msg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_GET_BUCKET_FAILED,
- "Failed to get bucket");
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_GET_BUCKET_FAILED,
+ NULL);
goto err;
}
@@ -252,10 +248,11 @@ rbthash_insert_entry(rbthash_table_t *tbl, rbthash_entry_t *entry)
LOCK(&bucket->bucketlock);
{
if (!rb_probe(bucket->bucket, (void *)entry)) {
- gf_msg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_INSERT_FAILED,
- "Failed to insert"
- " entry");
+ UNLOCK(&bucket->bucketlock);
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_INSERT_FAILED,
+ NULL);
ret = -1;
+ goto err;
}
}
UNLOCK(&bucket->bucketlock);
@@ -275,17 +272,18 @@ rbthash_insert(rbthash_table_t *tbl, void *data, void *key, int keylen)
entry = rbthash_init_entry(tbl, data, key, keylen);
if (!entry) {
- gf_msg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_INIT_ENTRY_FAILED,
- "Failed to init entry");
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_INIT_ENTRY_FAILED,
+ NULL);
goto err;
}
ret = rbthash_insert_entry(tbl, entry);
if (ret == -1) {
- gf_msg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_INSERT_FAILED,
- "Failed to insert entry");
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_INSERT_FAILED,
+ NULL);
rbthash_deinit_entry(tbl, entry);
+ goto err;
}
LOCK(&tbl->tablelock);
@@ -329,8 +327,8 @@ rbthash_get(rbthash_table_t *tbl, void *key, int keylen)
bucket = rbthash_key_bucket(tbl, key, keylen);
if (!bucket) {
- gf_msg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_GET_BUCKET_FAILED,
- "Failed to get bucket");
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_GET_BUCKET_FAILED,
+ NULL);
return NULL;
}
@@ -363,8 +361,8 @@ rbthash_remove(rbthash_table_t *tbl, void *key, int keylen)
bucket = rbthash_key_bucket(tbl, key, keylen);
if (!bucket) {
- gf_msg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_GET_BUCKET_FAILED,
- "Failed to get bucket");
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_GET_BUCKET_FAILED,
+ NULL);
return NULL;
}
diff --git a/libglusterfs/src/refcount.c b/libglusterfs/src/refcount.c
index a522d86a677..d5a5a82fa0f 100644
--- a/libglusterfs/src/refcount.c
+++ b/libglusterfs/src/refcount.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "common-utils.h"
-#include "refcount.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/refcount.h"
#ifndef REFCOUNT_NEEDS_LOCK
diff --git a/libglusterfs/src/rot-buffs.c b/libglusterfs/src/rot-buffs.c
index 560d9f9805e..260bf16ecea 100644
--- a/libglusterfs/src/rot-buffs.c
+++ b/libglusterfs/src/rot-buffs.c
@@ -10,10 +10,10 @@
#include <math.h>
-#include "mem-types.h"
-#include "mem-pool.h"
+#include "glusterfs/mem-types.h"
+#include "glusterfs/mem-pool.h"
-#include "rot-buffs.h"
+#include "glusterfs/rot-buffs.h"
/**
* Producer-Consumer based on top of rotational buffers.
diff --git a/libglusterfs/src/run.c b/libglusterfs/src/run.c
index 41275066b20..58f95a7e610 100644
--- a/libglusterfs/src/run.c
+++ b/libglusterfs/src/run.c
@@ -23,7 +23,7 @@
#include <assert.h>
#include <signal.h>
#include <sys/wait.h>
-#include "syscall.h"
+#include "glusterfs/syscall.h"
/*
* Following defines are available for helping development:
@@ -92,12 +92,12 @@ close_fds_except(int *fdv, size_t count)
#define GF_LINUX_HOST_OS
#endif
#else /* ! RUN_STANDALONE || RUN_DO_DEMO */
-#include "glusterfs.h"
-#include "common-utils.h"
-#include "libglusterfs-messages.h"
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/libglusterfs-messages.h"
#endif
-#include "run.h"
+#include "glusterfs/run.h"
void
runinit(runner_t *runner)
{
@@ -132,7 +132,7 @@ runner_insert_arg(runner_t *runner, char *arg)
GF_ASSERT(arg);
- if (runner->runerr)
+ if (runner->runerr || !runner->argv)
return;
for (i = 0; i < runner->argvlen; i++) {
@@ -263,8 +263,8 @@ runner_start(runner_t *runner)
int i = 0;
sigset_t set;
- if (runner->runerr) {
- errno = runner->runerr;
+ if (runner->runerr || !runner->argv) {
+ errno = (runner->runerr) ? runner->runerr : EINVAL;
return -1;
}
diff --git a/libglusterfs/src/stack.c b/libglusterfs/src/stack.c
index c06e2dc378d..1531f0da43f 100644
--- a/libglusterfs/src/stack.c
+++ b/libglusterfs/src/stack.c
@@ -8,15 +8,16 @@
cases as published by the Free Software Foundation.
*/
-#include "statedump.h"
-#include "stack.h"
-#include "libglusterfs-messages.h"
+#include "glusterfs/statedump.h"
+#include "glusterfs/stack.h"
+#include "glusterfs/libglusterfs-messages.h"
call_frame_t *
create_frame(xlator_t *xl, call_pool_t *pool)
{
call_stack_t *stack = NULL;
call_frame_t *frame = NULL;
+ static uint64_t unique = 0;
if (!xl || !pool) {
return NULL;
@@ -52,6 +53,7 @@ create_frame(xlator_t *xl, call_pool_t *pool)
{
list_add(&stack->all_frames, &pool->all_frames);
pool->cnt++;
+ stack->unique = unique++;
}
UNLOCK(&pool->lock);
GF_ATOMIC_INC(pool->total_count);
@@ -90,7 +92,7 @@ gf_proc_dump_call_frame(call_frame_t *call_frame, const char *key_buf, ...)
};
int ret = -1;
- char timestr[256] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
int len;
@@ -160,7 +162,7 @@ gf_proc_dump_call_stack(call_stack_t *call_stack, const char *key_buf, ...)
va_list ap;
call_frame_t *trav;
int32_t i = 1, cnt = 0;
- char timestr[256] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
int len;
diff --git a/libglusterfs/src/statedump.c b/libglusterfs/src/statedump.c
index bab95855935..65f0eb5c7f3 100644
--- a/libglusterfs/src/statedump.c
+++ b/libglusterfs/src/statedump.c
@@ -9,13 +9,11 @@
*/
#include <stdarg.h>
-#include "glusterfs.h"
-#include "logging.h"
-#include "iobuf.h"
-#include "statedump.h"
-#include "stack.h"
-#include "common-utils.h"
-#include "syscall.h"
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/statedump.h"
+#include "glusterfs/stack.h"
+#include "glusterfs/syscall.h"
#ifdef HAVE_MALLOC_H
#include <malloc.h>
@@ -201,6 +199,40 @@ gf_proc_dump_write(char *key, char *value, ...)
return ret;
}
+void
+gf_latency_statedump_and_reset(char *key, gf_latency_t *lat)
+{
+ /* Doesn't make sense to continue if there are no fops
+ came in the given interval */
+ if (!lat || !lat->count)
+ return;
+ gf_proc_dump_write(key,
+ "AVG:%lf CNT:%" PRIu64 " TOTAL:%" PRIu64 " MIN:%" PRIu64
+ " MAX:%" PRIu64,
+ (((double)lat->total) / lat->count), lat->count,
+ lat->total, lat->min, lat->max);
+ gf_latency_reset(lat);
+}
+
+void
+gf_proc_dump_xl_latency_info(xlator_t *xl)
+{
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i;
+
+ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.latency", xl->name);
+ gf_proc_dump_add_section("%s", key_prefix);
+
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ gf_proc_dump_build_key(key, key_prefix, "%s", (char *)gf_fop_list[i]);
+
+ gf_latency_t *lat = &xl->stats.interval.latencies[i];
+
+ gf_latency_statedump_and_reset(key, lat);
+ }
+}
+
static void
gf_proc_dump_xlator_mem_info(xlator_t *xl)
{
@@ -272,7 +304,7 @@ gf_proc_dump_xlator_mem_info_only_in_use(xlator_t *xl)
void
gf_proc_dump_mem_info()
{
-#ifdef HAVE_MALLOC_STATS
+#ifdef HAVE_MALLINFO
struct mallinfo info;
memset(&info, 0, sizeof(struct mallinfo));
@@ -298,7 +330,7 @@ gf_proc_dump_mem_info_to_dict(dict_t *dict)
{
if (!dict)
return;
-#ifdef HAVE_MALLOC_STATS
+#ifdef HAVE_MALLINFO
struct mallinfo info;
int ret = -1;
@@ -351,26 +383,13 @@ gf_proc_dump_mem_info_to_dict(dict_t *dict)
void
gf_proc_dump_mempool_info(glusterfs_ctx_t *ctx)
{
+#ifdef GF_DISABLE_MEMPOOL
+ gf_proc_dump_write("built with --disable-mempool", " so no memory pools");
+#else
struct mem_pool *pool = NULL;
gf_proc_dump_add_section("mempool");
-#if defined(OLD_MEM_POOLS)
- list_for_each_entry(pool, &ctx->mempool_list, global_list)
- {
- gf_proc_dump_write("-----", "-----");
- gf_proc_dump_write("pool-name", "%s", pool->name);
- gf_proc_dump_write("hot-count", "%d", pool->hot_count);
- gf_proc_dump_write("cold-count", "%d", pool->cold_count);
- gf_proc_dump_write("padded_sizeof", "%lu", pool->padded_sizeof_type);
- gf_proc_dump_write("alloc-count", "%" PRIu64, pool->alloc_count);
- gf_proc_dump_write("max-alloc", "%d", pool->max_alloc);
-
- gf_proc_dump_write("pool-misses", "%" PRIu64, pool->pool_misses);
- gf_proc_dump_write("cur-stdalloc", "%d", pool->curr_stdalloc);
- gf_proc_dump_write("max-stdalloc", "%d", pool->max_stdalloc);
- }
-#else
LOCK(&ctx->lock);
{
list_for_each_entry(pool, &ctx->mempool_list, owner)
@@ -379,25 +398,24 @@ gf_proc_dump_mempool_info(glusterfs_ctx_t *ctx)
gf_proc_dump_write("-----", "-----");
gf_proc_dump_write("pool-name", "%s", pool->name);
+ gf_proc_dump_write("xlator-name", "%s", pool->xl_name);
gf_proc_dump_write("active-count", "%" GF_PRI_ATOMIC, active);
gf_proc_dump_write("sizeof-type", "%lu", pool->sizeof_type);
gf_proc_dump_write("padded-sizeof", "%d",
1 << pool->pool->power_of_two);
- gf_proc_dump_write("size", "%lu",
+ gf_proc_dump_write("size", "%" PRId64,
(1 << pool->pool->power_of_two) * active);
gf_proc_dump_write("shared-pool", "%p", pool->pool);
}
}
UNLOCK(&ctx->lock);
-
- /* TODO: details of (struct mem_pool_shared) pool->pool */
-#endif
+#endif /* GF_DISABLE_MEMPOOL */
}
void
gf_proc_dump_mempool_info_to_dict(glusterfs_ctx_t *ctx, dict_t *dict)
{
-#if defined(OLD_MEM_POOLS)
+#ifndef GF_DISABLE_MEMPOOL
struct mem_pool *pool = NULL;
char key[GF_DUMP_MAX_BUF_LEN] = {
0,
@@ -408,51 +426,47 @@ gf_proc_dump_mempool_info_to_dict(glusterfs_ctx_t *ctx, dict_t *dict)
if (!ctx || !dict)
return;
- list_for_each_entry(pool, &ctx->mempool_list, global_list)
+ LOCK(&ctx->lock);
{
- snprintf(key, sizeof(key), "pool%d.name", count);
- ret = dict_set_str(dict, key, pool->name);
- if (ret)
- return;
-
- snprintf(key, sizeof(key), "pool%d.hotcount", count);
- ret = dict_set_int32(dict, key, pool->hot_count);
- if (ret)
- return;
-
- snprintf(key, sizeof(key), "pool%d.coldcount", count);
- ret = dict_set_int32(dict, key, pool->cold_count);
- if (ret)
- return;
-
- snprintf(key, sizeof(key), "pool%d.paddedsizeof", count);
- ret = dict_set_uint64(dict, key, pool->padded_sizeof_type);
- if (ret)
- return;
-
- snprintf(key, sizeof(key), "pool%d.alloccount", count);
- ret = dict_set_uint64(dict, key, pool->alloc_count);
- if (ret)
- return;
-
- snprintf(key, sizeof(key), "pool%d.max_alloc", count);
- ret = dict_set_int32(dict, key, pool->max_alloc);
- if (ret)
- return;
-
- snprintf(key, sizeof(key), "pool%d.max-stdalloc", count);
- ret = dict_set_int32(dict, key, pool->max_stdalloc);
- if (ret)
- return;
-
- snprintf(key, sizeof(key), "pool%d.pool-misses", count);
- ret = dict_set_uint64(dict, key, pool->pool_misses);
- if (ret)
- return;
- count++;
+ list_for_each_entry(pool, &ctx->mempool_list, owner)
+ {
+ int64_t active = GF_ATOMIC_GET(pool->active);
+
+ snprintf(key, sizeof(key), "pool%d.name", count);
+ ret = dict_set_str(dict, key, pool->name);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "pool%d.active-count", count);
+ ret = dict_set_uint64(dict, key, active);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "pool%d.sizeof-type", count);
+ ret = dict_set_uint64(dict, key, pool->sizeof_type);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "pool%d.padded-sizeof", count);
+ ret = dict_set_uint64(dict, key, 1 << pool->pool->power_of_two);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "pool%d.size", count);
+ ret = dict_set_uint64(dict, key,
+ (1 << pool->pool->power_of_two) * active);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "pool%d.shared-pool", count);
+ ret = dict_set_static_ptr(dict, key, pool->pool);
+ if (ret)
+ goto out;
+ }
}
- ret = dict_set_int32(dict, "mempool-count", count);
-#endif
+out:
+ UNLOCK(&ctx->lock);
+#endif /* !GF_DISABLE_MEMPOOL */
}
void
@@ -461,17 +475,17 @@ gf_proc_dump_latency_info(xlator_t *xl);
void
gf_proc_dump_dict_info(glusterfs_ctx_t *ctx)
{
- uint64_t total_dicts = 0;
- uint64_t total_pairs = 0;
+ int64_t total_dicts = 0;
+ int64_t total_pairs = 0;
total_dicts = GF_ATOMIC_GET(ctx->stats.total_dicts_used);
total_pairs = GF_ATOMIC_GET(ctx->stats.total_pairs_used);
gf_proc_dump_write("max-pairs-per-dict", "%" GF_PRI_ATOMIC,
GF_ATOMIC_GET(ctx->stats.max_dict_pairs));
- gf_proc_dump_write("total-pairs-used", "%lu", total_pairs);
- gf_proc_dump_write("total-dicts-used", "%lu", total_dicts);
- gf_proc_dump_write("average-pairs-per-dict", "%lu",
+ gf_proc_dump_write("total-pairs-used", "%" PRId64, total_pairs);
+ gf_proc_dump_write("total-dicts-used", "%" PRId64, total_dicts);
+ gf_proc_dump_write("average-pairs-per-dict", "%" PRId64,
(total_pairs / total_dicts));
}
@@ -487,7 +501,7 @@ gf_proc_dump_single_xlator_info(xlator_t *trav)
return;
if (ctx->measure_latency)
- gf_proc_dump_latency_info(trav);
+ gf_proc_dump_xl_latency_info(trav);
gf_proc_dump_xlator_mem_info(trav);
@@ -784,7 +798,7 @@ gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx)
char brick_name[PATH_MAX] = {
0,
};
- char timestr[256] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
char sign_string[512] = {
@@ -805,12 +819,21 @@ gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx)
int brick_count = 0;
int len = 0;
- gf_proc_dump_lock();
+ gf_msg_trace("dump", 0, "received statedump request (sig:USR1)");
if (!ctx)
goto out;
- if (ctx && ctx->active) {
+ /*
+ * Multiplexed daemons can change the active graph when attach/detach
+ * is called. So this has to be protected with the cleanup lock.
+ */
+ if (mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name))
+ pthread_mutex_lock(&ctx->cleanup_lock);
+ gf_proc_dump_lock();
+
+ if (!mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name) &&
+ (ctx && ctx->active)) {
top = ctx->active->first;
for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) {
brick_count++;
@@ -835,7 +858,7 @@ gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx)
? dump_options.dump_path
: ((ctx->statedump_path != NULL) ? ctx->statedump_path
: DEFAULT_VAR_RUN_DIRECTORY)),
- brick_name, getpid(), (uint64_t)time(NULL));
+ brick_name, getpid(), (uint64_t)gf_time());
if ((ret < 0) || (ret >= sizeof(path))) {
goto out;
}
@@ -854,10 +877,7 @@ gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx)
// continue even though gettimeofday() has failed
ret = gettimeofday(&tv, NULL);
if (0 == ret) {
- gf_time_fmt(timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT);
- len = strlen(timestr);
- snprintf(timestr + len, sizeof timestr - len, ".%" GF_PRI_SUSECONDS,
- tv.tv_usec);
+ gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT);
}
len = snprintf(sign_string, sizeof(sign_string), "DUMP-START-TIME: %s\n",
@@ -906,10 +926,7 @@ gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx)
ret = gettimeofday(&tv, NULL);
if (0 == ret) {
- gf_time_fmt(timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT);
- len = strlen(timestr);
- snprintf(timestr + len, sizeof timestr - len, ".%" GF_PRI_SUSECONDS,
- tv.tv_usec);
+ gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT);
}
len = snprintf(sign_string, sizeof(sign_string), "\nDUMP-END-TIME: %s",
@@ -922,7 +939,11 @@ gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx)
out:
GF_FREE(dump_options.dump_path);
dump_options.dump_path = NULL;
- gf_proc_dump_unlock();
+ if (ctx) {
+ gf_proc_dump_unlock();
+ if (mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name))
+ pthread_mutex_unlock(&ctx->cleanup_lock);
+ }
return;
}
@@ -1024,7 +1045,7 @@ gf_proc_dump_xlator_profile(xlator_t *this, strfd_t *strfd)
{
gf_dump_strfd = strfd;
- gf_proc_dump_latency_info(this);
+ gf_proc_dump_xl_latency_info(this);
gf_dump_strfd = NULL;
}
diff --git a/libglusterfs/src/store.c b/libglusterfs/src/store.c
index 7e03c2e332b..5c316b9291a 100644
--- a/libglusterfs/src/store.c
+++ b/libglusterfs/src/store.c
@@ -11,19 +11,18 @@
#include <inttypes.h>
#include <libgen.h>
-#include "glusterfs.h"
-#include "store.h"
-#include "dict.h"
-#include "xlator.h"
-#include "syscall.h"
-#include "libglusterfs-messages.h"
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/store.h"
+#include "glusterfs/xlator.h"
+#include "glusterfs/syscall.h"
+#include "glusterfs/libglusterfs-messages.h"
int32_t
gf_store_mkdir(char *path)
{
int32_t ret = -1;
- ret = mkdir_p(path, 0777, _gf_true);
+ ret = mkdir_p(path, 0755, _gf_true);
if ((-1 == ret) && (EEXIST != errno)) {
gf_msg("", GF_LOG_ERROR, errno, LG_MSG_DIR_OP_FAILED,
@@ -184,8 +183,8 @@ out:
}
int
-gf_store_read_and_tokenize(FILE *file, char *str, int size, char **iter_key,
- char **iter_val, gf_store_op_errno_t *store_errno)
+gf_store_read_and_tokenize(FILE *file, char **iter_key, char **iter_val,
+ gf_store_op_errno_t *store_errno)
{
int32_t ret = -1;
char *savetok = NULL;
@@ -193,15 +192,15 @@ gf_store_read_and_tokenize(FILE *file, char *str, int size, char **iter_key,
char *value = NULL;
char *temp = NULL;
size_t str_len = 0;
+ char str[8192];
GF_ASSERT(file);
- GF_ASSERT(str);
GF_ASSERT(iter_key);
GF_ASSERT(iter_val);
GF_ASSERT(store_errno);
retry:
- temp = fgets(str, size, file);
+ temp = fgets(str, 8192, file);
if (temp == NULL || feof(file)) {
ret = -1;
*store_errno = GD_STORE_EOF;
@@ -241,13 +240,8 @@ int32_t
gf_store_retrieve_value(gf_store_handle_t *handle, char *key, char **value)
{
int32_t ret = -1;
- char *scan_str = NULL;
char *iter_key = NULL;
char *iter_val = NULL;
- char *free_str = NULL;
- struct stat st = {
- 0,
- };
gf_store_op_errno_t store_errno = GD_STORE_SUCCESS;
GF_ASSERT(handle);
@@ -279,32 +273,9 @@ gf_store_retrieve_value(gf_store_handle_t *handle, char *key, char **value)
} else {
fseek(handle->read, 0, SEEK_SET);
}
- ret = sys_fstat(handle->fd, &st);
- if (ret < 0) {
- gf_msg("", GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
- "stat on file %s failed", handle->path);
- ret = -1;
- store_errno = GD_STORE_STAT_FAILED;
- goto out;
- }
-
- /* "st.st_size + 1" is used as we are fetching each
- * line of a file using fgets, fgets will append "\0"
- * to the end of the string
- */
- scan_str = GF_CALLOC(1, st.st_size + 1, gf_common_mt_char);
-
- if (scan_str == NULL) {
- ret = -1;
- store_errno = GD_STORE_ENOMEM;
- goto out;
- }
-
- free_str = scan_str;
-
do {
- ret = gf_store_read_and_tokenize(handle->read, scan_str, st.st_size + 1,
- &iter_key, &iter_val, &store_errno);
+ ret = gf_store_read_and_tokenize(handle->read, &iter_key, &iter_val,
+ &store_errno);
if (ret < 0) {
gf_msg_trace("", 0,
"error while reading key '%s': "
@@ -334,8 +305,6 @@ out:
sys_close(handle->fd);
}
- GF_FREE(free_str);
-
return ret;
}
@@ -388,6 +357,53 @@ out:
}
int32_t
+gf_store_save_items(int fd, char *items)
+{
+ int32_t ret = -1;
+ int dup_fd = -1;
+ FILE *fp = NULL;
+
+ GF_ASSERT(fd > 0);
+ GF_ASSERT(items);
+
+ dup_fd = dup(fd);
+ if (dup_fd == -1)
+ goto out;
+
+ fp = fdopen(dup_fd, "a+");
+ if (fp == NULL) {
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
+ "fdopen failed.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = fputs(items, fp);
+ if (ret < 0) {
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
+ "Unable to store items: %s", items);
+ ret = -1;
+ goto out;
+ }
+
+ ret = fflush(fp);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
+ "fflush failed.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (fp)
+ fclose(fp);
+
+ gf_msg_debug(THIS->name, 0, "returning: %d", ret);
+ return ret;
+}
+
+int32_t
gf_store_handle_new(const char *path, gf_store_handle_t **handle)
{
int32_t ret = -1;
@@ -561,40 +577,16 @@ gf_store_iter_get_next(gf_store_iter_t *iter, char **key, char **value,
gf_store_op_errno_t *op_errno)
{
int32_t ret = -1;
- char *scan_str = NULL;
char *iter_key = NULL;
char *iter_val = NULL;
- struct stat st = {
- 0,
- };
gf_store_op_errno_t store_errno = GD_STORE_SUCCESS;
GF_ASSERT(iter);
GF_ASSERT(key);
GF_ASSERT(value);
- ret = sys_stat(iter->filepath, &st);
- if (ret < 0) {
- gf_msg("", GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
- "stat on file failed");
- ret = -1;
- store_errno = GD_STORE_STAT_FAILED;
- goto out;
- }
-
- /* "st.st_size + 1" is used as we are fetching each
- * line of a file using fgets, fgets will append "\0"
- * to the end of the string
- */
- scan_str = GF_CALLOC(1, st.st_size + 1, gf_common_mt_char);
- if (!scan_str) {
- ret = -1;
- store_errno = GD_STORE_ENOMEM;
- goto out;
- }
-
- ret = gf_store_read_and_tokenize(iter->file, scan_str, st.st_size + 1,
- &iter_key, &iter_val, &store_errno);
+ ret = gf_store_read_and_tokenize(iter->file, &iter_key, &iter_val,
+ &store_errno);
if (ret < 0) {
goto out;
}
@@ -619,7 +611,6 @@ gf_store_iter_get_next(gf_store_iter_t *iter, char **key, char **value,
ret = 0;
out:
- GF_FREE(scan_str);
if (ret) {
GF_FREE(*key);
GF_FREE(*value);
@@ -658,23 +649,25 @@ out:
}
int32_t
-gf_store_iter_destroy(gf_store_iter_t *iter)
+gf_store_iter_destroy(gf_store_iter_t **iter)
{
int32_t ret = -1;
- if (!iter)
+ if (!(*iter))
return 0;
/* gf_store_iter_new will not return a valid iter object with iter->file
* being NULL*/
- ret = fclose(iter->file);
+ ret = fclose((*iter)->file);
if (ret)
gf_msg("", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED,
"Unable"
" to close file: %s, ret: %d",
- iter->filepath, ret);
+ (*iter)->filepath, ret);
+
+ GF_FREE(*iter);
+ *iter = NULL;
- GF_FREE(iter);
return ret;
}
diff --git a/libglusterfs/src/strfd.c b/libglusterfs/src/strfd.c
index ad66b622361..8a2580edc85 100644
--- a/libglusterfs/src/strfd.c
+++ b/libglusterfs/src/strfd.c
@@ -10,10 +10,10 @@
#include <stdarg.h>
-#include "mem-types.h"
-#include "mem-pool.h"
-#include "strfd.h"
-#include "common-utils.h"
+#include "glusterfs/mem-types.h"
+#include "glusterfs/mem-pool.h"
+#include "glusterfs/strfd.h"
+#include "glusterfs/common-utils.h"
strfd_t *
strfd_open()
diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c
index d2a71698745..d9f1723856d 100644
--- a/libglusterfs/src/syncop-utils.c
+++ b/libglusterfs/src/syncop-utils.c
@@ -8,10 +8,10 @@
cases as published by the Free Software Foundation.
*/
-#include "syncop.h"
-#include "syncop-utils.h"
-#include "common-utils.h"
-#include "libglusterfs-messages.h"
+#include "glusterfs/syncop.h"
+#include "glusterfs/syncop-utils.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/libglusterfs-messages.h"
struct syncop_dir_scan_data {
xlator_t *subvol;
@@ -345,11 +345,16 @@ syncop_mt_dir_scan(call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid,
gf_dirent_t *tmp = NULL;
uint32_t jobs_running = 0;
uint32_t qlen = 0;
- pthread_cond_t cond;
- pthread_mutex_t mut;
- gf_boolean_t cond_init = _gf_false;
- gf_boolean_t mut_init = _gf_false;
+ pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+ pthread_mutex_t mut = PTHREAD_MUTEX_INITIALIZER;
gf_dirent_t entries;
+ xlator_t *this = NULL;
+
+ if (frame) {
+ this = frame->this;
+ } else {
+ this = THIS;
+ }
/*For this functionality to be implemented in general, we need
* synccond_t infra which doesn't block the executing thread. Until then
@@ -371,15 +376,6 @@ syncop_mt_dir_scan(call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid,
INIT_LIST_HEAD(&entries.list);
INIT_LIST_HEAD(&q.list);
- ret = pthread_mutex_init(&mut, NULL);
- if (ret)
- goto out;
- mut_init = _gf_true;
-
- ret = pthread_cond_init(&cond, NULL);
- if (ret)
- goto out;
- cond_init = _gf_true;
while ((ret = syncop_readdir(subvol, fd, 131072, offset, &entries, xdata,
NULL))) {
@@ -397,13 +393,16 @@ syncop_mt_dir_scan(call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid,
list_for_each_entry_safe(entry, tmp, &entries.list, list)
{
+ if (this && this->cleanup_starting)
+ goto out;
+
list_del_init(&entry->list);
if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) {
gf_dirent_entry_free(entry);
continue;
}
- if (entry->d_type == IA_IFDIR) {
+ if (entry->d_stat.ia_type == IA_IFDIR) {
ret = fn(subvol, entry, loc, data);
gf_dirent_entry_free(entry);
if (ret)
@@ -442,21 +441,17 @@ syncop_mt_dir_scan(call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid,
out:
if (fd)
fd_unref(fd);
- if (mut_init && cond_init) {
- pthread_mutex_lock(&mut);
- {
- while (jobs_running)
- pthread_cond_wait(&cond, &mut);
- }
- pthread_mutex_unlock(&mut);
- gf_dirent_free(&q);
- gf_dirent_free(&entries);
+
+ pthread_mutex_lock(&mut);
+ {
+ while (jobs_running)
+ pthread_cond_wait(&cond, &mut);
}
+ pthread_mutex_unlock(&mut);
+
+ gf_dirent_free(&q);
+ gf_dirent_free(&entries);
- if (mut_init)
- pthread_mutex_destroy(&mut);
- if (cond_init)
- pthread_cond_destroy(&cond);
return ret | retval;
}
diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c
index edf978d797d..df20cec559f 100644
--- a/libglusterfs/src/syncop.c
+++ b/libglusterfs/src/syncop.c
@@ -8,8 +8,12 @@
cases as published by the Free Software Foundation.
*/
-#include "syncop.h"
-#include "libglusterfs-messages.h"
+#include "glusterfs/syncop.h"
+#include "glusterfs/libglusterfs-messages.h"
+
+#ifdef HAVE_TSAN_API
+#include <sanitizer/tsan_interface.h>
+#endif
int
syncopctx_setfsuid(void *uid)
@@ -26,28 +30,10 @@ syncopctx_setfsuid(void *uid)
opctx = syncopctx_getctx();
- /* alloc for this thread the first time */
- if (!opctx) {
- opctx = GF_CALLOC(1, sizeof(*opctx), gf_common_mt_syncopctx);
- if (!opctx) {
- ret = -1;
- goto out;
- }
-
- ret = syncopctx_setctx(opctx);
- if (ret != 0) {
- GF_FREE(opctx);
- opctx = NULL;
- goto out;
- }
- }
+ opctx->uid = *(uid_t *)uid;
+ opctx->valid |= SYNCOPCTX_UID;
out:
- if (opctx && uid) {
- opctx->uid = *(uid_t *)uid;
- opctx->valid |= SYNCOPCTX_UID;
- }
-
return ret;
}
@@ -66,28 +52,10 @@ syncopctx_setfsgid(void *gid)
opctx = syncopctx_getctx();
- /* alloc for this thread the first time */
- if (!opctx) {
- opctx = GF_CALLOC(1, sizeof(*opctx), gf_common_mt_syncopctx);
- if (!opctx) {
- ret = -1;
- goto out;
- }
-
- ret = syncopctx_setctx(opctx);
- if (ret != 0) {
- GF_FREE(opctx);
- opctx = NULL;
- goto out;
- }
- }
+ opctx->gid = *(gid_t *)gid;
+ opctx->valid |= SYNCOPCTX_GID;
out:
- if (opctx && gid) {
- opctx->gid = *(gid_t *)gid;
- opctx->valid |= SYNCOPCTX_GID;
- }
-
return ret;
}
@@ -107,43 +75,20 @@ syncopctx_setfsgroups(int count, const void *groups)
opctx = syncopctx_getctx();
- /* alloc for this thread the first time */
- if (!opctx) {
- opctx = GF_CALLOC(1, sizeof(*opctx), gf_common_mt_syncopctx);
- if (!opctx) {
- ret = -1;
- goto out;
- }
-
- ret = syncopctx_setctx(opctx);
- if (ret != 0) {
- GF_FREE(opctx);
- opctx = NULL;
- goto out;
- }
- }
-
/* resize internal groups as required */
if (count && opctx->grpsize < count) {
if (opctx->groups) {
- tmpgroups = GF_REALLOC(opctx->groups, (sizeof(gid_t) * count));
- /* NOTE: Not really required to zero the reallocation,
- * as ngrps controls the validity of data,
- * making a note irrespective */
- if (tmpgroups == NULL) {
- opctx->grpsize = 0;
- GF_FREE(opctx->groups);
- opctx->groups = NULL;
- ret = -1;
- goto out;
- }
- } else {
- tmpgroups = GF_CALLOC(count, sizeof(gid_t), gf_common_mt_syncopctx);
- if (tmpgroups == NULL) {
- opctx->grpsize = 0;
- ret = -1;
- goto out;
- }
+ /* Group list will be updated later, so no need to keep current
+ * data and waste time copying it. It's better to free the current
+ * allocation and then allocate a fresh new memory block. */
+ GF_FREE(opctx->groups);
+ opctx->groups = NULL;
+ opctx->grpsize = 0;
+ }
+ tmpgroups = GF_MALLOC(count * sizeof(gid_t), gf_common_mt_syncopctx);
+ if (tmpgroups == NULL) {
+ ret = -1;
+ goto out;
}
opctx->groups = tmpgroups;
@@ -156,6 +101,13 @@ syncopctx_setfsgroups(int count, const void *groups)
/* set/reset the ngrps, this is where reset of groups is handled */
opctx->ngrps = count;
+
+ if ((opctx->valid & SYNCOPCTX_GROUPS) == 0) {
+ /* This is the first time we are storing groups into the TLS structure
+ * so we mark the current thread so that it will be properly cleaned
+ * up when the thread terminates. */
+ gf_thread_needs_cleanup();
+ }
opctx->valid |= SYNCOPCTX_GROUPS;
out:
@@ -177,28 +129,10 @@ syncopctx_setfspid(void *pid)
opctx = syncopctx_getctx();
- /* alloc for this thread the first time */
- if (!opctx) {
- opctx = GF_CALLOC(1, sizeof(*opctx), gf_common_mt_syncopctx);
- if (!opctx) {
- ret = -1;
- goto out;
- }
-
- ret = syncopctx_setctx(opctx);
- if (ret != 0) {
- GF_FREE(opctx);
- opctx = NULL;
- goto out;
- }
- }
+ opctx->pid = *(pid_t *)pid;
+ opctx->valid |= SYNCOPCTX_PID;
out:
- if (opctx && pid) {
- opctx->pid = *(pid_t *)pid;
- opctx->valid |= SYNCOPCTX_PID;
- }
-
return ret;
}
@@ -217,35 +151,21 @@ syncopctx_setfslkowner(gf_lkowner_t *lk_owner)
opctx = syncopctx_getctx();
- /* alloc for this thread the first time */
- if (!opctx) {
- opctx = GF_CALLOC(1, sizeof(*opctx), gf_common_mt_syncopctx);
- if (!opctx) {
- ret = -1;
- goto out;
- }
-
- ret = syncopctx_setctx(opctx);
- if (ret != 0) {
- GF_FREE(opctx);
- opctx = NULL;
- goto out;
- }
- }
+ opctx->lk_owner = *lk_owner;
+ opctx->valid |= SYNCOPCTX_LKOWNER;
out:
- if (opctx && lk_owner) {
- opctx->lk_owner = *lk_owner;
- opctx->valid |= SYNCOPCTX_LKOWNER;
- }
-
return ret;
}
+void *
+syncenv_processor(void *thdata);
+
static void
__run(struct synctask *task)
{
struct syncenv *env = NULL;
+ int32_t total, ret, i;
env = task->env;
@@ -261,7 +181,6 @@ __run(struct synctask *task)
env->runcount--;
break;
case SYNCTASK_WAIT:
- env->waitcount--;
break;
case SYNCTASK_DONE:
gf_msg(task->xl->name, GF_LOG_WARNING, 0, LG_MSG_COMPLETED_TASK,
@@ -275,8 +194,27 @@ __run(struct synctask *task)
}
list_add_tail(&task->all_tasks, &env->runq);
- env->runcount++;
task->state = SYNCTASK_RUN;
+
+ env->runcount++;
+
+ total = env->procs + env->runcount - env->procs_idle;
+ if (total > env->procmax) {
+ total = env->procmax;
+ }
+ if (total > env->procs) {
+ for (i = 0; i < env->procmax; i++) {
+ if (env->proc[i].env == NULL) {
+ env->proc[i].env = env;
+ ret = gf_thread_create(&env->proc[i].processor, NULL,
+ syncenv_processor, &env->proc[i],
+ "sproc%d", i);
+ if ((ret < 0) || (++env->procs >= total)) {
+ break;
+ }
+ }
+ }
+ }
}
static void
@@ -298,7 +236,6 @@ __wait(struct synctask *task)
gf_msg(task->xl->name, GF_LOG_WARNING, 0, LG_MSG_REWAITING_TASK,
"re-waiting already waiting "
"task");
- env->waitcount--;
break;
case SYNCTASK_DONE:
gf_msg(task->xl->name, GF_LOG_WARNING, 0, LG_MSG_COMPLETED_TASK,
@@ -311,12 +248,11 @@ __wait(struct synctask *task)
}
list_add_tail(&task->all_tasks, &env->waitq);
- env->waitcount++;
task->state = SYNCTASK_WAIT;
}
void
-synctask_yield(struct synctask *task)
+synctask_yield(struct synctask *task, struct timespec *delta)
{
xlator_t *oldTHIS = THIS;
@@ -325,10 +261,16 @@ synctask_yield(struct synctask *task)
task->proc->sched.uc_flags &= ~_UC_TLSBASE;
#endif
+ task->delta = delta;
+
if (task->state != SYNCTASK_DONE) {
task->state = SYNCTASK_SUSPEND;
- (void)gf_backtrace_save(task->btbuf);
}
+
+#ifdef HAVE_TSAN_API
+ __tsan_switch_to_fiber(task->proc->tsan.fiber, 0);
+#endif
+
if (swapcontext(&task->ctx, &task->proc->sched) < 0) {
gf_msg("syncop", GF_LOG_ERROR, errno, LG_MSG_SWAPCONTEXT_FAILED,
"swapcontext failed");
@@ -338,6 +280,35 @@ synctask_yield(struct synctask *task)
}
void
+synctask_sleep(int32_t secs)
+{
+ struct timespec delta;
+ struct synctask *task;
+
+ task = synctask_get();
+
+ if (task == NULL) {
+ sleep(secs);
+ } else {
+ delta.tv_sec = secs;
+ delta.tv_nsec = 0;
+
+ synctask_yield(task, &delta);
+ }
+}
+
+static void
+__synctask_wake(struct synctask *task)
+{
+ task->woken = 1;
+
+ if (task->slept)
+ __run(task);
+
+ pthread_cond_broadcast(&task->env->cond);
+}
+
+void
synctask_wake(struct synctask *task)
{
struct syncenv *env = NULL;
@@ -346,13 +317,18 @@ synctask_wake(struct synctask *task)
pthread_mutex_lock(&env->mutex);
{
- task->woken = 1;
+ if (task->timer != NULL) {
+ if (gf_timer_call_cancel(task->xl->ctx, task->timer) != 0) {
+ goto unlock;
+ }
- if (task->slept)
- __run(task);
+ task->timer = NULL;
+ task->synccond = NULL;
+ }
- pthread_cond_broadcast(&env->cond);
+ __synctask_wake(task);
}
+unlock:
pthread_mutex_unlock(&env->mutex);
}
@@ -371,7 +347,7 @@ synctask_wrap(void)
task->state = SYNCTASK_DONE;
- synctask_yield(task);
+ synctask_yield(task, NULL);
}
void
@@ -390,6 +366,10 @@ synctask_destroy(struct synctask *task)
pthread_cond_destroy(&task->cond);
}
+#ifdef HAVE_TSAN_API
+ __tsan_destroy_fiber(task->tsan.fiber);
+#endif
+
GF_FREE(task);
}
@@ -500,6 +480,13 @@ synctask_create(struct syncenv *env, size_t stacksize, synctask_fn_t fn,
makecontext(&newtask->ctx, (void (*)(void))synctask_wrap, 0);
+#ifdef HAVE_TSAN_API
+ newtask->tsan.fiber = __tsan_create_fiber(0);
+ snprintf(newtask->tsan.name, TSAN_THREAD_NAMELEN, "<synctask of %s>",
+ this->name);
+ __tsan_set_fiber_name(newtask->tsan.fiber, newtask->tsan.name);
+#endif
+
newtask->state = SYNCTASK_INIT;
newtask->slept = 1;
@@ -511,11 +498,6 @@ synctask_create(struct syncenv *env, size_t stacksize, synctask_fn_t fn,
}
synctask_wake(newtask);
- /*
- * Make sure someone's there to execute anything we just put on the
- * run queue.
- */
- syncenv_scale(env);
return newtask;
err:
@@ -609,8 +591,12 @@ syncenv_task(struct syncproc *proc)
goto unlock;
}
- sleep_till.tv_sec = time(NULL) + SYNCPROC_IDLE_TIME;
+ env->procs_idle++;
+
+ sleep_till.tv_sec = gf_time() + SYNCPROC_IDLE_TIME;
ret = pthread_cond_timedwait(&env->cond, &env->mutex, &sleep_till);
+
+ env->procs_idle--;
}
task = list_entry(env->runq.next, struct synctask, all_tasks);
@@ -629,6 +615,34 @@ unlock:
return task;
}
+static void
+synctask_timer(void *data)
+{
+ struct synctask *task = data;
+ struct synccond *cond;
+
+ cond = task->synccond;
+ if (cond != NULL) {
+ pthread_mutex_lock(&cond->pmutex);
+
+ list_del_init(&task->waitq);
+ task->synccond = NULL;
+
+ pthread_mutex_unlock(&cond->pmutex);
+
+ task->ret = -ETIMEDOUT;
+ }
+
+ pthread_mutex_lock(&task->env->mutex);
+
+ gf_timer_call_cancel(task->xl->ctx, task->timer);
+ task->timer = NULL;
+
+ __synctask_wake(task);
+
+ pthread_mutex_unlock(&task->env->mutex);
+}
+
void
synctask_switchto(struct synctask *task)
{
@@ -644,6 +658,10 @@ synctask_switchto(struct synctask *task)
task->ctx.uc_flags &= ~_UC_TLSBASE;
#endif
+#ifdef HAVE_TSAN_API
+ __tsan_switch_to_fiber(task->tsan.fiber, 0);
+#endif
+
if (swapcontext(&task->proc->sched, &task->ctx) < 0) {
gf_msg("syncop", GF_LOG_ERROR, errno, LG_MSG_SWAPCONTEXT_FAILED,
"swapcontext failed");
@@ -661,7 +679,14 @@ synctask_switchto(struct synctask *task)
} else {
task->slept = 1;
__wait(task);
+
+ if (task->delta != NULL) {
+ task->timer = gf_timer_call_after(task->xl->ctx, *task->delta,
+ synctask_timer, task);
+ }
}
+
+ task->delta = NULL;
}
pthread_mutex_unlock(&env->mutex);
}
@@ -669,68 +694,27 @@ synctask_switchto(struct synctask *task)
void *
syncenv_processor(void *thdata)
{
- struct syncenv *env = NULL;
struct syncproc *proc = NULL;
struct synctask *task = NULL;
proc = thdata;
- env = proc->env;
- for (;;) {
- task = syncenv_task(proc);
- if (!task)
- break;
+#ifdef HAVE_TSAN_API
+ proc->tsan.fiber = __tsan_create_fiber(0);
+ snprintf(proc->tsan.name, TSAN_THREAD_NAMELEN, "<sched of syncenv@%p>",
+ proc);
+ __tsan_set_fiber_name(proc->tsan.fiber, proc->tsan.name);
+#endif
+ while ((task = syncenv_task(proc)) != NULL) {
synctask_switchto(task);
-
- syncenv_scale(env);
}
- return NULL;
-}
-
-void
-syncenv_scale(struct syncenv *env)
-{
- int diff = 0;
- int scale = 0;
- int i = 0;
- int ret = 0;
- char thread_name[GF_THREAD_NAMEMAX] = {
- 0,
- };
-
- pthread_mutex_lock(&env->mutex);
- {
- if (env->procs > env->runcount)
- goto unlock;
-
- scale = env->runcount;
- if (scale > env->procmax)
- scale = env->procmax;
- if (scale > env->procs)
- diff = scale - env->procs;
- while (diff) {
- diff--;
- for (; (i < env->procmax); i++) {
- if (env->proc[i].processor == 0)
- break;
- }
+#ifdef HAVE_TSAN_API
+ __tsan_destroy_fiber(proc->tsan.fiber);
+#endif
- env->proc[i].env = env;
- snprintf(thread_name, sizeof(thread_name), "sproc%03hx",
- (env->procs & 0x3ff));
- ret = gf_thread_create(&env->proc[i].processor, NULL,
- syncenv_processor, &env->proc[i],
- thread_name);
- if (ret)
- break;
- env->procs++;
- i++;
- }
- }
-unlock:
- pthread_mutex_unlock(&env->mutex);
+ return NULL;
}
/* The syncenv threads are cleaned up in this routine.
@@ -784,9 +768,6 @@ syncenv_new(size_t stacksize, int procmin, int procmax)
struct syncenv *newenv = NULL;
int ret = 0;
int i = 0;
- char thread_name[GF_THREAD_NAMEMAX] = {
- 0,
- };
if (!procmin || procmin < 0)
procmin = SYNCENV_PROC_MIN;
@@ -812,14 +793,13 @@ syncenv_new(size_t stacksize, int procmin, int procmax)
newenv->stacksize = stacksize;
newenv->procmin = procmin;
newenv->procmax = procmax;
+ newenv->procs_idle = 0;
for (i = 0; i < newenv->procmin; i++) {
newenv->proc[i].env = newenv;
- snprintf(thread_name, sizeof(thread_name), "%s%d", "sproc",
- (newenv->procs));
ret = gf_thread_create(&newenv->proc[i].processor, NULL,
- syncenv_processor, &newenv->proc[i],
- thread_name);
+ syncenv_processor, &newenv->proc[i], "sproc%d",
+ i);
if (ret)
break;
newenv->procs++;
@@ -909,7 +889,7 @@ __synclock_lock(struct synclock *lock)
task->woken = 0;
list_add_tail(&task->waitq, &lock->waitq);
pthread_mutex_unlock(&lock->guard);
- synctask_yield(task);
+ synctask_yield(task, NULL);
/* task is removed from waitq in unlock,
* under lock->guard.*/
pthread_mutex_lock(&lock->guard);
@@ -1062,6 +1042,136 @@ synclock_unlock(synclock_t *lock)
return ret;
}
+/* Condition variables */
+
+int32_t
+synccond_init(synccond_t *cond)
+{
+ int32_t ret;
+
+ INIT_LIST_HEAD(&cond->waitq);
+
+ ret = pthread_mutex_init(&cond->pmutex, NULL);
+ if (ret != 0) {
+ return -ret;
+ }
+
+ ret = pthread_cond_init(&cond->pcond, NULL);
+ if (ret != 0) {
+ pthread_mutex_destroy(&cond->pmutex);
+ }
+
+ return -ret;
+}
+
+void
+synccond_destroy(synccond_t *cond)
+{
+ pthread_cond_destroy(&cond->pcond);
+ pthread_mutex_destroy(&cond->pmutex);
+}
+
+int
+synccond_timedwait(synccond_t *cond, synclock_t *lock, struct timespec *delta)
+{
+ struct timespec now;
+ struct synctask *task = NULL;
+ int ret;
+
+ task = synctask_get();
+
+ if (task == NULL) {
+ if (delta != NULL) {
+ timespec_now_realtime(&now);
+ timespec_adjust_delta(&now, *delta);
+ }
+
+ pthread_mutex_lock(&cond->pmutex);
+
+ if (delta == NULL) {
+ ret = -pthread_cond_wait(&cond->pcond, &cond->pmutex);
+ } else {
+ ret = -pthread_cond_timedwait(&cond->pcond, &cond->pmutex, &now);
+ }
+ } else {
+ pthread_mutex_lock(&cond->pmutex);
+
+ list_add_tail(&task->waitq, &cond->waitq);
+ task->synccond = cond;
+
+ ret = synclock_unlock(lock);
+ if (ret == 0) {
+ pthread_mutex_unlock(&cond->pmutex);
+
+ synctask_yield(task, delta);
+
+ ret = synclock_lock(lock);
+ if (ret == 0) {
+ ret = task->ret;
+ }
+ task->ret = 0;
+
+ return ret;
+ }
+
+ list_del_init(&task->waitq);
+ }
+
+ pthread_mutex_unlock(&cond->pmutex);
+
+ return ret;
+}
+
+int
+synccond_wait(synccond_t *cond, synclock_t *lock)
+{
+ return synccond_timedwait(cond, lock, NULL);
+}
+
+void
+synccond_signal(synccond_t *cond)
+{
+ struct synctask *task;
+
+ pthread_mutex_lock(&cond->pmutex);
+
+ if (!list_empty(&cond->waitq)) {
+ task = list_first_entry(&cond->waitq, struct synctask, waitq);
+ list_del_init(&task->waitq);
+
+ pthread_mutex_unlock(&cond->pmutex);
+
+ synctask_wake(task);
+ } else {
+ pthread_cond_signal(&cond->pcond);
+
+ pthread_mutex_unlock(&cond->pmutex);
+ }
+}
+
+void
+synccond_broadcast(synccond_t *cond)
+{
+ struct list_head list;
+ struct synctask *task;
+
+ INIT_LIST_HEAD(&list);
+
+ pthread_mutex_lock(&cond->pmutex);
+
+ list_splice_init(&cond->waitq, &list);
+ pthread_cond_broadcast(&cond->pcond);
+
+ pthread_mutex_unlock(&cond->pmutex);
+
+ while (!list_empty(&list)) {
+ task = list_first_entry(&list, struct synctask, waitq);
+ list_del_init(&task->waitq);
+
+ synctask_wake(task);
+ }
+}
+
/* Barriers */
int
@@ -1131,7 +1241,7 @@ __syncbarrier_wait(struct syncbarrier *barrier, int waitfor)
/* called within a synctask */
list_add_tail(&task->waitq, &barrier->waitq);
pthread_mutex_unlock(&barrier->guard);
- synctask_yield(task);
+ synctask_yield(task, NULL);
pthread_mutex_lock(&barrier->guard);
} else {
/* called by a non-synctask */
@@ -2980,12 +3090,13 @@ syncop_seek(xlator_t *subvol, fd_t *fd, off_t offset, gf_seek_what_t what,
SYNCOP(subvol, (&args), syncop_seek_cbk, subvol->fops->seek, fd, offset,
what, xdata_in);
- if (*off)
- *off = args.offset;
-
- if (args.op_ret == -1)
+ if (args.op_ret < 0) {
return -args.op_errno;
- return args.op_ret;
+ } else {
+ if (off)
+ *off = args.offset;
+ return args.op_ret;
+ }
}
int
@@ -3397,4 +3508,65 @@ syncop_namelink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
__wake(args);
return 0;
-} \ No newline at end of file
+}
+
+int
+syncop_copy_file_range(xlator_t *subvol, fd_t *fd_in, off64_t off_in,
+ fd_t *fd_out, off64_t off_out, size_t len,
+ uint32_t flags, struct iatt *stbuf,
+ struct iatt *preiatt_dst, struct iatt *postiatt_dst,
+ dict_t *xdata_in, dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_copy_file_range_cbk,
+ subvol->fops->copy_file_range, fd_in, off_in, fd_out, off_out, len,
+ flags, xdata_in);
+
+ if (stbuf) {
+ *stbuf = args.iatt1;
+ }
+ if (preiatt_dst) {
+ *preiatt_dst = args.iatt2;
+ }
+ if (postiatt_dst) {
+ *postiatt_dst = args.iatt3;
+ }
+
+ if (xdata_out) {
+ *xdata_out = args.xdata;
+ } else if (args.xdata) {
+ dict_unref(args.xdata);
+ }
+
+ errno = args.op_errno;
+ return args.op_ret;
+}
+
+int
+syncop_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *stbuf,
+ struct iatt *prebuf_dst, struct iatt *postbuf_dst,
+ dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ if (op_ret >= 0) {
+ args->iatt1 = *stbuf;
+ args->iatt2 = *prebuf_dst;
+ args->iatt3 = *postbuf_dst;
+ }
+
+ __wake(args);
+
+ return 0;
+}
diff --git a/libglusterfs/src/syscall.c b/libglusterfs/src/syscall.c
index 0987a8628c2..04400f98b6c 100644
--- a/libglusterfs/src/syscall.c
+++ b/libglusterfs/src/syscall.c
@@ -8,17 +8,24 @@
cases as published by the Free Software Foundation.
*/
-#include "syscall.h"
-#include "compat.h"
-#include "mem-pool.h"
-#include "libglusterfs-messages.h"
+#include "glusterfs/compat.h"
+#include "glusterfs/syscall.h"
+#include "glusterfs/mem-pool.h"
+#include "glusterfs/libglusterfs-messages.h"
+#ifdef __FreeBSD__
+#include <sys/sysctl.h>
+#include <signal.h>
+#endif
#include <sys/types.h>
#include <utime.h>
#include <sys/time.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdarg.h>
+#ifdef HAVE_COPY_FILE_RANGE_SYS
+#include <sys/syscall.h>
+#endif
#define FS_ERROR_LOG(result) \
do { \
@@ -211,6 +218,15 @@ sys_unlink(const char *pathname)
}
int
+sys_unlinkat(int dfd, const char *pathname)
+{
+#ifdef GF_SOLARIS_HOST_OS
+ return FS_RET_CHECK0(solaris_unlinkat(dfd, pathname, 0), errno);
+#endif
+ return FS_RET_CHECK0(unlinkat(dfd, pathname, 0), errno);
+}
+
+int
sys_rmdir(const char *pathname)
{
return FS_RET_CHECK0(rmdir(pathname), errno);
@@ -223,6 +239,12 @@ sys_symlink(const char *oldpath, const char *newpath)
}
int
+sys_symlinkat(const char *oldpath, int dirfd, const char *newpath)
+{
+ return FS_RET_CHECK0(symlinkat(oldpath, dirfd, newpath), errno);
+}
+
+int
sys_rename(const char *oldpath, const char *newpath)
{
#ifdef GF_SOLARIS_HOST_OS
@@ -250,6 +272,12 @@ sys_link(const char *oldpath, const char *newpath)
}
int
+sys_linkat(int oldfd, const char *oldpath, int newfd, const char *newpath)
+{
+ return FS_RET_CHECK0(linkat(oldfd, oldpath, newfd, newpath, 0), errno);
+}
+
+int
sys_chmod(const char *path, mode_t mode)
{
return FS_RET_CHECK0(chmod(path, mode), errno);
@@ -482,7 +510,7 @@ sys_lsetxattr(const char *path, const char *name, const void *value,
#endif
#ifdef GF_BSD_HOST_OS
- return FS_RET_CHECK0(
+ return FS_RET_CHECK(
extattr_set_link(path, EXTATTR_NAMESPACE_USER, name, value, size),
errno);
#endif
@@ -600,7 +628,7 @@ sys_fsetxattr(int filedes, const char *name, const void *value, size_t size,
#endif
#ifdef GF_BSD_HOST_OS
- return FS_RET_CHECK0(
+ return FS_RET_CHECK(
extattr_set_fd(filedes, EXTATTR_NAMESPACE_USER, name, value, size),
errno);
#endif
@@ -802,3 +830,47 @@ err:
#endif
return newsock;
}
+
+ssize_t
+sys_copy_file_range(int fd_in, off64_t *off_in, int fd_out, off64_t *off_out,
+ size_t len, unsigned int flags)
+{
+ /*
+ * TODO: Add check for other platofrms like freebsd etc if this syscall is
+ * not generic.
+ * This is what the function does.
+ * 1) Check whether copy_file_range API is present. If so call it.
+ * 2) If copy_file_range API is not present, then check whether
+ * the system call is there. If so, then use syscall to invoke
+ * SYS_copy_file_range system call.
+ * 3) If neither of the above is present, then return ENOSYS.
+ */
+#ifdef HAVE_COPY_FILE_RANGE
+ return FS_RET_CHECK(
+ copy_file_range(fd_in, off_in, fd_out, off_out, len, flags), errno);
+#else
+#ifdef HAVE_COPY_FILE_RANGE_SYS
+ return syscall(SYS_copy_file_range, fd_in, off_in, fd_out, off_out, len,
+ flags);
+#else
+ errno = ENOSYS;
+ return -1;
+#endif /* HAVE_COPY_FILE_RANGE_SYS */
+#endif /* HAVE_COPY_FILE_RANGE */
+}
+
+#ifdef __FreeBSD__
+int
+sys_kill(pid_t pid, int sig)
+{
+ return FS_RET_CHECK0(kill(pid, sig), errno);
+}
+
+int
+sys_sysctl(const int *name, u_int namelen, void *oldp, size_t *oldlenp,
+ const void *newp, size_t newlen)
+{
+ return FS_RET_CHECK0(sysctl(name, namelen, oldp, oldlenp, newp, newlen),
+ errno);
+}
+#endif
diff --git a/libglusterfs/src/throttle-tbf.c b/libglusterfs/src/throttle-tbf.c
index 9519defa37f..e11ca4f9d35 100644
--- a/libglusterfs/src/throttle-tbf.c
+++ b/libglusterfs/src/throttle-tbf.c
@@ -23,8 +23,8 @@
*
*/
-#include "mem-pool.h"
-#include "throttle-tbf.h"
+#include "glusterfs/mem-pool.h"
+#include "glusterfs/throttle-tbf.h"
typedef struct tbf_throttle {
char done;
@@ -99,7 +99,7 @@ tbf_tokengenerator(void *arg)
token_gen_interval = bucket->token_gen_interval;
while (1) {
- usleep(token_gen_interval);
+ gf_nanosleep(token_gen_interval * GF_US_IN_NS);
LOCK(&bucket->lock);
{
diff --git a/libglusterfs/src/tier-ctr-interface.h b/libglusterfs/src/tier-ctr-interface.h
deleted file mode 100644
index 8a627542c0c..00000000000
--- a/libglusterfs/src/tier-ctr-interface.h
+++ /dev/null
@@ -1,44 +0,0 @@
-#ifndef _TIER_CTR_INTERFACE_H_
-#define _TIER_CTR_INTERFACE_H_
-
-#include "common-utils.h"
-#include "gfdb_data_store_types.h"
-
-#define GFDB_IPC_CTR_KEY "gfdb.ipc-ctr-op"
-
-/*
- * CTR IPC OPERATIONS
- *
- *
- */
-#define GFDB_IPC_CTR_QUERY_OPS "gfdb.ipc-ctr-query-op"
-#define GFDB_IPC_CTR_CLEAR_OPS "gfdb.ipc-ctr-clear-op"
-#define GFDB_IPC_CTR_GET_DB_PARAM_OPS "gfdb.ipc-ctr-get-db-parm"
-#define GFDB_IPC_CTR_GET_DB_VERSION_OPS "gfdb.ipc-ctr-get-db-version"
-#define GFDB_IPC_CTR_SET_COMPACT_PRAGMA "gfdb.ipc-ctr-set-compact-pragma"
-/*
- * CTR IPC INPUT/OUTPUT
- *
- *
- */
-#define GFDB_IPC_CTR_GET_QFILE_PATH "gfdb.ipc-ctr-get-qfile-path"
-#define GFDB_IPC_CTR_GET_QUERY_PARAMS "gfdb.ipc-ctr-get-query-parms"
-#define GFDB_IPC_CTR_RET_QUERY_COUNT "gfdb.ipc-ctr-ret-rec-count"
-#define GFDB_IPC_CTR_GET_DB_KEY "gfdb.ipc-ctr-get-params-key"
-#define GFDB_IPC_CTR_RET_DB_VERSION "gfdb.ipc-ctr-ret-db-version"
-
-/*
- * gfdb ipc ctr params for query
- *
- *
- */
-typedef struct gfdb_ipc_ctr_params {
- gf_boolean_t is_promote;
- int write_freq_threshold;
- int read_freq_threshold;
- gfdb_time_t time_stamp;
- int query_limit;
- gf_boolean_t emergency_demote;
-} gfdb_ipc_ctr_params_t;
-
-#endif
diff --git a/libglusterfs/src/timer.c b/libglusterfs/src/timer.c
index 88a28a9bd16..66c861b04cd 100644
--- a/libglusterfs/src/timer.c
+++ b/libglusterfs/src/timer.c
@@ -8,12 +8,12 @@
cases as published by the Free Software Foundation.
*/
-#include "timer.h"
-#include "logging.h"
-#include "common-utils.h"
-#include "globals.h"
-#include "timespec.h"
-#include "libglusterfs-messages.h"
+#include "glusterfs/timer.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/globals.h"
+#include "glusterfs/timespec.h"
+#include "glusterfs/libglusterfs-messages.h"
/* fwd decl */
static gf_timer_registry_t *
@@ -53,7 +53,7 @@ gf_timer_call_after(glusterfs_ctx_t *ctx, struct timespec delta,
event->callbk = callbk;
event->data = data;
event->xl = THIS;
- LOCK(&reg->lock);
+ pthread_mutex_lock(&reg->lock);
{
list_for_each_entry_reverse(trav, &reg->active, list)
{
@@ -61,8 +61,11 @@ gf_timer_call_after(glusterfs_ctx_t *ctx, struct timespec delta,
break;
}
list_add(&event->list, &trav->list);
+ if (&trav->list == &reg->active) {
+ pthread_cond_signal(&reg->cond);
+ }
}
- UNLOCK(&reg->lock);
+ pthread_mutex_unlock(&reg->lock);
return event;
}
@@ -75,13 +78,13 @@ gf_timer_call_cancel(glusterfs_ctx_t *ctx, gf_timer_t *event)
if (ctx == NULL || event == NULL) {
gf_msg_callingfn("timer", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
"invalid argument");
- return 0;
+ return -1;
}
if (ctx->cleanup_started) {
gf_msg_callingfn("timer", GF_LOG_INFO, 0, LG_MSG_CTX_CLEANUP_STARTED,
"ctx cleanup started");
- return 0;
+ return -1;
}
LOCK(&ctx->lock);
@@ -93,13 +96,12 @@ gf_timer_call_cancel(glusterfs_ctx_t *ctx, gf_timer_t *event)
if (!reg) {
/* This can happen when cleanup may have just started and
* gf_timer_registry_destroy() sets ctx->timer to NULL.
- * Just bail out as success as gf_timer_proc() takes
- * care of cleaning up the events.
+ * gf_timer_proc() takes care of cleaning up the events.
*/
- return 0;
+ return -1;
}
- LOCK(&reg->lock);
+ pthread_mutex_lock(&reg->lock);
{
fired = event->fired;
if (fired)
@@ -107,7 +109,7 @@ gf_timer_call_cancel(glusterfs_ctx_t *ctx, gf_timer_t *event)
list_del(&event->list);
}
unlock:
- UNLOCK(&reg->lock);
+ pthread_mutex_unlock(&reg->lock);
if (!fired) {
GF_FREE(event);
@@ -120,64 +122,29 @@ static void *
gf_timer_proc(void *data)
{
gf_timer_registry_t *reg = data;
- struct timespec sleepts;
gf_timer_t *event = NULL;
gf_timer_t *tmp = NULL;
xlator_t *old_THIS = NULL;
+ pthread_mutex_lock(&reg->lock);
+
while (!reg->fin) {
- uint64_t now;
- struct timespec now_ts;
-
- timespec_now(&now_ts);
- now = TS(now_ts);
- while (1) {
- uint64_t at;
- char need_cbk = 0;
-
- /*
- * This will be overridden with a shorter interval if
- * there's an event scheduled sooner. That makes the
- * system more responsive in most cases, but doesn't
- * include the case where a timer is added while we're
- * asleep. It's tempting to use pthread_cond_timedwait,
- * with the caveat that we'd be relying on system time
- * instead of monotonic time. That's a mess when the
- * system time is adjusted. Another alternative might
- * be to use pthread_kill, but that will remain TBD for
- * now.
- */
- sleepts.tv_sec = 1;
- sleepts.tv_nsec = 0;
-
- LOCK(&reg->lock);
- {
- /*
- * Using list_for_each and then always breaking
- * after the first iteration might seem strange,
- * but (unlike alternatives) is independent of
- * the underlying list implementation.
- */
- list_for_each_entry_safe(event, tmp, &reg->active, list)
- {
- at = TS(event->at);
- if (now >= at) {
- need_cbk = 1;
- event->fired = _gf_true;
- list_del(&event->list);
- } else {
- uint64_t diff = now - at;
-
- if (diff < 1000000000) {
- sleepts.tv_sec = 0;
- sleepts.tv_nsec = diff;
- }
- }
- break;
- }
- }
- UNLOCK(&reg->lock);
- if (need_cbk) {
+ if (list_empty(&reg->active)) {
+ pthread_cond_wait(&reg->cond, &reg->lock);
+ } else {
+ struct timespec now;
+
+ timespec_now(&now);
+ event = list_first_entry(&reg->active, gf_timer_t, list);
+ if (TS(now) < TS(event->at)) {
+ now = event->at;
+ pthread_cond_timedwait(&reg->cond, &reg->lock, &now);
+ } else {
+ event->fired = _gf_true;
+ list_del_init(&event->list);
+
+ pthread_mutex_unlock(&reg->lock);
+
old_THIS = NULL;
if (event->xl) {
old_THIS = THIS;
@@ -188,26 +155,29 @@ gf_timer_proc(void *data)
if (old_THIS) {
THIS = old_THIS;
}
- } else {
- break;
+
+ pthread_mutex_lock(&reg->lock);
}
}
- nanosleep(&sleepts, NULL);
}
- LOCK(&reg->lock);
+ /* Do not call gf_timer_call_cancel(),
+ * it will lead to deadlock
+ */
+ list_for_each_entry_safe(event, tmp, &reg->active, list)
{
- /* Do not call gf_timer_call_cancel(),
- * it will lead to deadlock
+ list_del(&event->list);
+ /* TODO Possible resource leak
+ * Before freeing the event, we need to call the respective
+ * event functions and free any resources.
+ * For example, In case of rpc_clnt_reconnect, we need to
+ * unref rpc object which was taken when added to timer
+ * wheel.
*/
- list_for_each_entry_safe(event, tmp, &reg->active, list)
- {
- list_del(&event->list);
- GF_FREE(event);
- }
+ GF_FREE(event);
}
- UNLOCK(&reg->lock);
- LOCK_DESTROY(&reg->lock);
+
+ pthread_mutex_unlock(&reg->lock);
return NULL;
}
@@ -217,6 +187,7 @@ gf_timer_registry_init(glusterfs_ctx_t *ctx)
{
gf_timer_registry_t *reg = NULL;
int ret = -1;
+ pthread_condattr_t attr;
LOCK(&ctx->lock);
{
@@ -231,7 +202,10 @@ gf_timer_registry_init(glusterfs_ctx_t *ctx)
goto out;
}
ctx->timer = reg;
- LOCK_INIT(&reg->lock);
+ pthread_mutex_init(&reg->lock, NULL);
+ pthread_condattr_init(&attr);
+ pthread_condattr_setclock(&attr, CLOCK_MONOTONIC);
+ pthread_cond_init(&reg->cond, &attr);
INIT_LIST_HEAD(&reg->active);
}
UNLOCK(&ctx->lock);
@@ -265,7 +239,18 @@ gf_timer_registry_destroy(glusterfs_ctx_t *ctx)
return;
thr_id = reg->th;
+
+ pthread_mutex_lock(&reg->lock);
+
reg->fin = 1;
+ pthread_cond_signal(&reg->cond);
+
+ pthread_mutex_unlock(&reg->lock);
+
pthread_join(thr_id, NULL);
+
+ pthread_cond_destroy(&reg->cond);
+ pthread_mutex_destroy(&reg->lock);
+
GF_FREE(reg);
}
diff --git a/libglusterfs/src/timespec.c b/libglusterfs/src/timespec.c
index c0d4ab9315b..96cef5c6f07 100644
--- a/libglusterfs/src/timespec.c
+++ b/libglusterfs/src/timespec.c
@@ -19,10 +19,9 @@
static mach_timebase_info_data_t gf_timebase;
#endif
-#include "logging.h"
-#include "timespec.h"
-#include "libglusterfs-messages.h"
-#include "common-utils.h"
+#include "glusterfs/timespec.h"
+#include "glusterfs/libglusterfs-messages.h"
+#include "glusterfs/common-utils.h"
void
timespec_now(struct timespec *ts)
@@ -71,6 +70,28 @@ timespec_now(struct timespec *ts)
}
void
+timespec_now_realtime(struct timespec *ts)
+{
+#if defined GF_LINUX_HOST_OS || defined GF_SOLARIS_HOST_OS || \
+ defined GF_BSD_HOST_OS
+ if (0 == clock_gettime(CLOCK_REALTIME, ts)) {
+ return;
+ }
+#endif
+
+ /* Fall back to gettimeofday()*/
+ struct timeval tv = {
+ 0,
+ };
+ if (0 == gettimeofday(&tv, NULL)) {
+ TIMEVAL_TO_TIMESPEC(&tv, ts);
+ return;
+ }
+
+ return;
+}
+
+void
timespec_adjust_delta(struct timespec *ts, struct timespec delta)
{
ts->tv_nsec = ((ts->tv_nsec + delta.tv_nsec) % 1000000000);
diff --git a/libglusterfs/src/trie.c b/libglusterfs/src/trie.c
index 4f01bcfe0da..809550b864c 100644
--- a/libglusterfs/src/trie.c
+++ b/libglusterfs/src/trie.c
@@ -10,11 +10,9 @@
#include <stdio.h>
#include <string.h>
-#include <stdlib.h>
-#include <ctype.h>
-#include "common-utils.h"
-#include "trie.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/trie.h"
#define DISTANCE_EDIT 1
#define DISTANCE_INS 1
diff --git a/libglusterfs/src/unittest/global_mock.c b/libglusterfs/src/unittest/global_mock.c
index 52156847d81..2fcf96dbad8 100644
--- a/libglusterfs/src/unittest/global_mock.c
+++ b/libglusterfs/src/unittest/global_mock.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "logging.h"
-#include "xlator.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/xlator.h"
#include <stdarg.h>
#include <stddef.h>
diff --git a/libglusterfs/src/unittest/log_mock.c b/libglusterfs/src/unittest/log_mock.c
index d342de31067..60f6530726b 100644
--- a/libglusterfs/src/unittest/log_mock.c
+++ b/libglusterfs/src/unittest/log_mock.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "logging.h"
-#include "xlator.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/xlator.h"
#include <stdarg.h>
#include <stddef.h>
diff --git a/libglusterfs/src/unittest/mem_pool_unittest.c b/libglusterfs/src/unittest/mem_pool_unittest.c
index 32137b95bb0..9ca324329ba 100644
--- a/libglusterfs/src/unittest/mem_pool_unittest.c
+++ b/libglusterfs/src/unittest/mem_pool_unittest.c
@@ -8,9 +8,9 @@
cases as published by the Free Software Foundation.
*/
-#include "mem-pool.h"
-#include "logging.h"
-#include "xlator.h"
+#include "glusterfs/mem-pool.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/xlator.h"
#include <stdarg.h>
#include <stddef.h>
diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c
index cbbe8cf7f12..9a2582d45d5 100644
--- a/libglusterfs/src/xlator.c
+++ b/libglusterfs/src/xlator.c
@@ -8,12 +8,12 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
+#include "glusterfs/xlator.h"
#include <dlfcn.h>
#include <netdb.h>
#include <fnmatch.h>
-#include "defaults.h"
-#include "libglusterfs-messages.h"
+#include "glusterfs/defaults.h"
+#include "glusterfs/libglusterfs-messages.h"
#define SET_DEFAULT_FOP(fn) \
do { \
@@ -143,6 +143,7 @@ fill_defaults(xlator_t *xl)
SET_DEFAULT_FOP(getspec);
SET_DEFAULT_FOP(icreate);
SET_DEFAULT_FOP(namelink);
+ SET_DEFAULT_FOP(copy_file_range);
if (!xl->cbks)
xl->cbks = &default_cbks;
@@ -183,9 +184,11 @@ xlator_volopt_dynload(char *xlator_type, void **dl_handle,
volume_opt_list_t *opt_list)
{
int ret = -1;
+ int flag = 0;
char *name = NULL;
void *handle = NULL;
xlator_api_t *xlapi = NULL;
+ volume_option_t *opt = NULL;
GF_VALIDATE_OR_GOTO("xlator", xlator_type, out);
@@ -193,8 +196,10 @@ xlator_volopt_dynload(char *xlator_type, void **dl_handle,
* need this check */
if (!strstr(xlator_type, "rpc-transport"))
ret = gf_asprintf(&name, "%s/%s.so", XLATORDIR, xlator_type);
- else
+ else {
+ flag = 1;
ret = gf_asprintf(&name, "%s/%s.so", XLATORPARENTDIR, xlator_type);
+ }
if (-1 == ret) {
goto out;
}
@@ -205,34 +210,34 @@ xlator_volopt_dynload(char *xlator_type, void **dl_handle,
handle = dlopen(name, RTLD_NOW);
if (!handle) {
- gf_msg("xlator", GF_LOG_WARNING, 0, LG_MSG_DLOPEN_FAILED, "%s",
- dlerror());
+ gf_smsg("xlator", GF_LOG_WARNING, 0, LG_MSG_DLOPEN_FAILED, "error=%s",
+ dlerror(), NULL);
goto out;
}
- /* check new struct first, and then check this */
- xlapi = dlsym(handle, "xlator_api");
- if (!xlapi) {
- gf_msg("xlator", GF_LOG_DEBUG, 0, LG_MSG_DLSYM_ERROR,
- "dlsym(xlator_api) on %s. "
- "Fall back to old symbols",
- dlerror());
- /* This case is not an error for now, so allow it
- to fall back to old methods. */
- opt_list->given_opt = dlsym(handle, "options");
- if (!opt_list->given_opt) {
- dlerror();
- gf_msg("xlator", GF_LOG_ERROR, 0, LG_MSG_LOAD_FAILED,
- "Failed to load xlator opt table");
+ if (flag == 0) {
+ /* check new struct first, and then check this */
+ xlapi = dlsym(handle, "xlator_api");
+ if (!xlapi) {
+ gf_smsg("xlator", GF_LOG_ERROR, 0, LG_MSG_DLSYM_ERROR, "error=%s",
+ dlerror(), NULL);
goto out;
}
- } else {
+
opt_list->given_opt = xlapi->options;
if (!opt_list->given_opt) {
- gf_msg("xlator", GF_LOG_ERROR, 0, LG_MSG_LOAD_FAILED,
- "Failed to load xlator options table");
+ gf_smsg("xlator", GF_LOG_ERROR, 0, LG_MSG_LOAD_FAILED, NULL);
goto out;
}
+ } else {
+ opt = dlsym(handle, "options");
+ if (!opt) {
+ gf_smsg("xlator", GF_LOG_ERROR, 0, LG_MSG_DLSYM_ERROR, "error=%s",
+ dlerror(), NULL);
+ goto out;
+ }
+
+ opt_list->given_opt = opt;
}
*dl_handle = handle;
@@ -248,134 +253,29 @@ out:
return ret;
}
-int
-xlator_dynload_oldway(xlator_t *xl)
-{
- int i = 0;
- int ret = -1;
- void *handle = NULL;
- volume_opt_list_t *vol_opt = NULL;
- class_methods_t *vtbl = NULL;
-
- handle = xl->dlhandle;
-
- xl->fops = dlsym(handle, "fops");
- if (!xl->fops) {
- gf_msg("xlator", GF_LOG_WARNING, 0, LG_MSG_DLSYM_ERROR,
- "dlsym(fops) on %s", dlerror());
- goto out;
- }
-
- xl->cbks = dlsym(handle, "cbks");
- if (!xl->cbks) {
- gf_msg("xlator", GF_LOG_WARNING, 0, LG_MSG_DLSYM_ERROR,
- "dlsym(cbks) on %s", dlerror());
- goto out;
- }
-
- /*
- * If class_methods exists, its contents override any definitions of
- * init or fini for that translator. Otherwise, we fall back to the
- * older method of looking for init and fini directly.
- */
- vtbl = dlsym(handle, "class_methods");
- if (vtbl) {
- xl->init = vtbl->init;
- xl->fini = vtbl->fini;
- xl->reconfigure = vtbl->reconfigure;
- xl->notify = vtbl->notify;
- } else {
- if (!(*VOID(&xl->init) = dlsym(handle, "init"))) {
- gf_msg("xlator", GF_LOG_WARNING, 0, LG_MSG_DLSYM_ERROR,
- "dlsym(init) on %s", dlerror());
- goto out;
- }
-
- if (!(*VOID(&(xl->fini)) = dlsym(handle, "fini"))) {
- gf_msg("xlator", GF_LOG_WARNING, 0, LG_MSG_DLSYM_ERROR,
- "dlsym(fini) on %s", dlerror());
- goto out;
- }
- if (!(*VOID(&(xl->reconfigure)) = dlsym(handle, "reconfigure"))) {
- gf_msg_trace("xlator", 0,
- "dlsym(reconfigure) on %s "
- "-- neglecting",
- dlerror());
- }
- if (!(*VOID(&(xl->notify)) = dlsym(handle, "notify"))) {
- gf_msg_trace("xlator", 0,
- "dlsym(notify) on %s -- "
- "neglecting",
- dlerror());
- }
- }
-
- if (!(xl->dumpops = dlsym(handle, "dumpops"))) {
- gf_msg_trace("xlator", 0,
- "dlsym(dumpops) on %s -- "
- "neglecting",
- dlerror());
- }
-
- if (!(*VOID(&(xl->mem_acct_init)) = dlsym(handle, "mem_acct_init"))) {
- gf_msg_trace(xl->name, 0,
- "dlsym(mem_acct_init) on %s -- "
- "neglecting",
- dlerror());
- }
-
- vol_opt = GF_CALLOC(1, sizeof(volume_opt_list_t),
- gf_common_mt_volume_opt_list_t);
-
- if (!vol_opt) {
- goto out;
- }
-
- INIT_LIST_HEAD(&vol_opt->list);
- vol_opt->given_opt = dlsym(handle, "options");
- if (!vol_opt->given_opt) {
- vol_opt->given_opt = default_options;
- }
- list_add_tail(&vol_opt->list, &xl->volume_options);
-
- /* make sure 'min' is set to high value, so it would be
- properly set later */
- for (i = 0; i < GF_FOP_MAXVALUE; i++) {
- xl->stats.interval.latencies[i].min = 0xffffffff;
- }
-
- ret = 0;
-
-out:
- return ret;
-}
-
-int
-xlator_dynload_newway(xlator_t *xl)
+static int
+xlator_dynload_apis(xlator_t *xl)
{
int ret = -1;
void *handle = NULL;
volume_opt_list_t *vol_opt = NULL;
xlator_api_t *xlapi = NULL;
+ int i = 0;
handle = xl->dlhandle;
xlapi = dlsym(handle, "xlator_api");
if (!xlapi) {
- gf_msg("xlator", GF_LOG_INFO, 0, LG_MSG_DLSYM_ERROR,
- "dlsym(xlator_api) on %s. "
- "Fall back to old symbols",
- dlerror());
- /* This case is not an error for now, so allow it
- to fall back to old methods. */
- ret = 1;
+ gf_smsg("xlator", GF_LOG_ERROR, 0, LG_MSG_DLSYM_ERROR, "dlsym=%s",
+ dlerror(), NULL);
+ ret = -1;
goto out;
}
xl->fops = xlapi->fops;
if (!xl->fops) {
- gf_msg("xlator", GF_LOG_WARNING, 0, LG_MSG_DLSYM_ERROR,
- "%s: struct missing (fops)", xl->name);
+ gf_smsg("xlator", GF_LOG_WARNING, 0, LG_MSG_STRUCT_MISS, "name=%s",
+ xl->name, NULL);
goto out;
}
@@ -386,8 +286,8 @@ xlator_dynload_newway(xlator_t *xl)
xl->init = xlapi->init;
if (!xl->init) {
- gf_msg("xlator", GF_LOG_WARNING, 0, LG_MSG_DLSYM_ERROR,
- "%s: method missing (init)", xl->name);
+ gf_smsg("xlator", GF_LOG_WARNING, 0, LG_MSG_METHOD_MISS, "name=%s",
+ xl->name, NULL);
goto out;
}
@@ -458,6 +358,10 @@ xlator_dynload_newway(xlator_t *xl)
memcpy(xl->op_version, xlapi->op_version,
sizeof(uint32_t) * GF_MAX_RELEASES);
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ gf_latency_reset(&xl->stats.interval.latencies[i]);
+ }
+
ret = 0;
out:
return ret;
@@ -485,21 +389,15 @@ xlator_dynload(xlator_t *xl)
handle = dlopen(name, RTLD_NOW);
if (!handle) {
- gf_msg("xlator", GF_LOG_WARNING, 0, LG_MSG_DLOPEN_FAILED, "%s",
- dlerror());
+ gf_smsg("xlator", GF_LOG_WARNING, 0, LG_MSG_DLOPEN_FAILED, "error=%s",
+ dlerror(), NULL);
goto out;
}
xl->dlhandle = handle;
- ret = xlator_dynload_newway(xl);
+ ret = xlator_dynload_apis(xl);
if (-1 == ret)
goto out;
- if (1 == ret) {
- /* it means we don't find the new symbol in xlator code */
- ret = xlator_dynload_oldway(xl);
- if (-1 == ret)
- goto out;
- }
fill_defaults(xl);
@@ -559,10 +457,8 @@ xlator_set_inode_lru_limit(xlator_t *this, void *data)
if (this->itable) {
if (!data) {
- gf_msg(this->name, GF_LOG_WARNING, 0, LG_MSG_INVALID_ENTRY,
- "input data is NULL. "
- "Cannot update the lru limit of the inode"
- " table. Continuing with older value");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, LG_MSG_INPUT_DATA_NULL,
+ NULL);
goto out;
}
inode_lru_limit = *(int *)data;
@@ -735,20 +631,16 @@ xlator_init(xlator_t *xl)
xl->instance_name = NULL;
GF_ATOMIC_INIT(xl->xprtrefcnt, 0);
- GF_ATOMIC_INIT(xl->fd_cnt, 0);
if (!xl->init) {
- gf_msg(xl->name, GF_LOG_WARNING, 0, LG_MSG_INIT_FAILED,
- "No init() found");
+ gf_smsg(xl->name, GF_LOG_WARNING, 0, LG_MSG_INIT_FAILED, NULL);
goto out;
}
ret = __xlator_init(xl);
if (ret) {
- gf_msg(xl->name, GF_LOG_ERROR, 0, LG_MSG_VOLUME_ERROR,
- "Initialization of volume '%s' failed,"
- " review your volfile again",
- xl->name);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_VOLUME_ERROR, "name=%s",
+ xl->name, NULL);
goto out;
}
@@ -781,6 +673,7 @@ xlator_fini_rec(xlator_t *xl)
trav = trav->next;
}
+ xl->cleanup_starting = 1;
if (xl->init_succeeded) {
if (xl->fini) {
old_THIS = THIS;
@@ -788,8 +681,14 @@ xlator_fini_rec(xlator_t *xl)
xl->fini(xl);
- if (xl->local_pool)
+ if (xl->local_pool) {
mem_pool_destroy(xl->local_pool);
+ xl->local_pool = NULL;
+ }
+ if (xl->itable) {
+ inode_table_destroy(xl->itable);
+ xl->itable = NULL;
+ }
THIS = old_THIS;
} else {
@@ -858,6 +757,19 @@ xlator_mem_acct_init(xlator_t *xl, int num_types)
}
void
+xlator_mem_acct_unref(struct mem_acct *mem_acct)
+{
+ uint32_t i;
+
+ if (GF_ATOMIC_DEC(mem_acct->refcnt) == 0) {
+ for (i = 0; i < mem_acct->num_types; i++) {
+ LOCK_DESTROY(&(mem_acct->rec[i].lock));
+ }
+ FREE(mem_acct);
+ }
+}
+
+void
xlator_tree_fini(xlator_t *xl)
{
xlator_t *top = NULL;
@@ -888,7 +800,6 @@ xlator_list_destroy(xlator_list_t *list)
int
xlator_memrec_free(xlator_t *xl)
{
- uint32_t i = 0;
struct mem_acct *mem_acct = NULL;
if (!xl) {
@@ -897,13 +808,8 @@ xlator_memrec_free(xlator_t *xl)
mem_acct = xl->mem_acct;
if (mem_acct) {
- for (i = 0; i < mem_acct->num_types; i++) {
- LOCK_DESTROY(&(mem_acct->rec[i].lock));
- }
- if (GF_ATOMIC_DEC(mem_acct->refcnt) == 0) {
- FREE(mem_acct);
- xl->mem_acct = NULL;
- }
+ xlator_mem_acct_unref(mem_acct);
+ xl->mem_acct = NULL;
}
return 0;
@@ -920,7 +826,7 @@ xlator_members_free(xlator_t *xl)
GF_FREE(xl->name);
GF_FREE(xl->type);
- if (!(xl->ctx && xl->ctx->cmd_args.valgrind) && xl->dlhandle)
+ if (!(xl->ctx && xl->ctx->cmd_args.vgtool != _gf_none) && xl->dlhandle)
dlclose(xl->dlhandle);
if (xl->options)
dict_unref(xl->options);
@@ -970,8 +876,7 @@ xlator_tree_free_members(xlator_t *tree)
xlator_t *prev = tree;
if (!tree) {
- gf_msg("parser", GF_LOG_ERROR, 0, LG_MSG_TREE_NOT_FOUND,
- "Translator tree not found");
+ gf_smsg("parser", GF_LOG_ERROR, 0, LG_MSG_TREE_NOT_FOUND, NULL);
return -1;
}
@@ -991,8 +896,7 @@ xlator_tree_free_memacct(xlator_t *tree)
xlator_t *prev = tree;
if (!tree) {
- gf_msg("parser", GF_LOG_ERROR, 0, LG_MSG_TREE_NOT_FOUND,
- "Translator tree not found");
+ gf_smsg("parser", GF_LOG_ERROR, 0, LG_MSG_TREE_NOT_FOUND, NULL);
return -1;
}
@@ -1016,7 +920,6 @@ xlator_mem_free(xlator_t *xl)
return 0;
if (xl->options) {
- dict_ref(xl->options);
dict_unref(xl->options);
xl->options = NULL;
}
@@ -1035,7 +938,7 @@ xlator_mem_free(xlator_t *xl)
static void
xlator_call_fini(xlator_t *this)
{
- if (!this || this->cleanup_starting)
+ if (!this || this->call_cleanup)
return;
this->cleanup_starting = 1;
this->call_cleanup = 1;
@@ -1054,6 +957,8 @@ xlator_mem_cleanup(xlator_t *this)
xlator_list_t **trav_p = NULL;
xlator_t *top = NULL;
xlator_t *victim = NULL;
+ glusterfs_graph_t *graph = NULL;
+ gf_boolean_t graph_cleanup = _gf_false;
if (this->call_cleanup || !this->ctx)
return;
@@ -1061,6 +966,12 @@ xlator_mem_cleanup(xlator_t *this)
this->call_cleanup = 1;
ctx = this->ctx;
+ inode_table = this->itable;
+ if (inode_table) {
+ inode_table_destroy(inode_table);
+ this->itable = NULL;
+ }
+
xlator_call_fini(trav);
while (prev) {
@@ -1069,12 +980,6 @@ xlator_mem_cleanup(xlator_t *this)
prev = trav;
}
- inode_table = this->itable;
- if (inode_table) {
- inode_table_destroy(inode_table);
- this->itable = NULL;
- }
-
if (this->fini) {
this->fini(this);
}
@@ -1084,17 +989,28 @@ xlator_mem_cleanup(xlator_t *this)
if (ctx->active) {
top = ctx->active->first;
LOCK(&ctx->volfile_lock);
- /* TODO here we have leak for xlator node in a graph */
- /* Need to move only top xlator from a graph */
for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) {
victim = (*trav_p)->xlator;
if (victim->call_cleanup && !strcmp(victim->name, this->name)) {
+ graph_cleanup = _gf_true;
(*trav_p) = (*trav_p)->next;
break;
}
}
UNLOCK(&ctx->volfile_lock);
}
+
+ if (graph_cleanup) {
+ prev = this;
+ graph = ctx->active;
+ pthread_mutex_lock(&graph->mutex);
+ while (prev) {
+ trav = prev->next;
+ GF_FREE(prev);
+ prev = trav;
+ }
+ pthread_mutex_unlock(&graph->mutex);
+ }
}
void
@@ -1394,8 +1310,21 @@ xlator_destroy(xlator_t *xl)
return 0;
}
+static int32_t
+gf_bin_to_string(char *dst, size_t size, void *src, size_t len)
+{
+ if (len >= size) {
+ return EINVAL;
+ }
+
+ memcpy(dst, src, len);
+ dst[len] = 0;
+
+ return 0;
+}
+
int
-is_gf_log_command(xlator_t *this, const char *name, char *value)
+is_gf_log_command(xlator_t *this, const char *name, char *value, size_t size)
{
xlator_t *trav = NULL;
char key[1024] = {
@@ -1407,7 +1336,11 @@ is_gf_log_command(xlator_t *this, const char *name, char *value)
glusterfs_ctx_t *ctx = NULL;
if (!strcmp("trusted.glusterfs.syslog", name)) {
- ret = gf_string2boolean(value, &syslog_flag);
+ ret = gf_bin_to_string(key, sizeof(key), value, size);
+ if (ret != 0) {
+ goto out;
+ }
+ ret = gf_string2boolean(key, &syslog_flag);
if (ret) {
ret = EOPNOTSUPP;
goto out;
@@ -1423,7 +1356,12 @@ is_gf_log_command(xlator_t *this, const char *name, char *value)
if (fnmatch("trusted.glusterfs*set-log-level", name, FNM_NOESCAPE))
goto out;
- log_level = glusterd_check_log_level(value);
+ ret = gf_bin_to_string(key, sizeof(key), value, size);
+ if (ret != 0) {
+ goto out;
+ }
+
+ log_level = glusterd_check_log_level(key);
if (log_level == -1) {
ret = EOPNOTSUPP;
goto out;
@@ -1431,9 +1369,9 @@ is_gf_log_command(xlator_t *this, const char *name, char *value)
/* Some crude way to change the log-level of process */
if (!strcmp(name, "trusted.glusterfs.set-log-level")) {
- gf_msg("glusterfs", gf_log_get_loglevel(), 0, LG_MSG_SET_LOG_LEVEL,
- "setting log level to %d (old-value=%d)", log_level,
- gf_log_get_loglevel());
+ gf_smsg("glusterfs", gf_log_get_loglevel(), 0, LG_MSG_SET_LOG_LEVEL,
+ "new-value=%d", log_level, "old-value=%d",
+ gf_log_get_loglevel(), NULL);
gf_log_set_loglevel(this->ctx, log_level);
ret = 0;
goto out;
@@ -1441,9 +1379,9 @@ is_gf_log_command(xlator_t *this, const char *name, char *value)
if (!strcmp(name, "trusted.glusterfs.fuse.set-log-level")) {
/* */
- gf_msg(this->name, gf_log_get_xl_loglevel(this), 0,
- LG_MSG_SET_LOG_LEVEL, "setting log level to %d (old-value=%d)",
- log_level, gf_log_get_xl_loglevel(this));
+ gf_smsg(this->name, gf_log_get_xl_loglevel(this), 0,
+ LG_MSG_SET_LOG_LEVEL, "new-value=%d", log_level, "old-value=%d",
+ gf_log_get_xl_loglevel(this), NULL);
gf_log_set_xl_loglevel(this, log_level);
ret = 0;
goto out;
@@ -1459,10 +1397,9 @@ is_gf_log_command(xlator_t *this, const char *name, char *value)
while (trav) {
snprintf(key, 1024, "trusted.glusterfs.%s.set-log-level", trav->name);
if (fnmatch(name, key, FNM_NOESCAPE) == 0) {
- gf_msg(trav->name, gf_log_get_xl_loglevel(trav), 0,
- LG_MSG_SET_LOG_LEVEL,
- "setting log level to %d (old-value=%d)", log_level,
- gf_log_get_xl_loglevel(trav));
+ gf_smsg(trav->name, gf_log_get_xl_loglevel(trav), 0,
+ LG_MSG_SET_LOG_LEVEL, "new-value%d", log_level,
+ "old-value=%d", gf_log_get_xl_loglevel(trav), NULL);
gf_log_set_xl_loglevel(trav, log_level);
ret = 0;
}
@@ -1494,9 +1431,7 @@ glusterd_check_log_level(const char *value)
}
if (log_level == -1)
- gf_msg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_INIT_FAILED,
- "Invalid log-level. possible values are "
- "DEBUG|WARNING|ERROR|CRITICAL|NONE|TRACE");
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_INIT, NULL);
return log_level;
}
@@ -1555,3 +1490,104 @@ glusterfs_delete_volfile_checksum(glusterfs_ctx_t *ctx, const char *volfile_id)
return 0;
}
+
+/*
+ The function is required to take dict ref for every xlator at graph.
+ At the time of compare graph topology create a graph and populate
+ key values in the dictionary, after finished graph comparison we do destroy
+ the new graph.At the time of construct graph we don't take any reference
+ so to avoid dict leak at the of destroying graph due to ref counter underflow
+ we need to call dict_ref here.
+
+*/
+
+void
+gluster_graph_take_reference(xlator_t *tree)
+{
+ xlator_t *trav = tree;
+ xlator_t *prev = tree;
+
+ if (!tree) {
+ gf_smsg("parser", GF_LOG_ERROR, 0, LG_MSG_TREE_NOT_FOUND, NULL);
+ return;
+ }
+
+ while (prev) {
+ trav = prev->next;
+ if (prev->options)
+ dict_ref(prev->options);
+ prev = trav;
+ }
+ return;
+}
+
+gf_boolean_t
+mgmt_is_multiplexed_daemon(char *name)
+{
+ const char *mux_daemons[] = {"glustershd", NULL};
+ int i;
+
+ if (!name)
+ return _gf_false;
+
+ for (i = 0; mux_daemons[i]; i++) {
+ if (!strcmp(name, mux_daemons[i]))
+ return _gf_true;
+ }
+ return _gf_false;
+}
+
+gf_boolean_t
+xlator_is_cleanup_starting(xlator_t *this)
+{
+ gf_boolean_t cleanup = _gf_false;
+ glusterfs_graph_t *graph = NULL;
+ xlator_t *xl = NULL;
+
+ if (!this) {
+ gf_smsg("xlator", GF_LOG_WARNING, EINVAL, LG_MSG_OBJECT_NULL, "xlator",
+ NULL);
+ goto out;
+ }
+
+ graph = this->graph;
+ if (!graph) {
+ gf_smsg("xlator", GF_LOG_WARNING, EINVAL, LG_MSG_GRAPH_NOT_SET,
+ "name=%s", this->name, NULL);
+ goto out;
+ }
+
+ xl = graph->first;
+ if (xl && xl->cleanup_starting)
+ cleanup = _gf_true;
+out:
+ return cleanup;
+}
+
+int
+graph_total_client_xlator(glusterfs_graph_t *graph)
+{
+ xlator_t *xl = NULL;
+ int count = 0;
+
+ if (!graph) {
+ gf_smsg("xlator", GF_LOG_WARNING, EINVAL, LG_MSG_OBJECT_NULL, "graph",
+ NULL);
+ goto out;
+ }
+
+ xl = graph->first;
+ if (!strcmp(xl->type, "protocol/server")) {
+ gf_msg_debug(xl->name, 0, "Return because it is a server graph");
+ return 0;
+ }
+
+ while (xl) {
+ if (strcmp(xl->type, "protocol/client") == 0) {
+ count++;
+ }
+ xl = xl->next;
+ }
+out:
+ return count;
+}
diff --git a/rfc.sh b/rfc.sh
index 607fd7528fb..e7faec9ea0f 100755
--- a/rfc.sh
+++ b/rfc.sh
@@ -4,7 +4,29 @@
# i.e. where we are interested in the result of a command,
# we have to run the command in an if-statement.
-ORIGIN=${GLUSTER_ORIGIN:-origin}
+UPSTREAM=${GLUSTER_UPSTREAM}
+if [ "x$UPSTREAM" -eq "x" ]; then
+ for rmt in $(git remote); do
+ rmt_repo=$(git remote show $rmt -n | grep Fetch | awk '{ print $3 }');
+ if [ $rmt_repo -eq "git@github:gluster/glusterfs" ]; then
+ UPSTREAM=$rmt
+ echo "Picked $rmt as upstream remote"
+ break
+ fi
+ done
+fi
+
+USER_REPO=${GLUSTER_USER_REPO:-origin}
+if [ "x${USER_REPO}" -eq "x${UPSTREAM}" ] ; then
+ echo "When you submit patches, it should get submitted to your fork, not to upstream directly"
+ echo "If you are not sure, check `for rmt in $(git remote); do git remote show $rmt -n; done`"
+ echo "And pick the correct remote you would like to push to and do `export GLUSTER_USER_REPO=$rmt`"
+ echo ""
+ echo "Exiting..."
+ exit 1
+fi
+
+
while getopts "v" opt; do
case $opt in
@@ -18,7 +40,7 @@ done
shift $((OPTIND-1))
-branch="master";
+branch="devel";
set_hooks_commit_msg()
{
@@ -50,21 +72,21 @@ is_num()
backport_id_message()
{
echo ""
- echo "This commit is to a non-master branch, and hence is treated as a backport."
+ echo "This commit is to a non-devel branch, and hence is treated as a backport."
echo ""
echo "For backports we would like to retain the same gerrit Change-Id across"
echo "branches. On auto inspection it is found that a gerrit Change-Id is"
- echo "missing, or the Change-Id is not found on your local master"
+ echo "missing, or the Change-Id is not found on your local devel branch"
echo ""
echo "This could mean a few things:"
echo " 1. This is not a backport, hence choose Y on the prompt to proceed"
- echo " 2. Your $ORIGIN master is not up to date, hence the script is unable"
- echo " to find the corresponding Change-Id on master. Either choose N,"
+ echo " 2. Your $USER_REPO/devel is not up to date, hence the script is unable"
+ echo " to find the corresponding Change-Id on devel. Either choose N,"
echo " 'git fetch', and try again, OR if you are sure you used the"
echo " same Change-Id, choose Y at the prompt to proceed"
echo " 3. You commented or removed the Change-Id in your commit message after"
echo " cherry picking the commit. Choose N, fix the commit message to"
- echo " use the same Change-Id as master (git commit --amend), resubmit"
+ echo " use the same Change-Id as 'devel' (git commit --amend), resubmit"
echo ""
}
@@ -72,8 +94,8 @@ check_backport()
{
moveon='N'
- # Backports are never made to master
- if [ $branch = "master" ]; then
+ # Backports are never made to 'devel'
+ if [ $branch = "devel" ]; then
return;
fi
@@ -86,22 +108,22 @@ check_backport()
echo -n "Did not find a Change-Id for a possible backport. Continue (y/N): "
read moveon
else
- # Search master for the same change ID (rebase_changes has run, so we
+ # Search 'devel' for the same change ID (rebase_changes has run, so we
# should never not find a Change-Id)
- mchangeid=$(git log $ORIGIN/master --format='%b' --grep="^Change-Id: ${changeid}" | grep ${changeid} | awk '{print $2}')
+ mchangeid=$(git log $UPSTREAM/devel --format='%b' --grep="^Change-Id: ${changeid}" | grep ${changeid} | awk '{print $2}')
- # Check if we found the change ID on master, else throw a message to
+ # Check if we found the change ID on 'devel', else throw a message to
# decide if we should continue.
- # NOTE: If master was not rebased, we will not find the Change-ID and
+ # NOTE: If 'devel' was not rebased, we will not find the Change-ID and
# could hit a false positive case here (or if someone checks out some
- # other branch as master).
+ # other branch as 'devel').
if [ "${mchangeid}" = "${changeid}" ]; then
moveon="Y"
else
backport_id_message;
echo "Change-Id of commit: $changeid"
- echo "Change-Id on master: $mchangeid"
- echo -n "Did not find mentioned Change-Id on master for a possible backport. Continue (y/N): "
+ echo "Change-Id on devel: $mchangeid"
+ echo -n "Did not find mentioned Change-Id on 'devel' for a possible backport. Continue (y/N): "
read moveon
fi
fi
@@ -116,10 +138,51 @@ check_backport()
rebase_changes()
{
- GIT_EDITOR=$0 git rebase -i $ORIGIN/$branch;
+ GIT_EDITOR=$0 git rebase -i $UPSTREAM/$branch;
}
+# Regex elaborated:
+# grep options:
+# -w -> --word-regexp (from the man page)
+# Select only those lines containing matches that form whole words.
+# The test is that the matching substring must either be at the
+# beginning of the line, or preceded by a non-word constituent
+# character. Similarly, it must be either at the end of the line or
+# followed by a non-word constituent character. Word-constituent
+# characters are letters, digits, and the underscore.
+#
+# IOW, the above helps us find the pattern with leading or training
+# spaces or non word consituents like , or ;
+#
+# -i -> --ignore-case (case insensitive search)
+#
+# -o -> --only-matching (only print matching portion of the line)
+#
+# -E -> --extended-regexp (use extended regular expression)
+#
+# ^
+# The search begins at the start of each line
+#
+# [[:space:]]*
+# Any number of spaces is accepted
+#
+# (Fixes|Updates)
+# Finds 'Fixes' OR 'Updates' in any case combination
+#
+# (:)?
+# Followed by an optional : (colon)
+#
+# [[:space:]]+
+# Followed by 1 or more spaces
+#
+# #
+# Followed by #
+#
+# [[:digit:]]+
+# Followed by 1 or more digits
+REFRE="^[[:space:]]*(Fixes|Updates)(:)?[[:space:]]+#[[:digit:]]+"
+
editor_mode()
{
if [ $(basename "$1") = "git-rebase-todo" ]; then
@@ -130,42 +193,32 @@ editor_mode()
if [ $(basename "$1") = "COMMIT_EDITMSG" ]; then
# see note above function warn_reference_missing for regex elaboration
# Lets first check for github issues
- ref=$(git log -n1 --format='%b' | grep -ow -E "([fF][iI][xX][eE][sS]|[uU][pP][dD][aA][tT][eE][sS])(:)?[[:space:]]+(gluster\/glusterfs)?#[[:digit:]]+" | awk -F '#' '{print $2}');
- if [ "x${ref}" = "x" ]; then
- # if not found, check for bugs
- ref=$(git log -n1 --format='%b' | grep -ow -E "([fF][iI][xX][eE][sS]|[uU][pP][dD][aA][tT][eE][sS])(:)?[[:space:]]+bz#[[:digit:]]+" | awk -F '#' '{print $2}');
- fi
-
+ ref=$(git log -n1 --format='%b' | grep -iow -E "${REFRE}" | awk -F '#' '{print $2}');
if [ "x${ref}" != "x" ]; then
return;
fi
while true; do
echo Commit: "\"$(head -n 1 $1)\""
- echo -n "Reference (Bugzilla ID or Github Issue ID): "
- read bug
- if [ -z "$bug" ]; then
+ echo -n "Github Issue ID: "
+ read issue
+ if [ -z "$issue" ]; then
return;
fi
- if ! is_num "$bug"; then
- echo "Invalid reference ID ($bug)!!!";
+ if ! is_num "$issue"; then
+ echo "Invalid Github Issue ID!!!";
continue;
fi
- bz_string="bz"
- if [ $bug -lt 742000 ]; then
- bz_string=""
- fi
-
- echo "Select yes '(y)' if this patch fixes the bug/feature completely,"
+ echo "Select yes '(y)' if this patch fixes the issue/feature completely,"
echo -n "or is the last of the patchset which brings feature (Y/n): "
read fixes
- fixes_string="fixes"
+ fixes_string="Fixes"
if [ "${fixes}" = 'N' ] || [ "${fixes}" = 'n' ]; then
- fixes_string="updates"
+ fixes_string="Updates"
fi
- sed "/^Change-Id:/{p; s/^.*$/${fixes_string}: ${bz_string}#${bug}/;}" $1 > $1.new && \
+ sed "/^Change-Id:/{p; s/^.*$/${fixes_string}: #${issue}/;}" $1 > $1.new && \
mv $1.new $1;
return;
done
@@ -181,109 +234,33 @@ EOF
assert_diverge()
{
- git diff $ORIGIN/$branch..HEAD | grep -q .;
+ git diff $UPSTREAM/$branch..HEAD | grep -q .;
}
-check_patches_for_coding_style()
-{
- git fetch $ORIGIN;
-
- check_patch_script=./build-aux/checkpatch.pl
- if [ ! -e ${check_patch_script} ] ; then
- echo "${check_patch_script} is not executable .. abort"
- exit 1
- fi
-
- # The URL of our Gerrit server
- export GERRIT_URL="review.gluster.org"
-
- echo "Running coding guidelines check ..."
- head=$(git rev-parse --abbrev-ref HEAD)
- # Kludge: "1>&2 && echo $? || echo $?" is to get around
- # "-e" from script invocation
- RES=$(git format-patch --stdout $ORIGIN/${branch}..${head} \
- | ${check_patch_script} --strict --terse - 1>&2 && echo $? || echo $?)
- if [ "$RES" -eq 1 ] ; then
- echo "Errors caught, get details by:"
- echo " git format-patch --stdout $ORIGIN/${branch}..${head} \\"
- echo " | ${check_patch_script} --strict --gerrit-url ${GERRIT_URL} -"
- echo "and correct errors"
- exit 1
- elif [ "$RES" -eq 2 ] ; then
- echo "Warnings or checks caught, get details by:"
- echo " git format-patch --stdout $ORIGIN/${branch}..${head} \\"
- echo " | ${check_patch_script} --strict --gerrit-url ${GERRIT_URL} -"
- echo -n "Do you want to continue anyway [no/yes]: "
- read yesno
- if [ "${yesno}" != "yes" ] ; then
- echo "Aborting..."
- exit 1
- fi
- fi
-}
-
-# Regex elaborated:
-# grep -w -> --word-regexp (from the man page)
-# Select only those lines containing matches that form whole words.
-# The test is that the matching substring must either be at the
-# beginning of the line, or preceded by a non-word constituent
-# character. Similarly, it must be either at the end of the line or
-# followed by a non-word constituent character. Word-constituent
-# characters are letters, digits, and the underscore.
-# IOW, the above helps us find the pattern with leading or training spaces
-# or non word consituents like , or ;
-#
-# [fF][iI][xX][eE][sS]|[uU][pP][dD][aA][tT][eE][sS])
-# Finds 'fixes' OR 'updates' in any case combination
-#
-# (:)?
-# Followed by an optional : (colon)
-#
-# [[:space:]]+
-# followed by 1 or more spaces
-#
-# (gluster\/glusterfs)?
-# Followed by 0 or more gluster/glusterfs
-#
-# #
-# Followed by #
-#
-# [[:digit:]]+
-# Followed by 1 or more digits
warn_reference_missing()
{
echo ""
echo "=== Missing a reference in commit! ==="
echo ""
- echo "Gluster commits are made with a reference to a bug or a github issue"
- echo ""
- echo "Submissions that are enhancements (IOW, not functional"
- echo "bug fixes, but improvements of any nature to the code) are tracked"
- echo "using github issues [1]."
+ echo "Gluster commits are made with a reference to a github issue"
echo ""
- echo "Submissions that are bug fixes are tracked using Bugzilla [2]."
+ echo "A check on the commit message, reveals that there is no "
+ echo "github issue referenced in the commit message."
echo ""
- echo "A check on the commit message, reveals that there is no bug or"
- echo "github issue referenced in the commit message"
+ echo "https://github.com/gluster/glusterfs/issues/new"
echo ""
- echo "[1] https://github.com/gluster/glusterfs/issues/new"
- echo "[2] https://bugzilla.redhat.com/enter_bug.cgi?product=GlusterFS"
+ echo "Please open an issue and reference the same in the commit message "
+ echo "using the following tags:"
echo ""
- echo "Please file an issue or a bug report and reference the same in the"
- echo "commit message using the following tags:"
- echo "GitHub Issues:"
- echo "\"Fixes: gluster/glusterfs#n\" OR \"Updates: gluster/glusterfs#n\","
- echo "\"Fixes: #n\" OR \"Updates: #n\","
- echo "Bugzilla ID:"
- echo "\"Fixes: bz#n\" OR \"Updates: bz#n\","
- echo "where n is the issue or bug number"
+ echo "\"Fixes: #NNNN\" OR \"Updates: #NNNN\","
+ echo "where NNNN is the issue id"
echo ""
echo "You may abort the submission choosing 'N' below and use"
echo "'git commit --amend' to add the issue reference before posting"
echo "to gerrit."
echo ""
- echo -n "Missing reference to a bug or a github issue. Continue (y/N): "
+ echo -n "Missing reference to a github issue. Continue (y/N): "
read moveon
if [ "${moveon}" = 'Y' ] || [ "${moveon}" = 'y' ]; then
return;
@@ -303,7 +280,7 @@ main()
return;
fi
- # check_patches_for_coding_style
+ git fetch $UPSTREAM;
rebase_changes;
@@ -311,12 +288,12 @@ main()
assert_diverge;
- # see note above function warn_reference_missing for regex elaboration
- reference=$(git log -n1 --format='%b' | grep -ow -E "([fF][iI][xX][eE][sS]|[uU][pP][dD][aA][tT][eE][sS])(:)?[[:space:]]+(gluster\/glusterfs)?(bz)?#[[:digit:]]+" | awk -F '#' '{print $2}');
+ # see note above variable REFRE for regex elaboration
+ reference=$(git log -n1 --format='%b' | grep -iow -E "${REFRE}" | awk -F '#' '{print $2}');
- # If this is a commit against master and does not have a bug ID or a github
+ # If this is a commit against 'devel' and does not have a github
# issue reference. Warn the contributor that one of the 2 is required
- if [ -z "${reference}" ] && [ $branch = "master" ]; then
+ if [ -z "${reference}" ] && [ $branch = "devel" ]; then
warn_reference_missing;
fi
@@ -344,9 +321,9 @@ main()
fi
if [ -z "${reference}" ]; then
- $drier git push $ORIGIN HEAD:refs/for/$branch/rfc;
+ $drier git push $USER_REPO HEAD:temp_${branch}/$(date +%Y-%m-%d_%s);
else
- $drier git push $ORIGIN HEAD:refs/for/$branch/ref-${reference};
+ $drier git push $USER_REPO HEAD:issue${reference}_${branch};
fi
}
diff --git a/rpc/rpc-lib/src/Makefile.am b/rpc/rpc-lib/src/Makefile.am
index 81a96476883..35c9db07e7f 100644
--- a/rpc/rpc-lib/src/Makefile.am
+++ b/rpc/rpc-lib/src/Makefile.am
@@ -2,7 +2,7 @@ lib_LTLIBRARIES = libgfrpc.la
libgfrpc_la_SOURCES = auth-unix.c rpcsvc-auth.c rpcsvc.c auth-null.c \
rpc-transport.c xdr-rpc.c xdr-rpcclnt.c rpc-clnt.c auth-glusterfs.c \
- rpc-drc.c $(CONTRIBDIR)/sunrpc/xdr_sizeof.c rpc-clnt-ping.c \
+ rpc-drc.c rpc-clnt-ping.c \
autoscale-threads.c mgmt-pmap.c
EXTRA_DIST = libgfrpc.sym
diff --git a/rpc/rpc-lib/src/auth-glusterfs.c b/rpc/rpc-lib/src/auth-glusterfs.c
index d569a0403f8..69a96f7512f 100644
--- a/rpc/rpc-lib/src/auth-glusterfs.c
+++ b/rpc/rpc-lib/src/auth-glusterfs.c
@@ -9,8 +9,7 @@
*/
#include "rpcsvc.h"
-#include "list.h"
-#include "dict.h"
+#include <glusterfs/dict.h>
#include "xdr-rpc.h"
#include "xdr-common.h"
#include "rpc-common-xdr.h"
diff --git a/rpc/rpc-lib/src/auth-null.c b/rpc/rpc-lib/src/auth-null.c
index 46046e8e440..6d059b9da50 100644
--- a/rpc/rpc-lib/src/auth-null.c
+++ b/rpc/rpc-lib/src/auth-null.c
@@ -9,8 +9,7 @@
*/
#include "rpcsvc.h"
-#include "list.h"
-#include "dict.h"
+#include <glusterfs/dict.h>
int
auth_null_request_init(rpcsvc_request_t *req, void *priv)
diff --git a/rpc/rpc-lib/src/auth-unix.c b/rpc/rpc-lib/src/auth-unix.c
index c53870fcf94..61d475a5e84 100644
--- a/rpc/rpc-lib/src/auth-unix.c
+++ b/rpc/rpc-lib/src/auth-unix.c
@@ -9,8 +9,7 @@
*/
#include "rpcsvc.h"
-#include "list.h"
-#include "dict.h"
+#include <glusterfs/dict.h>
#include "xdr-rpc.h"
int
diff --git a/rpc/rpc-lib/src/autoscale-threads.c b/rpc/rpc-lib/src/autoscale-threads.c
index d629a1cd430..a954ae7a27a 100644
--- a/rpc/rpc-lib/src/autoscale-threads.c
+++ b/rpc/rpc-lib/src/autoscale-threads.c
@@ -8,7 +8,7 @@
cases as published by the Free Software Foundation.
*/
-#include "gf-event.h"
+#include <glusterfs/gf-event.h>
#include "rpcsvc.h"
void
@@ -18,5 +18,5 @@ rpcsvc_autoscale_threads(glusterfs_ctx_t *ctx, rpcsvc_t *rpc, int incr)
int thread_count = pool->eventthreadcount;
pool->auto_thread_count += incr;
- (void)event_reconfigure_threads(pool, thread_count + incr);
+ (void)gf_event_reconfigure_threads(pool, thread_count + incr);
}
diff --git a/rpc/rpc-lib/src/libgfrpc.sym b/rpc/rpc-lib/src/libgfrpc.sym
index 4f42485044f..e026d80259b 100644
--- a/rpc/rpc-lib/src/libgfrpc.sym
+++ b/rpc/rpc-lib/src/libgfrpc.sym
@@ -3,7 +3,6 @@ rpcclnt_cbk_program_register
rpc_clnt_cleanup_and_start
rpc_clnt_connection_cleanup
rpc_clnt_disable
-rpc_clnt_disconnect
rpc_clnt_new
rpc_clnt_reconfig
rpc_clnt_reconnect
@@ -26,6 +25,7 @@ rpcsvc_drc_priv
rpcsvc_drc_reconfigure
rpcsvc_get_program_vector_sizer
rpcsvc_init
+rpcsvc_destroy
rpcsvc_init_options
rpcsvc_listener_destroy
rpcsvc_program_register
@@ -65,3 +65,4 @@ rpc_transport_unix_options_build
rpc_transport_unref
rpc_clnt_mgmt_pmap_signout
rpcsvc_autoscale_threads
+rpcsvc_statedump
diff --git a/rpc/rpc-lib/src/mgmt-pmap.c b/rpc/rpc-lib/src/mgmt-pmap.c
index 344ec56bbf7..25a7148e5a3 100644
--- a/rpc/rpc-lib/src/mgmt-pmap.c
+++ b/rpc/rpc-lib/src/mgmt-pmap.c
@@ -12,7 +12,6 @@
#include "protocol-common.h"
#include "rpc-clnt.h"
#include "xdr-generic.h"
-#include "xlator.h"
/* Defining a minimal RPC client program for portmap signout
*/
@@ -35,7 +34,9 @@ mgmt_pmap_signout_cbk(struct rpc_req *req, struct iovec *iov, int count,
0,
};
int ret = 0;
+ call_frame_t *frame = NULL;
+ frame = myframe;
if (-1 == req->rpc_status) {
rsp.op_ret = -1;
rsp.op_errno = EINVAL;
@@ -56,6 +57,10 @@ mgmt_pmap_signout_cbk(struct rpc_req *req, struct iovec *iov, int count,
goto out;
}
out:
+ if (frame) {
+ STACK_DESTROY(frame->root);
+ }
+
return 0;
}
diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h
index 4950857ae9e..0cb5862e9a9 100644
--- a/rpc/rpc-lib/src/protocol-common.h
+++ b/rpc/rpc-lib/src/protocol-common.h
@@ -68,6 +68,7 @@ enum gf_fop_procnum {
GFS3_OP_ICREATE,
GFS3_OP_NAMELINK,
GFS3_OP_PUT,
+ GFS3_OP_COPY_FILE_RANGE,
GFS3_OP_MAXVALUE,
};
@@ -244,6 +245,8 @@ enum glusterd_brick_procnum {
GLUSTERD_NODE_BITROT,
GLUSTERD_BRICK_ATTACH,
GLUSTERD_DUMP_METRICS,
+ GLUSTERD_SVC_ATTACH,
+ GLUSTERD_SVC_DETACH,
GLUSTERD_BRICK_MAXVALUE,
};
@@ -306,6 +309,7 @@ enum glusterd_mgmt_v3_procnum {
GLUSTERD_MGMT_V3_PRE_VALIDATE,
GLUSTERD_MGMT_V3_BRICK_OP,
GLUSTERD_MGMT_V3_COMMIT,
+ GLUSTERD_MGMT_V3_POST_COMMIT,
GLUSTERD_MGMT_V3_POST_VALIDATE,
GLUSTERD_MGMT_V3_UNLOCK,
GLUSTERD_MGMT_V3_MAXVALUE,
diff --git a/rpc/rpc-lib/src/rpc-clnt-ping.c b/rpc/rpc-lib/src/rpc-clnt-ping.c
index 7c2026f9086..31f17841bea 100644
--- a/rpc/rpc-lib/src/rpc-clnt-ping.c
+++ b/rpc/rpc-lib/src/rpc-clnt-ping.c
@@ -10,14 +10,14 @@
#include "rpc-clnt.h"
#include "rpc-clnt-ping.h"
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
#include "xdr-rpcclnt.h"
#include "rpc-transport.h"
#include "protocol-common.h"
-#include "mem-pool.h"
+#include <glusterfs/mem-pool.h>
#include "xdr-rpc.h"
#include "rpc-common-xdr.h"
-#include "timespec.h"
+#include <glusterfs/timespec.h>
char *clnt_ping_procs[GF_DUMP_MAXVALUE] = {
[GF_DUMP_PING] = "NULL",
@@ -122,7 +122,7 @@ rpc_clnt_ping_timer_expired(void *rpc_ptr)
goto out;
}
- clock_gettime(CLOCK_REALTIME, &current);
+ timespec_now_realtime(&current);
pthread_mutex_lock(&conn->lock);
{
unref = rpc_clnt_remove_ping_timer_locked(rpc);
diff --git a/rpc/rpc-lib/src/rpc-clnt-ping.h b/rpc/rpc-lib/src/rpc-clnt-ping.h
index fcbac6f9e21..e5466a828c2 100644
--- a/rpc/rpc-lib/src/rpc-clnt-ping.h
+++ b/rpc/rpc-lib/src/rpc-clnt-ping.h
@@ -8,6 +8,7 @@
cases as published by the Free Software Foundation.
*/
+struct rpc_clnt;
#define RPC_DEFAULT_PING_TIMEOUT 30
void
rpc_clnt_check_and_start_ping(struct rpc_clnt *rpc_ptr);
diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c
index b26d645bb12..517037c4a5d 100644
--- a/rpc/rpc-lib/src/rpc-clnt.c
+++ b/rpc/rpc-lib/src/rpc-clnt.c
@@ -12,11 +12,11 @@
#include "rpc-clnt.h"
#include "rpc-clnt-ping.h"
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
#include "xdr-rpcclnt.h"
#include "rpc-transport.h"
#include "protocol-common.h"
-#include "mem-pool.h"
+#include <glusterfs/mem-pool.h>
#include "xdr-rpc.h"
#include "rpc-common-xdr.h"
@@ -97,7 +97,7 @@ call_bail(void *data)
struct saved_frame *saved_frame = NULL;
struct saved_frame *trav = NULL;
struct saved_frame *tmp = NULL;
- char frame_sent[256] = {
+ char frame_sent[GF_TIMESTR_SIZE] = {
0,
};
struct timespec timeout = {
@@ -105,7 +105,6 @@ call_bail(void *data)
};
char peerid[UNIX_PATH_MAX] = {0};
gf_boolean_t need_unref = _gf_false;
- int len;
GF_VALIDATE_OR_GOTO("client", data, out);
@@ -165,11 +164,8 @@ call_bail(void *data)
list_for_each_entry_safe(trav, tmp, &list, list)
{
- gf_time_fmt(frame_sent, sizeof frame_sent, trav->saved_at.tv_sec,
- gf_timefmt_FT);
- len = strlen(frame_sent);
- snprintf(frame_sent + len, sizeof(frame_sent) - len,
- ".%" GF_PRI_SUSECONDS, trav->saved_at.tv_usec);
+ gf_time_fmt_tv(frame_sent, sizeof frame_sent, &trav->saved_at,
+ gf_timefmt_FT);
gf_log(conn->name, GF_LOG_ERROR,
"bailing out frame type(%s), op(%s(%d)), xid = 0x%x, "
@@ -317,20 +313,15 @@ saved_frames_unwind(struct saved_frames *saved_frames)
{
struct saved_frame *trav = NULL;
struct saved_frame *tmp = NULL;
- char timestr[1024] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
- int len;
list_splice_init(&saved_frames->lk_sf.list, &saved_frames->sf.list);
list_for_each_entry_safe(trav, tmp, &saved_frames->sf.list, list)
{
- gf_time_fmt(timestr, sizeof timestr, trav->saved_at.tv_sec,
- gf_timefmt_FT);
- len = strlen(timestr);
- snprintf(timestr + len, sizeof(timestr) - len, ".%" GF_PRI_SUSECONDS,
- trav->saved_at.tv_usec);
+ gf_time_fmt_tv(timestr, sizeof timestr, &trav->saved_at, gf_timefmt_FT);
if (!trav->rpcreq || !trav->rpcreq->prog)
continue;
@@ -376,19 +367,20 @@ rpc_clnt_reconnect(void *conn_ptr)
struct timespec ts = {0, 0};
struct rpc_clnt *clnt = NULL;
gf_boolean_t need_unref = _gf_false;
+ gf_boolean_t canceled_unref = _gf_false;
conn = conn_ptr;
clnt = conn->rpc_clnt;
-
pthread_mutex_lock(&conn->lock);
{
trans = conn->trans;
- if (!trans) {
- pthread_mutex_unlock(&conn->lock);
- return;
+ if (!trans)
+ goto out_unlock;
+
+ if (conn->reconnect) {
+ if (!gf_timer_call_cancel(clnt->ctx, conn->reconnect))
+ canceled_unref = _gf_true;
}
- if (conn->reconnect)
- gf_timer_call_cancel(clnt->ctx, conn->reconnect);
conn->reconnect = 0;
if ((conn->connected == 0) && !clnt->disabled) {
@@ -409,11 +401,14 @@ rpc_clnt_reconnect(void *conn_ptr)
gf_log(conn->name, GF_LOG_TRACE, "breaking reconnect chain");
}
}
+out_unlock:
pthread_mutex_unlock(&conn->lock);
rpc_clnt_unref(clnt);
if (need_unref)
rpc_clnt_unref(clnt);
+ if (canceled_unref)
+ rpc_clnt_unref(clnt);
return;
}
@@ -495,6 +490,7 @@ rpc_clnt_connection_cleanup(rpc_clnt_connection_t *conn)
int unref = 0;
int ret = 0;
gf_boolean_t timer_unref = _gf_false;
+ gf_boolean_t reconnect_unref = _gf_false;
if (!conn) {
goto out;
@@ -514,6 +510,12 @@ rpc_clnt_connection_cleanup(rpc_clnt_connection_t *conn)
timer_unref = _gf_true;
conn->timer = NULL;
}
+ if (conn->reconnect) {
+ ret = gf_timer_call_cancel(clnt->ctx, conn->reconnect);
+ if (!ret)
+ reconnect_unref = _gf_true;
+ conn->reconnect = NULL;
+ }
conn->connected = 0;
conn->disconnected = 1;
@@ -533,6 +535,8 @@ rpc_clnt_connection_cleanup(rpc_clnt_connection_t *conn)
if (timer_unref)
rpc_clnt_unref(clnt);
+ if (reconnect_unref)
+ rpc_clnt_unref(clnt);
out:
return 0;
}
@@ -830,7 +834,7 @@ rpc_clnt_handle_disconnect(struct rpc_clnt *clnt, rpc_clnt_connection_t *conn)
pthread_mutex_lock(&conn->lock);
{
if (!conn->rpc_clnt->disabled && (conn->reconnect == NULL)) {
- ts.tv_sec = 10;
+ ts.tv_sec = 3;
ts.tv_nsec = 0;
rpc_clnt_ref(clnt);
@@ -915,7 +919,7 @@ rpc_clnt_notify(rpc_transport_t *trans, void *mydata,
}
case RPC_TRANSPORT_MSG_RECEIVED: {
- clock_gettime(CLOCK_REALTIME, &conn->last_received);
+ timespec_now_realtime(&conn->last_received);
pollin = data;
if (pollin->is_reply)
@@ -929,8 +933,7 @@ rpc_clnt_notify(rpc_transport_t *trans, void *mydata,
}
case RPC_TRANSPORT_MSG_SENT: {
- clock_gettime(CLOCK_REALTIME, &conn->last_sent);
-
+ timespec_now_realtime(&conn->last_sent);
ret = 0;
break;
}
@@ -947,6 +950,7 @@ rpc_clnt_notify(rpc_transport_t *trans, void *mydata,
conn->config.remote_port = 0;
conn->connected = 1;
conn->disconnected = 0;
+ pthread_cond_broadcast(&conn->cond);
}
pthread_mutex_unlock(&conn->lock);
@@ -992,6 +996,7 @@ rpc_clnt_connection_init(struct rpc_clnt *clnt, glusterfs_ctx_t *ctx,
conn = &clnt->conn;
pthread_mutex_init(&clnt->conn.lock, NULL);
+ pthread_cond_init(&clnt->conn.cond, NULL);
conn->name = gf_strdup(name);
if (!conn->name) {
@@ -1116,8 +1121,6 @@ rpc_clnt_new(dict_t *options, xlator_t *owner, char *name,
mem_pool_destroy(rpc->saved_frames_pool);
GF_FREE(rpc);
rpc = NULL;
- if (options)
- dict_unref(options);
goto out;
}
@@ -1684,7 +1687,8 @@ rpc_clnt_submit(struct rpc_clnt *rpc, rpc_clnt_prog_t *prog, int procnum,
goto unlock;
ret = rpc_transport_connect(conn->trans, conn->config.remote_port);
if (ret < 0) {
- gf_log(conn->name, GF_LOG_WARNING,
+ gf_log(conn->name,
+ (errno == EINPROGRESS) ? GF_LOG_DEBUG : GF_LOG_WARNING,
"error returned while attempting to "
"connect to host:%s, port:%d",
conn->config.remote_host, conn->config.remote_port);
@@ -1778,7 +1782,7 @@ rpc_clnt_trigger_destroy(struct rpc_clnt *rpc)
* ref*/
conn = &rpc->conn;
trans = conn->trans;
- rpc_clnt_disconnect(rpc);
+ rpc_clnt_disable(rpc);
/* This is to account for rpc_clnt_disable that might have been called
* before rpc_clnt_unref */
@@ -1818,6 +1822,7 @@ rpc_clnt_destroy(struct rpc_clnt *rpc)
saved_frames_destroy(saved_frames);
pthread_mutex_destroy(&rpc->lock);
pthread_mutex_destroy(&rpc->conn.lock);
+ pthread_cond_destroy(&rpc->conn.cond);
/* mem-pool should be destroyed, otherwise,
it will cause huge memory leaks */
@@ -1850,7 +1855,7 @@ rpc_clnt_unref(struct rpc_clnt *rpc)
return rpc;
}
-void
+int
rpc_clnt_disable(struct rpc_clnt *rpc)
{
rpc_clnt_connection_t *conn = NULL;
@@ -1894,81 +1899,9 @@ rpc_clnt_disable(struct rpc_clnt *rpc)
}
pthread_mutex_unlock(&conn->lock);
+ ret = -1;
if (trans) {
- rpc_transport_disconnect(trans, _gf_true);
- /* The auth_value was being reset to AUTH_GLUSTERFS_v2.
- * if (clnt->auth_value)
- * clnt->auth_value = AUTH_GLUSTERFS_v2;
- * It should not be reset here. The disconnect during
- * portmap request can race with handshake. If handshake
- * happens first and disconnect later, auth_value would set
- * to default value and it never sets back to actual auth_value
- * supported by server. But it's important to set to lower
- * version supported in the case where the server downgrades.
- * So moving this code to RPC_TRANSPORT_CONNECT. Note that
- * CONNECT cannot race with handshake as by nature it is
- * serialized with handhake. An handshake can happen only
- * on a connected transport and hence its strictly serialized.
- */
- }
-
- if (unref)
- rpc_clnt_unref(rpc);
-
- if (timer_unref)
- rpc_clnt_unref(rpc);
-
- if (reconnect_unref)
- rpc_clnt_unref(rpc);
-
-out:
- return;
-}
-
-void
-rpc_clnt_disconnect(struct rpc_clnt *rpc)
-{
- rpc_clnt_connection_t *conn = NULL;
- rpc_transport_t *trans = NULL;
- int unref = 0;
- int ret = 0;
- gf_boolean_t timer_unref = _gf_false;
- gf_boolean_t reconnect_unref = _gf_false;
-
- if (!rpc)
- goto out;
-
- conn = &rpc->conn;
-
- pthread_mutex_lock(&conn->lock);
- {
- rpc->disabled = 1;
- if (conn->timer) {
- ret = gf_timer_call_cancel(rpc->ctx, conn->timer);
- /* If the event is not fired and it actually cancelled
- * the timer, do the unref else registered call back
- * function will take care of unref.
- */
- if (!ret)
- timer_unref = _gf_true;
- conn->timer = NULL;
- }
-
- if (conn->reconnect) {
- ret = gf_timer_call_cancel(rpc->ctx, conn->reconnect);
- if (!ret)
- reconnect_unref = _gf_true;
- conn->reconnect = NULL;
- }
- conn->connected = 0;
-
- unref = rpc_clnt_remove_ping_timer_locked(rpc);
- trans = conn->trans;
- }
- pthread_mutex_unlock(&conn->lock);
-
- if (trans) {
- rpc_transport_disconnect(trans, _gf_true);
+ ret = rpc_transport_disconnect(trans, _gf_true);
/* The auth_value was being reset to AUTH_GLUSTERFS_v2.
* if (clnt->auth_value)
* clnt->auth_value = AUTH_GLUSTERFS_v2;
@@ -1994,7 +1927,7 @@ rpc_clnt_disconnect(struct rpc_clnt *rpc)
rpc_clnt_unref(rpc);
out:
- return;
+ return ret;
}
void
diff --git a/rpc/rpc-lib/src/rpc-clnt.h b/rpc/rpc-lib/src/rpc-clnt.h
index d80df7f8b96..2945265200b 100644
--- a/rpc/rpc-lib/src/rpc-clnt.h
+++ b/rpc/rpc-lib/src/rpc-clnt.h
@@ -11,9 +11,9 @@
#ifndef __RPC_CLNT_H
#define __RPC_CLNT_H
-#include "stack.h"
+#include <glusterfs/stack.h>
#include "rpc-transport.h"
-#include "timer.h"
+#include <glusterfs/timer.h>
#include "xdr-common.h"
#include "glusterfs3.h"
@@ -85,8 +85,8 @@ typedef int (*rpcclnt_cb_fn)(struct rpc_clnt *rpc, void *mydata, void *data);
*/
typedef struct rpcclnt_actor_desc {
char procname[32];
- int procnum;
rpcclnt_cb_fn actor;
+ int procnum;
} rpcclnt_cb_actor_t;
/* Describes a program and its version along with the function pointers
@@ -98,8 +98,6 @@ typedef struct rpcclnt_cb_program {
int prognum;
int progver;
rpcclnt_cb_actor_t *actors; /* All procedure handlers */
- int numactors; /* Num actors in actor array */
-
/* Program specific state handed to actors */
void *private;
@@ -108,6 +106,8 @@ typedef struct rpcclnt_cb_program {
/* Needed for passing back in cb_actor */
void *mydata;
+ int numactors; /* Num actors in actor array */
+
} rpcclnt_cb_program_t;
typedef struct rpc_auth_data {
@@ -127,6 +127,7 @@ struct rpc_clnt_config {
struct rpc_clnt_connection {
pthread_mutex_t lock;
+ pthread_cond_t cond;
rpc_transport_t *trans;
struct rpc_clnt_config config;
gf_timer_t *reconnect;
@@ -151,17 +152,17 @@ typedef struct rpc_clnt_connection rpc_clnt_connection_t;
struct rpc_req {
rpc_clnt_connection_t *conn;
struct iovec req[2];
- int reqcnt;
struct iobref *req_iobref;
struct iovec rsp[2];
int rspcnt;
+ int reqcnt;
struct iobref *rsp_iobref;
- int rpc_status;
- rpc_auth_data_t verf;
rpc_clnt_prog_t *prog;
- int procnum;
+ rpc_auth_data_t verf;
fop_cbk_fn_t cbkfn;
void *conn_private;
+ int procnum;
+ int rpc_status;
uint32_t xid;
};
@@ -182,8 +183,8 @@ typedef struct rpc_clnt {
glusterfs_ctx_t *ctx;
gf_atomic_t refcount;
- int auth_value;
xlator_t *owner;
+ int auth_value;
char disabled;
} rpc_clnt_t;
@@ -250,12 +251,9 @@ int
rpcclnt_cbk_program_register(struct rpc_clnt *svc,
rpcclnt_cb_program_t *program, void *mydata);
-void
+int
rpc_clnt_disable(struct rpc_clnt *rpc);
-void
-rpc_clnt_disconnect(struct rpc_clnt *rpc);
-
int
rpc_clnt_mgmt_pmap_signout(glusterfs_ctx_t *ctx, char *brick_name);
diff --git a/rpc/rpc-lib/src/rpc-drc.c b/rpc/rpc-lib/src/rpc-drc.c
index 50013776c86..de8dc630626 100644
--- a/rpc/rpc-lib/src/rpc-drc.c
+++ b/rpc/rpc-lib/src/rpc-drc.c
@@ -12,11 +12,9 @@
#ifndef RPC_DRC_H
#include "rpc-drc.h"
#endif
-#include "locking.h"
-#include "hashfn.h"
-#include "common-utils.h"
-#include "statedump.h"
-#include "mem-pool.h"
+#include <glusterfs/locking.h>
+#include <glusterfs/statedump.h>
+#include <glusterfs/mem-pool.h>
#include <netinet/in.h>
#include <unistd.h>
@@ -192,7 +190,7 @@ rpcsvc_get_drc_client(rpcsvc_drc_globals_t *drc,
if (!client)
goto out;
- client->ref = 0;
+ GF_ATOMIC_INIT(client->ref, 0);
client->sock_union = (union gf_sock_union) * sockaddr;
client->op_count = 0;
INIT_LIST_HEAD(&client->client_list);
@@ -248,7 +246,7 @@ static drc_client_t *
rpcsvc_drc_client_ref(drc_client_t *client)
{
GF_ASSERT(client);
- client->ref++;
+ GF_ATOMIC_INC(client->ref);
return client;
}
@@ -263,11 +261,12 @@ rpcsvc_drc_client_ref(drc_client_t *client)
static drc_client_t *
rpcsvc_drc_client_unref(rpcsvc_drc_globals_t *drc, drc_client_t *client)
{
+ uint32_t refcount;
+
GF_ASSERT(drc);
- GF_ASSERT(client->ref);
- client->ref--;
- if (!client->ref) {
+ refcount = GF_ATOMIC_DEC(client->ref);
+ if (!refcount) {
drc->client_count--;
rpcsvc_remove_drc_client(client);
client = NULL;
@@ -591,7 +590,7 @@ rpcsvc_drc_priv(rpcsvc_drc_globals_t *drc)
}
gf_proc_dump_build_key(key, "client", "%d.ref_count", i);
- gf_proc_dump_write(key, "%d", client->ref);
+ gf_proc_dump_write(key, "%" PRIu32, GF_ATOMIC_GET(client->ref));
gf_proc_dump_build_key(key, "client", "%d.op_count", i);
gf_proc_dump_write(key, "%d", client->op_count);
i++;
@@ -699,47 +698,42 @@ rpcsvc_drc_init(rpcsvc_t *svc, dict_t *options)
LOCK_INIT(&drc->lock);
svc->drc = drc;
- LOCK(&drc->lock);
-
/* Specify type of DRC to be used */
ret = dict_get_uint32(options, "nfs.drc-type", &drc_type);
if (ret) {
gf_log(GF_RPCSVC, GF_LOG_DEBUG,
- "drc type not set."
- " Continuing with default");
+ "drc type not set. Continuing with default");
drc_type = DRC_DEFAULT_TYPE;
}
- drc->type = drc_type;
-
/* Set the global cache size (no. of ops to cache) */
ret = dict_get_uint32(options, "nfs.drc-size", &drc_size);
if (ret) {
gf_log(GF_RPCSVC, GF_LOG_DEBUG,
- "drc size not set."
- " Continuing with default size");
+ "drc size not set. Continuing with default size");
drc_size = DRC_DEFAULT_CACHE_SIZE;
}
+ LOCK(&drc->lock);
+
+ drc->type = drc_type;
drc->global_cache_size = drc_size;
/* Mempool for cached ops */
drc->mempool = mem_pool_new(drc_cached_op_t, drc->global_cache_size);
if (!drc->mempool) {
+ UNLOCK(&drc->lock);
gf_log(GF_RPCSVC, GF_LOG_ERROR,
- "Failed to get mempool for"
- " DRC, drc-size: %d",
- drc->global_cache_size);
+ "Failed to get mempool for DRC, drc-size: %d", drc_size);
ret = -1;
- goto out;
+ goto post_unlock;
}
/* What percent of cache to be evicted whenever it fills up */
ret = dict_get_uint32(options, "nfs.drc-lru-factor", &drc_factor);
if (ret) {
gf_log(GF_RPCSVC, GF_LOG_DEBUG,
- "drc lru factor not set."
- " Continuing with policy default");
+ "drc lru factor not set. Continuing with policy default");
drc_factor = DRC_DEFAULT_LRU_FACTOR;
}
@@ -750,15 +744,16 @@ rpcsvc_drc_init(rpcsvc_t *svc, dict_t *options)
ret = rpcsvc_register_notify(svc, rpcsvc_drc_notify, THIS);
if (ret) {
+ UNLOCK(&drc->lock);
gf_log(GF_RPCSVC, GF_LOG_ERROR,
"registration of drc_notify function failed");
- goto out;
+ goto post_unlock;
}
- gf_log(GF_RPCSVC, GF_LOG_DEBUG, "drc init successful");
drc->status = DRC_INITIATED;
-out:
UNLOCK(&drc->lock);
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG, "drc init successful");
+post_unlock:
if (ret == -1) {
if (drc->mempool) {
mem_pool_destroy(drc->mempool);
diff --git a/rpc/rpc-lib/src/rpc-drc.h b/rpc/rpc-lib/src/rpc-drc.h
index 89b7c87e756..ce66430809b 100644
--- a/rpc/rpc-lib/src/rpc-drc.h
+++ b/rpc/rpc-lib/src/rpc-drc.h
@@ -13,8 +13,8 @@
#include "rpcsvc-common.h"
#include "rpcsvc.h"
-#include "locking.h"
-#include "dict.h"
+#include <glusterfs/locking.h>
+#include <glusterfs/dict.h>
#include "rb.h"
/* per-client cache structure */
@@ -24,7 +24,7 @@ struct drc_client {
struct rb_table *rbtree;
/* no. of ops currently cached */
uint32_t op_count;
- uint32_t ref;
+ gf_atomic_uint32_t ref;
struct list_head client_list;
};
diff --git a/rpc/rpc-lib/src/rpc-lib-messages.h b/rpc/rpc-lib/src/rpc-lib-messages.h
index 9251578a121..2c0b820dbf9 100644
--- a/rpc/rpc-lib/src/rpc-lib-messages.h
+++ b/rpc/rpc-lib/src/rpc-lib-messages.h
@@ -11,7 +11,7 @@
#ifndef _RPC_LIB_MESSAGES_H_
#define _RPC_LIB_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
diff --git a/rpc/rpc-lib/src/rpc-transport.c b/rpc/rpc-lib/src/rpc-transport.c
index 54636dcbf00..a6e201a9b36 100644
--- a/rpc/rpc-lib/src/rpc-transport.c
+++ b/rpc/rpc-lib/src/rpc-transport.c
@@ -12,17 +12,9 @@
#include <stdlib.h>
#include <stdio.h>
#include <sys/poll.h>
-#include <fnmatch.h>
#include <stdint.h>
-#include "logging.h"
#include "rpc-transport.h"
-#include "glusterfs.h"
-/* FIXME: xlator.h is needed for volume_option_t, need to define the datatype
- * in some other header
- */
-#include "xlator.h"
-#include "list.h"
#ifndef GF_OPTION_LIST_EMPTY
#define GF_OPTION_LIST_EMPTY(_opt) (_opt->value[0] == NULL)
@@ -68,17 +60,6 @@ out:
}
int32_t
-rpc_transport_get_myname(rpc_transport_t *this, char *hostname, int hostlen)
-{
- int32_t ret = -1;
- GF_VALIDATE_OR_GOTO("rpc", this, out);
-
- ret = this->ops->get_myname(this, hostname, hostlen);
-out:
- return ret;
-}
-
-int32_t
rpc_transport_get_peername(rpc_transport_t *this, char *hostname, int hostlen)
{
int32_t ret = -1;
@@ -92,14 +73,10 @@ out:
int
rpc_transport_throttle(rpc_transport_t *this, gf_boolean_t onoff)
{
- int ret = 0;
-
if (!this->ops->throttle)
return -ENOSYS;
- ret = this->ops->throttle(this, onoff);
-
- return ret;
+ return this->ops->throttle(this, onoff);
}
int32_t
@@ -144,6 +121,8 @@ rpc_transport_pollin_alloc(rpc_transport_t *this, struct iovec *vector,
goto out;
}
+ msg->trans = this;
+
if (count > 1) {
msg->vectored = 1;
}
@@ -159,6 +138,31 @@ out:
return msg;
}
+void
+rpc_transport_cleanup(rpc_transport_t *trans)
+{
+ if (!trans)
+ return;
+
+ if (trans->fini)
+ trans->fini(trans);
+
+ if (trans->options) {
+ dict_unref(trans->options);
+ trans->options = NULL;
+ }
+
+ GF_FREE(trans->name);
+
+ if (trans->xl)
+ pthread_mutex_destroy(&trans->lock);
+
+ if (trans->dl_handle)
+ dlclose(trans->dl_handle);
+
+ GF_FREE(trans);
+}
+
rpc_transport_t *
rpc_transport_load(glusterfs_ctx_t *ctx, dict_t *options, char *trans_name)
{
@@ -166,7 +170,7 @@ rpc_transport_load(glusterfs_ctx_t *ctx, dict_t *options, char *trans_name)
char *name = NULL;
void *handle = NULL;
char *type = NULL;
- char str[] = "ERROR";
+ static char str[] = "ERROR";
int32_t ret = -1;
int is_tcp = 0, is_unix = 0, is_ibsdp = 0;
volume_opt_list_t *vol_opt = NULL;
@@ -191,9 +195,9 @@ rpc_transport_load(glusterfs_ctx_t *ctx, dict_t *options, char *trans_name)
type = str;
/* Backward compatibility */
- ret = dict_get_str(options, "transport-type", &type);
+ ret = dict_get_str_sizen(options, "transport-type", &type);
if (ret < 0) {
- ret = dict_set_str(options, "transport-type", "socket");
+ ret = dict_set_str_sizen(options, "transport-type", "socket");
if (ret < 0)
gf_log("dict", GF_LOG_DEBUG, "setting transport-type failed");
else
@@ -215,15 +219,16 @@ rpc_transport_load(glusterfs_ctx_t *ctx, dict_t *options, char *trans_name)
is_ibsdp = strcmp(type, "ib-sdp");
if ((is_tcp == 0) || (is_unix == 0) || (is_ibsdp == 0)) {
if (is_unix == 0)
- ret = dict_set_str(options, "transport.address-family", "unix");
+ ret = dict_set_str_sizen(options, "transport.address-family",
+ "unix");
if (is_ibsdp == 0)
- ret = dict_set_str(options, "transport.address-family",
- "inet-sdp");
+ ret = dict_set_str_sizen(options, "transport.address-family",
+ "inet-sdp");
if (ret < 0)
gf_log("dict", GF_LOG_DEBUG, "setting address-family failed");
- ret = dict_set_str(options, "transport-type", "socket");
+ ret = dict_set_str_sizen(options, "transport-type", "socket");
if (ret < 0)
gf_log("dict", GF_LOG_DEBUG, "setting transport-type failed");
}
@@ -232,9 +237,9 @@ rpc_transport_load(glusterfs_ctx_t *ctx, dict_t *options, char *trans_name)
/* client-bind-insecure is for clients protocol, and
* bind-insecure for glusterd. Both mutually exclusive
*/
- ret = dict_get_str(options, "client-bind-insecure", &type);
+ ret = dict_get_str_sizen(options, "client-bind-insecure", &type);
if (ret)
- ret = dict_get_str(options, "bind-insecure", &type);
+ ret = dict_get_str_sizen(options, "bind-insecure", &type);
if (ret == 0) {
ret = gf_string2boolean(type, &bind_insecure);
if (ret < 0) {
@@ -253,7 +258,7 @@ rpc_transport_load(glusterfs_ctx_t *ctx, dict_t *options, char *trans_name)
trans->bind_insecure = 1;
}
- ret = dict_get_str(options, "transport-type", &type);
+ ret = dict_get_str_sizen(options, "transport-type", &type);
if (ret < 0) {
gf_log("rpc-transport", GF_LOG_ERROR,
"'option transport-type <xx>' missing in volume '%s'",
@@ -266,7 +271,7 @@ rpc_transport_load(glusterfs_ctx_t *ctx, dict_t *options, char *trans_name)
goto fail;
}
- if (dict_get(options, "notify-poller-death")) {
+ if (dict_get_sizen(options, "notify-poller-death")) {
trans->notify_poller_death = 1;
}
@@ -332,7 +337,7 @@ rpc_transport_load(glusterfs_ctx_t *ctx, dict_t *options, char *trans_name)
}
}
- trans->options = options;
+ trans->options = dict_ref(options);
pthread_mutex_init(&trans->lock, NULL);
trans->xl = this;
@@ -345,6 +350,7 @@ rpc_transport_load(glusterfs_ctx_t *ctx, dict_t *options, char *trans_name)
}
INIT_LIST_HEAD(&trans->list);
+ GF_ATOMIC_INIT(trans->disconnect_progress, 0);
return_trans = trans;
@@ -354,15 +360,7 @@ rpc_transport_load(glusterfs_ctx_t *ctx, dict_t *options, char *trans_name)
fail:
if (!success) {
- if (trans) {
- GF_FREE(trans->name);
-
- if (trans->dl_handle)
- dlclose(trans->dl_handle);
-
- GF_FREE(trans);
- }
-
+ rpc_transport_cleanup(trans);
GF_FREE(name);
return_trans = NULL;
@@ -441,13 +439,10 @@ fail:
return ret;
}
-int32_t
+static void
rpc_transport_destroy(rpc_transport_t *this)
{
struct dnscache6 *cache = NULL;
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
if (this->clnt_options)
dict_unref(this->clnt_options);
@@ -475,10 +470,6 @@ rpc_transport_destroy(rpc_transport_t *this)
}
GF_FREE(this);
-
- ret = 0;
-fail:
- return ret;
}
rpc_transport_t *
@@ -561,16 +552,17 @@ rpc_transport_keepalive_options_set(dict_t *options, int32_t interval,
GF_ASSERT(options);
GF_ASSERT((interval > 0) || (time > 0));
- ret = dict_set_int32(options, "transport.socket.keepalive-interval",
- interval);
+ ret = dict_set_int32_sizen(options, "transport.socket.keepalive-interval",
+ interval);
if (ret)
goto out;
- ret = dict_set_int32(options, "transport.socket.keepalive-time", time);
+ ret = dict_set_int32_sizen(options, "transport.socket.keepalive-time",
+ time);
if (ret)
goto out;
- ret = dict_set_int32(options, "transport.tcp-user-timeout", timeout);
+ ret = dict_set_int32_sizen(options, "transport.tcp-user-timeout", timeout);
if (ret)
goto out;
out:
@@ -578,19 +570,14 @@ out:
}
int
-rpc_transport_unix_options_build(dict_t **options, char *filepath,
+rpc_transport_unix_options_build(dict_t *dict, char *filepath,
int frame_timeout)
{
- dict_t *dict = NULL;
char *fpath = NULL;
int ret = -1;
GF_ASSERT(filepath);
- GF_ASSERT(options);
-
- dict = dict_new();
- if (!dict)
- goto out;
+ GF_VALIDATE_OR_GOTO("rpc-transport", dict, out);
fpath = gf_strdup(filepath);
if (!fpath) {
@@ -598,62 +585,52 @@ rpc_transport_unix_options_build(dict_t **options, char *filepath,
goto out;
}
- ret = dict_set_dynstr(dict, "transport.socket.connect-path", fpath);
+ ret = dict_set_dynstr_sizen(dict, "transport.socket.connect-path", fpath);
if (ret) {
GF_FREE(fpath);
goto out;
}
- ret = dict_set_str(dict, "transport.address-family", "unix");
+ ret = dict_set_str_sizen(dict, "transport.address-family", "unix");
if (ret)
goto out;
- ret = dict_set_str(dict, "transport.socket.nodelay", "off");
+ ret = dict_set_str_sizen(dict, "transport.socket.nodelay", "off");
if (ret)
goto out;
- ret = dict_set_str(dict, "transport-type", "socket");
+ ret = dict_set_str_sizen(dict, "transport-type", "socket");
if (ret)
goto out;
- ret = dict_set_str(dict, "transport.socket.keepalive", "off");
+ ret = dict_set_str_sizen(dict, "transport.socket.keepalive", "off");
if (ret)
goto out;
if (frame_timeout > 0) {
- ret = dict_set_int32(dict, "frame-timeout", frame_timeout);
+ ret = dict_set_int32_sizen(dict, "frame-timeout", frame_timeout);
if (ret)
goto out;
}
-
- *options = dict;
out:
- if (ret && dict) {
- dict_unref(dict);
- }
return ret;
}
int
-rpc_transport_inet_options_build(dict_t **options, const char *hostname,
- int port)
+rpc_transport_inet_options_build(dict_t *dict, const char *hostname, int port,
+ char *af)
{
- dict_t *dict = NULL;
char *host = NULL;
int ret = -1;
#ifdef IPV6_DEFAULT
- char *addr_family = "inet6";
+ static char *addr_family = "inet6";
#else
- char *addr_family = "inet";
+ static char *addr_family = "inet";
#endif
- GF_ASSERT(options);
GF_ASSERT(hostname);
GF_ASSERT(port >= 1024);
-
- dict = dict_new();
- if (!dict)
- goto out;
+ GF_VALIDATE_OR_GOTO("rpc-transport", dict, out);
host = gf_strdup((char *)hostname);
if (!host) {
@@ -661,7 +638,7 @@ rpc_transport_inet_options_build(dict_t **options, const char *hostname,
goto out;
}
- ret = dict_set_dynstr(dict, "remote-host", host);
+ ret = dict_set_dynstr_sizen(dict, "remote-host", host);
if (ret) {
gf_log(THIS->name, GF_LOG_WARNING, "failed to set remote-host with %s",
host);
@@ -669,32 +646,27 @@ rpc_transport_inet_options_build(dict_t **options, const char *hostname,
goto out;
}
- ret = dict_set_int32(dict, "remote-port", port);
+ ret = dict_set_int32_sizen(dict, "remote-port", port);
if (ret) {
gf_log(THIS->name, GF_LOG_WARNING, "failed to set remote-port with %d",
port);
goto out;
}
- ret = dict_set_str(dict, "address-family", addr_family);
+ ret = dict_set_str_sizen(dict, "address-family",
+ (af != NULL ? af : addr_family));
if (ret) {
gf_log(THIS->name, GF_LOG_WARNING, "failed to set address-family to %s",
addr_family);
goto out;
}
- ret = dict_set_str(dict, "transport-type", "socket");
+ ret = dict_set_str_sizen(dict, "transport-type", "socket");
if (ret) {
gf_log(THIS->name, GF_LOG_WARNING,
"failed to set trans-type with socket");
goto out;
}
-
- *options = dict;
out:
- if (ret && dict) {
- dict_unref(dict);
- }
-
return ret;
}
diff --git a/rpc/rpc-lib/src/rpc-transport.h b/rpc/rpc-lib/src/rpc-transport.h
index 87ea3a28dfd..c499f0bb955 100644
--- a/rpc/rpc-lib/src/rpc-transport.h
+++ b/rpc/rpc-lib/src/rpc-transport.h
@@ -56,8 +56,9 @@
struct rpc_transport_ops;
typedef struct rpc_transport rpc_transport_t;
-#include "dict.h"
-#include "compat.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/async.h>
#include "rpcsvc-common.h"
struct peer_info {
@@ -102,19 +103,19 @@ typedef enum {
struct rpc_transport_msg {
struct iovec *rpchdr;
- int rpchdrcount;
struct iovec *proghdr;
+ int rpchdrcount;
int proghdrcount;
struct iovec *progpayload;
- int progpayloadcount;
struct iobref *iobref;
+ int progpayloadcount;
};
typedef struct rpc_transport_msg rpc_transport_msg_t;
struct rpc_transport_rsp {
struct iovec *rsphdr;
- int rsphdr_count;
struct iovec *rsp_payload;
+ int rsphdr_count;
int rsp_payload_count;
struct iobref *rsp_iobref;
};
@@ -148,23 +149,13 @@ typedef struct rpc_transport_data rpc_transport_data_t;
struct rpc_request_info {
int prognum;
int progver;
- int procnum;
void *rpc_req; /* struct rpc_req */
rpc_transport_rsp_t rsp;
+ int procnum;
uint32_t xid;
};
typedef struct rpc_request_info rpc_request_info_t;
-struct rpc_transport_pollin {
- int count;
- void *private;
- struct iobref *iobref;
- struct iovec vector[MAX_IOVEC];
- char is_reply;
- char vectored;
-};
-typedef struct rpc_transport_pollin rpc_transport_pollin_t;
-
typedef int (*rpc_transport_notify_t)(rpc_transport_t *, void *mydata,
rpc_transport_event_t, void *data, ...);
@@ -202,11 +193,12 @@ struct rpc_transport {
int32_t outstanding_rpc_count;
struct list_head list;
- int bind_insecure;
void *dl_handle; /* handle of dlopen() */
char *ssl_name;
dict_t *clnt_options; /* store options received from
* client */
+ gf_atomic_t disconnect_progress;
+ int bind_insecure;
/* connect_failed: saves the connect() syscall status as socket_t
* member holding connect() status can't be accessed by higher gfapi
* layer or in client management notification handler functions
@@ -216,6 +208,18 @@ struct rpc_transport {
char poller_death_accept;
};
+struct rpc_transport_pollin {
+ struct rpc_transport *trans;
+ void *private;
+ struct iobref *iobref;
+ struct iovec vector[MAX_IOVEC];
+ gf_async_t async;
+ int count;
+ char is_reply;
+ char vectored;
+};
+typedef struct rpc_transport_pollin rpc_transport_pollin_t;
+
struct rpc_transport_ops {
/* no need of receive op, msg will be delivered through an event
* notification
@@ -248,9 +252,6 @@ int32_t
rpc_transport_disconnect(rpc_transport_t *this, gf_boolean_t wait);
int32_t
-rpc_transport_destroy(rpc_transport_t *this);
-
-int32_t
rpc_transport_notify(rpc_transport_t *this, rpc_transport_event_t event,
void *data, ...);
@@ -281,9 +282,6 @@ rpc_transport_get_peeraddr(rpc_transport_t *this, char *peeraddr, int addrlen,
struct sockaddr_storage *sa, size_t salen);
int32_t
-rpc_transport_get_myname(rpc_transport_t *this, char *hostname, int hostlen);
-
-int32_t
rpc_transport_get_myaddr(rpc_transport_t *this, char *peeraddr, int addrlen,
struct sockaddr_storage *sa, size_t salen);
@@ -302,10 +300,13 @@ rpc_transport_keepalive_options_set(dict_t *options, int32_t interval,
int32_t time, int32_t timeout);
int
-rpc_transport_unix_options_build(dict_t **options, char *filepath,
+rpc_transport_unix_options_build(dict_t *options, char *filepath,
int frame_timeout);
int
-rpc_transport_inet_options_build(dict_t **options, const char *hostname,
- int port);
+rpc_transport_inet_options_build(dict_t *options, const char *hostname,
+ int port, char *af);
+
+void
+rpc_transport_cleanup(rpc_transport_t *);
#endif /* __RPC_TRANSPORT_H__ */
diff --git a/rpc/rpc-lib/src/rpcsvc-auth.c b/rpc/rpc-lib/src/rpcsvc-auth.c
index da260ade0c0..8e76b4188bb 100644
--- a/rpc/rpc-lib/src/rpcsvc-auth.c
+++ b/rpc/rpc-lib/src/rpcsvc-auth.c
@@ -9,8 +9,7 @@
*/
#include "rpcsvc.h"
-#include "logging.h"
-#include "dict.h"
+#include <glusterfs/dict.h>
extern rpcsvc_auth_t *
rpcsvc_auth_null_init(rpcsvc_t *svc, dict_t *options);
@@ -274,6 +273,44 @@ rpcsvc_set_root_squash(rpcsvc_t *svc, dict_t *options)
}
int
+rpcsvc_set_all_squash(rpcsvc_t *svc, dict_t *options)
+{
+ int ret = -1;
+
+ uid_t anonuid = -1;
+ gid_t anongid = -1;
+
+ GF_ASSERT(svc);
+ GF_ASSERT(options);
+
+ ret = dict_get_str_boolean(options, "all-squash", 0);
+ if (ret != -1)
+ svc->all_squash = ret;
+ else
+ svc->all_squash = _gf_false;
+
+ ret = dict_get_uint32(options, "anonuid", &anonuid);
+ if (!ret)
+ svc->anonuid = anonuid;
+ else
+ svc->anonuid = RPC_NOBODY_UID;
+
+ ret = dict_get_uint32(options, "anongid", &anongid);
+ if (!ret)
+ svc->anongid = anongid;
+ else
+ svc->anongid = RPC_NOBODY_GID;
+
+ if (svc->all_squash)
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG,
+ "all squashing enabled "
+ "(uid=%d, gid=%d)",
+ svc->anonuid, svc->anongid);
+
+ return 0;
+}
+
+int
rpcsvc_auth_init(rpcsvc_t *svc, dict_t *options)
{
int ret = -1;
@@ -283,6 +320,7 @@ rpcsvc_auth_init(rpcsvc_t *svc, dict_t *options)
(void)rpcsvc_set_allow_insecure(svc, options);
(void)rpcsvc_set_root_squash(svc, options);
+ (void)rpcsvc_set_all_squash(svc, options);
(void)rpcsvc_set_addr_namelookup(svc, options);
ret = rpcsvc_auth_add_initers(svc);
if (ret == -1) {
@@ -316,6 +354,10 @@ rpcsvc_auth_reconf(rpcsvc_t *svc, dict_t *options)
if (ret)
return (-1);
+ ret = rpcsvc_set_all_squash(svc, options);
+ if (ret)
+ return (-1);
+
return rpcsvc_set_addr_namelookup(svc, options);
}
diff --git a/rpc/rpc-lib/src/rpcsvc-common.h b/rpc/rpc-lib/src/rpcsvc-common.h
index 56200b38faa..6c4ec49a6ef 100644
--- a/rpc/rpc-lib/src/rpcsvc-common.h
+++ b/rpc/rpc-lib/src/rpcsvc-common.h
@@ -12,11 +12,8 @@
#define _RPCSVC_COMMON_H
#include <pthread.h>
-#include "list.h"
-#include "compat.h"
-#include "glusterfs.h"
-#include "dict.h"
-#include "xlator.h"
+#include <glusterfs/compat.h>
+#include <glusterfs/dict.h>
typedef enum {
RPCSVC_EVENT_ACCEPT,
@@ -42,8 +39,6 @@ typedef struct rpcsvc_state {
pthread_rwlock_t rpclock;
- unsigned int memfactor;
-
/* List of the authentication schemes available. */
struct list_head authschemes;
@@ -64,6 +59,8 @@ typedef struct rpcsvc_state {
struct list_head notify;
int notify_count;
+ unsigned int memfactor;
+
xlator_t *xl; /* xlator */
void *mydata;
rpcsvc_notify_t notifyfn;
@@ -79,6 +76,7 @@ typedef struct rpcsvc_state {
gf_boolean_t allow_insecure;
gf_boolean_t register_portmap;
gf_boolean_t root_squash;
+ gf_boolean_t all_squash;
} rpcsvc_t;
/* DRC START */
diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c
index 8dcc2947b33..39910d481bf 100644
--- a/rpc/rpc-lib/src/rpcsvc.c
+++ b/rpc/rpc-lib/src/rpcsvc.c
@@ -10,19 +10,16 @@
#include "rpcsvc.h"
#include "rpc-transport.h"
-#include "dict.h"
-#include "logging.h"
-#include "byte-order.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "list.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/statedump.h>
#include "xdr-rpc.h"
-#include "iobuf.h"
-#include "globals.h"
+#include <glusterfs/iobuf.h>
#include "xdr-common.h"
#include "xdr-generic.h"
#include "rpc-common-xdr.h"
-#include "syncop.h"
+#include <glusterfs/syncop.h>
#include "rpc-drc.h"
#include "protocol-common.h"
@@ -36,16 +33,20 @@
#include <fnmatch.h>
#include <stdarg.h>
#include <stdio.h>
-#include <math.h>
+#include <dlfcn.h>
#ifdef IPV6_DEFAULT
#include <netconfig.h>
#endif
#include "xdr-rpcclnt.h"
-#include "glusterfs-acl.h"
+#include <glusterfs/glusterfs-acl.h>
-struct rpcsvc_program gluster_dump_prog;
+#ifndef PTHREAD_MUTEX_ADAPTIVE_NP
+#define PTHREAD_MUTEX_ADAPTIVE_NP PTHREAD_MUTEX_DEFAULT
+#endif
+
+static struct rpcsvc_program gluster_dump_prog;
#define rpcsvc_alloc_request(svc, request) \
do { \
@@ -70,68 +71,33 @@ rpcsvc_request_handler(void *arg);
static int
rpcsvc_match_subnet_v4(const char *addrtok, const char *ipaddr);
-void
+static void
rpcsvc_toggle_queue_status(rpcsvc_program_t *prog,
- rpcsvc_request_queue_t *queue, char status[])
+ rpcsvc_request_queue_t *queue,
+ unsigned long status[])
{
- int queue_index = 0, status_index = 0, set_bit = 0;
-
- if (queue != &prog->request_queue[0]) {
- queue_index = (queue - &prog->request_queue[0]);
- }
-
- status_index = queue_index / 8;
- set_bit = queue_index % 8;
-
- status[status_index] ^= (1 << set_bit);
+ unsigned queue_index = queue - prog->request_queue;
- return;
-}
-
-static int
-get_rightmost_set_bit(int n)
-{
- return log2(n & -n);
+ status[queue_index / __BITS_PER_LONG] ^= (1UL << (queue_index %
+ __BITS_PER_LONG));
}
int
rpcsvc_get_free_queue_index(rpcsvc_program_t *prog)
{
- int queue_index = 0, max_index = 0, i = 0;
- unsigned int right_most_unset_bit = 0;
-
- right_most_unset_bit = 8;
+ unsigned i, j = 0;
- max_index = gf_roof(EVENT_MAX_THREADS, 8) / 8;
- for (i = 0; i < max_index; i++) {
- if (prog->request_queue_status[i] == 0) {
- right_most_unset_bit = 0;
+ for (i = 0; i < EVENT_MAX_THREADS / __BITS_PER_LONG; i++)
+ if (prog->request_queue_status[i] != ULONG_MAX) {
+ j = __builtin_ctzl(~prog->request_queue_status[i]);
break;
- } else {
- right_most_unset_bit = get_rightmost_set_bit(
- ~prog->request_queue_status[i]);
- if (right_most_unset_bit < 8) {
- break;
- }
- }
- }
-
- if (right_most_unset_bit > 7) {
- queue_index = -1;
- } else {
- queue_index = i * 8;
- queue_index += right_most_unset_bit;
-
- if (queue_index > EVENT_MAX_THREADS) {
- queue_index = -1;
}
- }
- if (queue_index != -1) {
- prog->request_queue_status[i] |= (0x1 << right_most_unset_bit);
- }
+ if (i == EVENT_MAX_THREADS / __BITS_PER_LONG)
+ return -1;
- return queue_index;
+ prog->request_queue_status[i] |= (1UL << j);
+ return i * __BITS_PER_LONG + j;
}
rpcsvc_notify_wrapper_t *
@@ -376,6 +342,10 @@ rpcsvc_program_actor(rpcsvc_request_t *req)
goto err;
}
+ if (svc->xl->ctx->measure_latency) {
+ timespec_now(&req->begin);
+ }
+
req->ownthread = program->ownthread;
req->synctask = program->synctask;
@@ -1525,10 +1495,18 @@ rpcsvc_submit_generic(rpcsvc_request_t *req, struct iovec *proghdr,
size_t hdrlen = 0;
char new_iobref = 0;
rpcsvc_drc_globals_t *drc = NULL;
+ gf_latency_t *lat = NULL;
if ((!req) || (!req->trans))
return -1;
+ if (req->prog && req->begin.tv_sec) {
+ if ((req->procnum >= 0) && (req->procnum < req->prog->numactors)) {
+ timespec_now(&req->end);
+ lat = &req->prog->latencies[req->procnum];
+ gf_latency_update(lat, &req->begin, &req->end);
+ }
+ }
trans = req->trans;
for (i = 0; i < hdrcount; i++) {
@@ -1859,6 +1837,15 @@ rpcsvc_submit_message(rpcsvc_request_t *req, struct iovec *proghdr,
iobref);
}
+void
+rpcsvc_program_destroy(rpcsvc_program_t *program)
+{
+ if (program) {
+ GF_FREE(program->latencies);
+ GF_FREE(program);
+ }
+}
+
int
rpcsvc_program_unregister(rpcsvc_t *svc, rpcsvc_program_t *program)
{
@@ -1868,6 +1855,18 @@ rpcsvc_program_unregister(rpcsvc_t *svc, rpcsvc_program_t *program)
goto out;
}
+ pthread_rwlock_rdlock(&svc->rpclock);
+ {
+ list_for_each_entry(prog, &svc->programs, program)
+ {
+ if ((prog->prognum == program->prognum) &&
+ (prog->progver == program->progver)) {
+ break;
+ }
+ }
+ }
+ pthread_rwlock_unlock(&svc->rpclock);
+
ret = rpcsvc_program_unregister_portmap(program);
if (ret == -1) {
gf_log(GF_RPCSVC, GF_LOG_ERROR,
@@ -1884,17 +1883,6 @@ rpcsvc_program_unregister(rpcsvc_t *svc, rpcsvc_program_t *program)
goto out;
}
#endif
- pthread_rwlock_rdlock(&svc->rpclock);
- {
- list_for_each_entry(prog, &svc->programs, program)
- {
- if ((prog->prognum == program->prognum) &&
- (prog->progver == program->progver)) {
- break;
- }
- }
- }
- pthread_rwlock_unlock(&svc->rpclock);
gf_log(GF_RPCSVC, GF_LOG_DEBUG,
"Program unregistered: %s, Num: %d,"
@@ -1915,6 +1903,8 @@ rpcsvc_program_unregister(rpcsvc_t *svc, rpcsvc_program_t *program)
ret = 0;
out:
+ rpcsvc_program_destroy(prog);
+
if (ret == -1) {
if (program) {
gf_log(GF_RPCSVC, GF_LOG_ERROR,
@@ -2009,6 +1999,7 @@ rpcsvc_create_listener(rpcsvc_t *svc, dict_t *options, char *name)
listener = rpcsvc_listener_alloc(svc, trans);
if (listener == NULL) {
+ ret = -1;
goto out;
}
@@ -2016,6 +2007,7 @@ rpcsvc_create_listener(rpcsvc_t *svc, dict_t *options, char *name)
out:
if (!listener && trans) {
rpc_transport_disconnect(trans, _gf_true);
+ rpc_transport_cleanup(trans);
}
return ret;
@@ -2173,9 +2165,7 @@ rpcsvc_request_handler(void *arg)
rpcsvc_actor_t *actor = NULL;
gf_boolean_t done = _gf_false;
int ret = 0;
- struct list_head tmp_list = {
- 0,
- };
+ struct list_head tmp_list;
queue = arg;
program = queue->program;
@@ -2210,9 +2200,9 @@ rpcsvc_request_handler(void *arg)
list_for_each_entry_safe(req, tmp_req, &tmp_list, request_list)
{
- list_del_init(&req->request_list);
-
if (req) {
+ list_del_init(&req->request_list);
+
if (req->prognum == RPCSVC_INFRA_PROGRAM) {
switch (req->procnum) {
case RPCSVC_PROC_EVENT_THREAD_DEATH:
@@ -2263,6 +2253,8 @@ rpcsvc_program_register(rpcsvc_t *svc, rpcsvc_program_t *program,
int ret = -1, i = 0;
rpcsvc_program_t *newprog = NULL;
char already_registered = 0;
+ pthread_mutexattr_t attr[EVENT_MAX_THREADS];
+ pthread_mutexattr_t thr_attr;
if (!svc) {
goto out;
@@ -2296,21 +2288,35 @@ rpcsvc_program_register(rpcsvc_t *svc, rpcsvc_program_t *program,
}
memcpy(newprog, program, sizeof(*program));
+ newprog->latencies = gf_latency_new(program->numactors);
+ if (!newprog->latencies) {
+ rpcsvc_program_destroy(newprog);
+ goto out;
+ }
INIT_LIST_HEAD(&newprog->program);
+ pthread_mutexattr_init(&thr_attr);
+ pthread_mutexattr_settype(&thr_attr, PTHREAD_MUTEX_ADAPTIVE_NP);
for (i = 0; i < EVENT_MAX_THREADS; i++) {
+ pthread_mutexattr_init(&attr[i]);
+ pthread_mutexattr_settype(&attr[i], PTHREAD_MUTEX_ADAPTIVE_NP);
INIT_LIST_HEAD(&newprog->request_queue[i].request_queue);
- pthread_mutex_init(&newprog->request_queue[i].queue_lock, NULL);
+ pthread_mutex_init(&newprog->request_queue[i].queue_lock, &attr[i]);
pthread_cond_init(&newprog->request_queue[i].queue_cond, NULL);
newprog->request_queue[i].program = newprog;
}
- pthread_mutex_init(&newprog->thr_lock, NULL);
+ pthread_mutex_init(&newprog->thr_lock, &thr_attr);
pthread_cond_init(&newprog->thr_cond, NULL);
newprog->alive = _gf_true;
+ if (gf_async_ctrl.enabled) {
+ newprog->ownthread = _gf_false;
+ newprog->synctask = _gf_false;
+ }
+
/* make sure synctask gets priority over ownthread */
if (newprog->synctask)
newprog->ownthread = _gf_false;
@@ -2560,7 +2566,7 @@ rpcsvc_reconfigure_options(rpcsvc_t *svc, dict_t *options)
*/
dict_del(svc->options, srchkey);
if (!dict_get_str(options, srchkey, &keyval)) {
- ret = dict_set_str(svc->options, srchkey, keyval);
+ ret = dict_set_dynstr_with_alloc(svc->options, srchkey, keyval);
if (ret < 0) {
gf_log(GF_RPCSVC, GF_LOG_ERROR, "dict_set_str error");
GF_FREE(srchkey);
@@ -2592,7 +2598,7 @@ rpcsvc_reconfigure_options(rpcsvc_t *svc, dict_t *options)
*/
dict_del(svc->options, srchkey);
if (!dict_get_str(options, srchkey, &keyval)) {
- ret = dict_set_str(svc->options, srchkey, keyval);
+ ret = dict_set_dynstr_with_alloc(svc->options, srchkey, keyval);
if (ret < 0) {
gf_log(GF_RPCSVC, GF_LOG_ERROR, "dict_set_str error");
GF_FREE(srchkey);
@@ -2612,18 +2618,13 @@ rpcsvc_reconfigure_options(rpcsvc_t *svc, dict_t *options)
}
int
-rpcsvc_transport_unix_options_build(dict_t **options, char *filepath)
+rpcsvc_transport_unix_options_build(dict_t *dict, char *filepath)
{
- dict_t *dict = NULL;
char *fpath = NULL;
int ret = -1;
GF_ASSERT(filepath);
- GF_ASSERT(options);
-
- dict = dict_new();
- if (!dict)
- goto out;
+ GF_VALIDATE_OR_GOTO("rpcsvc", dict, out);
fpath = gf_strdup(filepath);
if (!fpath) {
@@ -2646,13 +2647,9 @@ rpcsvc_transport_unix_options_build(dict_t **options, char *filepath)
ret = dict_set_str(dict, "transport-type", "socket");
if (ret)
goto out;
-
- *options = dict;
out:
if (ret) {
GF_FREE(fpath);
- if (dict)
- dict_unref(dict);
}
return ret;
}
@@ -2747,6 +2744,43 @@ rpcsvc_get_throttle(rpcsvc_t *svc)
return svc->throttle;
}
+/* Function call to cleanup resources for svc
+ */
+int
+rpcsvc_destroy(rpcsvc_t *svc)
+{
+ struct rpcsvc_auth_list *auth = NULL;
+ struct rpcsvc_auth_list *tmp = NULL;
+ rpcsvc_listener_t *listener = NULL;
+ rpcsvc_listener_t *next = NULL;
+ int ret = 0;
+
+ if (!svc)
+ return ret;
+
+ list_for_each_entry_safe(listener, next, &svc->listeners, list)
+ {
+ rpcsvc_listener_destroy(listener);
+ }
+
+ list_for_each_entry_safe(auth, tmp, &svc->authschemes, authlist)
+ {
+ list_del_init(&auth->authlist);
+ GF_FREE(auth);
+ }
+
+ rpcsvc_program_unregister(svc, &gluster_dump_prog);
+ if (svc->rxpool) {
+ mem_pool_destroy(svc->rxpool);
+ svc->rxpool = NULL;
+ }
+
+ pthread_rwlock_destroy(&svc->rpclock);
+ GF_FREE(svc);
+
+ return ret;
+}
+
/* The global RPC service initializer.
*/
rpcsvc_t *
@@ -2843,6 +2877,10 @@ rpcsvc_transport_peer_check_search(dict_t *options, char *pattern, char *ip,
}
dup_addrstr = gf_strdup(addrstr);
+ if (dup_addrstr == NULL) {
+ ret = -1;
+ goto err;
+ }
addrtok = strtok_r(dup_addrstr, ",", &svptr);
while (addrtok) {
/* CASEFOLD not present on Solaris */
@@ -3166,10 +3204,6 @@ rpcsvc_match_subnet_v4(const char *addrtok, const char *ipaddr)
if (inet_pton(AF_INET, ipaddr, &sin1.sin_addr) == 0)
goto out;
- /* Find the network socket addr of subnet pattern */
- if (inet_pton(AF_INET, netaddr, &sin2.sin_addr) == 0)
- goto out;
-
slash = strchr(netaddr, '/');
if (slash) {
*slash = '\0';
@@ -3182,9 +3216,16 @@ rpcsvc_match_subnet_v4(const char *addrtok, const char *ipaddr)
if (prefixlen > 31)
goto out;
} else {
+ /* if there is no '/', then this function wouldn't be called */
goto out;
}
+ /* Need to do this after removing '/', as inet_pton() take IP address as
+ * second argument. Once we get sin2, then comparison is oranges to orange
+ */
+ if (inet_pton(AF_INET, netaddr, &sin2.sin_addr) == 0)
+ goto out;
+
shift = IPv4_ADDR_SIZE - prefixlen;
mask.sin_addr.s_addr = htonl((uint32_t)~0 << shift);
@@ -3197,13 +3238,55 @@ out:
return ret;
}
-rpcsvc_actor_t gluster_dump_actors[GF_DUMP_MAXVALUE] = {
- [GF_DUMP_NULL] = {"NULL", GF_DUMP_NULL, NULL, NULL, 0, DRC_NA},
- [GF_DUMP_DUMP] = {"DUMP", GF_DUMP_DUMP, rpcsvc_dump, NULL, 0, DRC_NA},
- [GF_DUMP_PING] = {"PING", GF_DUMP_PING, rpcsvc_ping, NULL, 0, DRC_NA},
+void
+rpcsvc_program_dump(rpcsvc_program_t *prog)
+{
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i;
+
+ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s", prog->progname);
+ gf_proc_dump_add_section("%s", key_prefix);
+
+ gf_proc_dump_build_key(key, key_prefix, "program-number");
+ gf_proc_dump_write(key, "%d", prog->prognum);
+
+ gf_proc_dump_build_key(key, key_prefix, "program-version");
+ gf_proc_dump_write(key, "%d", prog->progver);
+
+ strncat(key_prefix, ".latency",
+ sizeof(key_prefix) - strlen(key_prefix) - 1);
+
+ for (i = 0; i < prog->numactors; i++) {
+ gf_proc_dump_build_key(key, key_prefix, "%s", prog->actors[i].procname);
+ gf_latency_statedump_and_reset(key, &prog->latencies[i]);
+ }
+}
+
+void
+rpcsvc_statedump(rpcsvc_t *svc)
+{
+ rpcsvc_program_t *prog = NULL;
+ int ret = 0;
+ ret = pthread_rwlock_tryrdlock(&svc->rpclock);
+ if (ret)
+ return;
+ {
+ list_for_each_entry(prog, &svc->programs, program)
+ {
+ rpcsvc_program_dump(prog);
+ }
+ }
+ pthread_rwlock_unlock(&svc->rpclock);
+}
+
+static rpcsvc_actor_t gluster_dump_actors[GF_DUMP_MAXVALUE] = {
+ [GF_DUMP_NULL] = {"NULL", NULL, NULL, GF_DUMP_NULL, DRC_NA, 0},
+ [GF_DUMP_DUMP] = {"DUMP", rpcsvc_dump, NULL, GF_DUMP_DUMP, DRC_NA, 0},
+ [GF_DUMP_PING] = {"PING", rpcsvc_ping, NULL, GF_DUMP_PING, DRC_NA, 0},
};
-struct rpcsvc_program gluster_dump_prog = {
+static struct rpcsvc_program gluster_dump_prog = {
.progname = "GF-DUMP",
.prognum = GLUSTER_DUMP_PROGRAM,
.progver = GLUSTER_DUMP_VERSION,
diff --git a/rpc/rpc-lib/src/rpcsvc.h b/rpc/rpc-lib/src/rpcsvc.h
index d6260ca5028..7b3030926c8 100644
--- a/rpc/rpc-lib/src/rpcsvc.h
+++ b/rpc/rpc-lib/src/rpcsvc.h
@@ -11,23 +11,18 @@
#ifndef _RPCSVC_H
#define _RPCSVC_H
-#include "gf-event.h"
+#include <glusterfs/gf-event.h>
#include "rpc-transport.h"
-#include "logging.h"
-#include "dict.h"
-#include "mem-pool.h"
-#include "list.h"
-#include "iobuf.h"
+#include <glusterfs/dict.h>
#include "xdr-rpc.h"
-#include "glusterfs.h"
-#include "xlator.h"
#include "rpcsvc-common.h"
#include <pthread.h>
#include <sys/uio.h>
#include <inttypes.h>
#include <rpc/rpc_msg.h>
-#include "compat.h"
+#include <glusterfs/compat.h>
+#include <glusterfs/client_t.h>
#ifndef MAX_IOVEC
#define MAX_IOVEC 16
@@ -148,12 +143,6 @@ struct rpcsvc_config {
int max_block_size;
};
-typedef struct rpcsvc_auth_data {
- int flavour;
- int datalen;
- char authdata[GF_MAX_AUTH_BYTES];
-} rpcsvc_auth_data_t;
-
#define rpcsvc_auth_flavour(au) ((au).flavour)
typedef struct drc_client drc_client_t;
@@ -201,24 +190,11 @@ struct rpcsvc_request {
* by the program actors. This is the buffer that will need to
* be de-xdred by the actor.
*/
- struct iovec msg[MAX_IOVEC];
int count;
+ struct iovec msg[MAX_IOVEC];
struct iobref *iobref;
- /* Status of the RPC call, whether it was accepted or denied. */
- int rpc_status;
-
- /* In case, the call was denied, the RPC error is stored here
- * till the reply is sent.
- */
- int rpc_err;
-
- /* In case the failure happened because of an authentication problem
- * , this value needs to be assigned the correct auth error number.
- */
- int auth_err;
-
/* There can be cases of RPC requests where the reply needs to
* be built from multiple sources. E.g. where even the NFS reply
* can contain a payload, as in the NFSv3 read reply. Here the RPC header
@@ -234,14 +210,14 @@ struct rpcsvc_request {
size_t payloadsize;
/* The credentials extracted from the rpc request */
- rpcsvc_auth_data_t cred;
+ client_auth_data_t cred;
/* The verified extracted from the rpc request. In request side
* processing this contains the verifier sent by the client, on reply
* side processing, it is filled with the verified that will be
* sent to the client.
*/
- rpcsvc_auth_data_t verf;
+ client_auth_data_t verf;
/* Container for a RPC program wanting to store a temp
* request-specific item.
*/
@@ -256,6 +232,19 @@ struct rpcsvc_request {
/* request queue in rpcsvc */
struct list_head request_list;
+ /* Status of the RPC call, whether it was accepted or denied. */
+ int rpc_status;
+
+ /* In case, the call was denied, the RPC error is stored here
+ * till the reply is sent.
+ */
+ int rpc_err;
+
+ /* In case the failure happened because of an authentication problem
+ * , this value needs to be assigned the correct auth error number.
+ */
+ int auth_err;
+
/* Things passed to rpc layer from client */
/* @flags: Can be used for binary data passed in xdata to be
@@ -275,6 +264,8 @@ struct rpcsvc_request {
gf_boolean_t ownthread;
gf_boolean_t synctask;
+ struct timespec begin; /*req handling start time*/
+ struct timespec end; /*req handling end time*/
};
#define rpcsvc_request_program(req) ((rpcsvc_program_t *)((req)->prog))
@@ -316,6 +307,20 @@ struct rpcsvc_request {
} \
} while (0);
+#define RPC_AUTH_ALL_SQUASH(req) \
+ do { \
+ int gidcount = 0; \
+ if (req->svc->all_squash) { \
+ req->uid = req->svc->anonuid; \
+ req->gid = req->svc->anongid; \
+ \
+ for (gidcount = 0; gidcount < req->auxgidcount; ++gidcount) { \
+ if (!req->auxgids[gidcount]) \
+ req->auxgids[gidcount] = req->svc->anongid; \
+ } \
+ } \
+ } while (0);
+
#define RPCSVC_ACTOR_SUCCESS 0
#define RPCSVC_ACTOR_ERROR (-1)
#define RPCSVC_ACTOR_IGNORE (-2)
@@ -354,7 +359,6 @@ typedef void (*rpcsvc_deallocate_reply)(void *msg);
*/
typedef struct rpcsvc_actor_desc {
char procname[RPCSVC_NAME_MAX];
- int procnum;
rpcsvc_actor actor;
/* Handler for cases where the RPC requests fragments are large enough
@@ -367,18 +371,20 @@ typedef struct rpcsvc_actor_desc {
*/
rpcsvc_vector_sizer vector_sizer;
+ int procnum;
+
/* Can actor be ran on behalf an unprivileged requestor? */
- gf_boolean_t unprivileged;
drc_op_type_t op_type;
+ gf_boolean_t unprivileged;
} rpcsvc_actor_t;
typedef struct rpcsvc_request_queue {
- int gen;
struct list_head request_queue;
pthread_mutex_t queue_lock;
pthread_cond_t queue_cond;
pthread_t thread;
struct rpcsvc_program *program;
+ int gen;
gf_boolean_t waiting;
} rpcsvc_request_queue_t;
@@ -413,10 +419,9 @@ struct rpcsvc_program {
int numactors; /* Num actors in actor array */
int proghighvers; /* Highest ver for program
supported by the system. */
- int proglowvers; /* Lowest ver */
-
/* Program specific state handed to actors */
void *private;
+ gf_latency_t *latencies; /*Tracks latency statistics for the rpc call*/
/* This upcall is provided by the program during registration.
* It is used to notify the program about events like connection being
@@ -426,6 +431,8 @@ struct rpcsvc_program {
*/
rpcsvc_notify_t notify;
+ int proglowvers; /* Lowest ver */
+
/* An integer that identifies the min auth strength that is required
* by this protocol, for eg. MOUNT3 needs AUTH_UNIX at least.
* See RFC 1813, Section 5.2.1.
@@ -435,7 +442,6 @@ struct rpcsvc_program {
/* list member to link to list of registered services with rpcsvc */
struct list_head program;
rpcsvc_request_queue_t request_queue[EVENT_MAX_THREADS];
- char request_queue_status[EVENT_MAX_THREADS / 8 + 1];
pthread_mutex_t thr_lock;
pthread_cond_t thr_cond;
int threadcount;
@@ -454,6 +460,7 @@ struct rpcsvc_program {
gf_boolean_t alive;
gf_boolean_t synctask;
+ unsigned long request_queue_status[EVENT_MAX_THREADS / __BITS_PER_LONG];
};
typedef struct rpcsvc_cbk_program {
@@ -585,9 +592,9 @@ typedef struct rpcsvc_auth_ops {
typedef struct rpcsvc_auth_flavour_desc {
char authname[RPCSVC_NAME_MAX];
- int authnum;
rpcsvc_auth_ops_t *authops;
void *authprivate;
+ int authnum;
} rpcsvc_auth_t;
typedef void *(*rpcsvc_auth_initer_t)(rpcsvc_t *svc, dict_t *options);
@@ -651,7 +658,7 @@ rpcsvc_actor_t *
rpcsvc_program_actor(rpcsvc_request_t *req);
int
-rpcsvc_transport_unix_options_build(dict_t **options, char *filepath);
+rpcsvc_transport_unix_options_build(dict_t *options, char *filepath);
int
rpcsvc_set_allow_insecure(rpcsvc_t *svc, dict_t *options);
int
@@ -659,6 +666,8 @@ rpcsvc_set_addr_namelookup(rpcsvc_t *svc, dict_t *options);
int
rpcsvc_set_root_squash(rpcsvc_t *svc, dict_t *options);
int
+rpcsvc_set_all_squash(rpcsvc_t *svc, dict_t *options);
+int
rpcsvc_set_outstanding_rpc_limit(rpcsvc_t *svc, dict_t *options, int defvalue);
int
@@ -677,4 +686,9 @@ rpcsvc_get_program_vector_sizer(rpcsvc_t *svc, uint32_t prognum,
uint32_t progver, int procnum);
void
rpcsvc_autoscale_threads(glusterfs_ctx_t *ctx, rpcsvc_t *rpc, int incr);
+
+extern int
+rpcsvc_destroy(rpcsvc_t *svc);
+void
+rpcsvc_statedump(rpcsvc_t *svc);
#endif
diff --git a/rpc/rpc-lib/src/xdr-common.h b/rpc/rpc-lib/src/xdr-common.h
index 7b0bc36ec64..752736b3d4d 100644
--- a/rpc/rpc-lib/src/xdr-common.h
+++ b/rpc/rpc-lib/src/xdr-common.h
@@ -66,11 +66,9 @@ enum gf_dump_procnum {
#ifdef GF_LINUX_HOST_OS
#define xdr_u_int32_t xdr_uint32_t
#define xdr_u_int64_t xdr_uint64_t
-#ifdef IPV6_DEFAULT
unsigned long
xdr_sizeof(xdrproc_t func, void *data);
#endif
-#endif
#ifdef GF_DARWIN_HOST_OS
#define xdr_u_quad_t xdr_u_int64_t
diff --git a/rpc/rpc-lib/src/xdr-rpc.c b/rpc/rpc-lib/src/xdr-rpc.c
index 36fd9db1a97..4992dc5a7ce 100644
--- a/rpc/rpc-lib/src/xdr-rpc.c
+++ b/rpc/rpc-lib/src/xdr-rpc.c
@@ -9,17 +9,13 @@
*/
#include <rpc/rpc.h>
-#include <rpc/pmap_clnt.h>
-#include <arpa/inet.h>
#include <rpc/xdr.h>
#include <sys/uio.h>
#include <rpc/auth_unix.h>
-#include "mem-pool.h"
#include "xdr-rpc.h"
#include "xdr-common.h"
-#include "logging.h"
-#include "common-utils.h"
+#include <glusterfs/common-utils.h>
/* Decodes the XDR format in msgbuf into rpc_msg.
* The remaining payload is returned into payload.
diff --git a/rpc/rpc-lib/src/xdr-rpc.h b/rpc/rpc-lib/src/xdr-rpc.h
index a57cd9430be..7baed273846 100644
--- a/rpc/rpc-lib/src/xdr-rpc.h
+++ b/rpc/rpc-lib/src/xdr-rpc.h
@@ -20,7 +20,6 @@
#include <rpc/auth_sys.h>
#endif
-//#include <rpc/pmap_clnt.h>
#include <arpa/inet.h>
#include <rpc/xdr.h>
#include <sys/uio.h>
diff --git a/rpc/rpc-lib/src/xdr-rpcclnt.c b/rpc/rpc-lib/src/xdr-rpcclnt.c
index 9e60d19e7a2..8dcdcfeda83 100644
--- a/rpc/rpc-lib/src/xdr-rpcclnt.c
+++ b/rpc/rpc-lib/src/xdr-rpcclnt.c
@@ -9,18 +9,14 @@
*/
#include <rpc/rpc.h>
-#include <rpc/pmap_clnt.h>
-#include <arpa/inet.h>
#include <rpc/xdr.h>
#include <sys/uio.h>
#include <rpc/auth_unix.h>
#include <errno.h>
-#include "mem-pool.h"
#include "xdr-rpc.h"
#include "xdr-common.h"
-#include "logging.h"
-#include "common-utils.h"
+#include <glusterfs/common-utils.h>
/* Decodes the XDR format in msgbuf into rpc_msg.
* The remaining payload is returned into payload.
diff --git a/rpc/rpc-lib/src/xdr-rpcclnt.h b/rpc/rpc-lib/src/xdr-rpcclnt.h
index 4d6e38d429c..58eda4892a9 100644
--- a/rpc/rpc-lib/src/xdr-rpcclnt.h
+++ b/rpc/rpc-lib/src/xdr-rpcclnt.h
@@ -11,8 +11,6 @@
#ifndef _XDR_RPCCLNT_H
#define _XDR_RPCCLNT_H
-//#include <rpc/rpc.h>
-//#include <rpc/pmap_clnt.h>
#include <arpa/inet.h>
#include <rpc/xdr.h>
#include <sys/uio.h>
diff --git a/rpc/rpc-transport/Makefile.am b/rpc/rpc-transport/Makefile.am
index 221fd640514..7dd9f026cfc 100644
--- a/rpc/rpc-transport/Makefile.am
+++ b/rpc/rpc-transport/Makefile.am
@@ -1 +1 @@
-SUBDIRS = socket $(RDMA_SUBDIR)
+SUBDIRS = socket
diff --git a/rpc/rpc-transport/rdma/Makefile.am b/rpc/rpc-transport/rdma/Makefile.am
deleted file mode 100644
index f963effea22..00000000000
--- a/rpc/rpc-transport/rdma/Makefile.am
+++ /dev/null
@@ -1 +0,0 @@
-SUBDIRS = src \ No newline at end of file
diff --git a/rpc/rpc-transport/rdma/src/Makefile.am b/rpc/rpc-transport/rdma/src/Makefile.am
deleted file mode 100644
index 40b5a19d3d1..00000000000
--- a/rpc/rpc-transport/rdma/src/Makefile.am
+++ /dev/null
@@ -1,24 +0,0 @@
-# TODO : need to change transportdir
-
-transport_LTLIBRARIES = rdma.la
-transportdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/rpc-transport
-
-rdma_la_LDFLAGS = -module -avoid-version -nostartfiles
-
-rdma_la_SOURCES = rdma.c name.c
-rdma_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
- $(top_builddir)/rpc/xdr/src/libgfxdr.la \
- $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
- -libverbs -lrdmacm
-
-noinst_HEADERS = rdma.h name.h rpc-trans-rdma-messages.h
-
-AM_CPPFLAGS = $(GF_CPPFLAGS) \
- -I$(top_srcdir)/libglusterfs/src \
- -I$(top_srcdir)/rpc/rpc-lib/src/ \
- -I$(top_srcdir)/rpc/xdr/src \
- -I$(top_builddir)/rpc/xdr/src
-
-AM_CFLAGS = -Wall $(GF_CFLAGS)
-
-CLEANFILES = *~
diff --git a/rpc/rpc-transport/rdma/src/name.c b/rpc/rpc-transport/rdma/src/name.c
deleted file mode 100644
index d57f2d4f58d..00000000000
--- a/rpc/rpc-transport/rdma/src/name.c
+++ /dev/null
@@ -1,703 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <errno.h>
-#include <netdb.h>
-#include <string.h>
-#include <rdma/rdma_cma.h>
-
-#ifndef AF_INET_SDP
-#define AF_INET_SDP 27
-#endif
-
-#include "rpc-transport.h"
-#include "rdma.h"
-#include "common-utils.h"
-#include "rpc-lib-messages.h"
-#include "rpc-trans-rdma-messages.h"
-
-int32_t
-gf_resolve_ip6(const char *hostname, uint16_t port, int family, void **dnscache,
- struct addrinfo **addr_info);
-
-static void
-_assign_port(struct sockaddr *sockaddr, uint16_t port)
-{
- switch (sockaddr->sa_family) {
- case AF_INET6:
- ((struct sockaddr_in6 *)sockaddr)->sin6_port = htons(port);
- break;
-
- case AF_INET_SDP:
- case AF_INET:
- ((struct sockaddr_in *)sockaddr)->sin_port = htons(port);
- break;
- }
-}
-
-static int32_t
-af_inet_bind_to_port_lt_ceiling(struct rdma_cm_id *cm_id,
- struct sockaddr *sockaddr,
- socklen_t sockaddr_len, uint32_t ceiling)
-{
-#if GF_DISABLE_PRIVPORT_TRACKING
- _assign_port(sockaddr, 0);
- return rdma_bind_addr(cm_id, sockaddr);
-#else
- int32_t ret = -1;
- uint16_t port = ceiling - 1;
- unsigned char ports[GF_PORT_ARRAY_SIZE] = {
- 0,
- };
- int i = 0;
-
-loop:
- ret = gf_process_reserved_ports(ports, ceiling);
-
- while (port) {
- if (port == GF_CLIENT_PORT_CEILING) {
- ret = -1;
- break;
- }
-
- /* ignore the reserved ports */
- if (BIT_VALUE(ports, port)) {
- port--;
- continue;
- }
-
- _assign_port(sockaddr, port);
-
- ret = rdma_bind_addr(cm_id, sockaddr);
-
- if (ret == 0)
- break;
-
- if (ret == -1 && errno == EACCES)
- break;
-
- port--;
- }
-
- /* In case if all the secure ports are exhausted, we are no more
- * binding to secure ports, hence instead of getting a random
- * port, lets define the range to restrict it from getting from
- * ports reserved for bricks i.e from range of 49152 - 65535
- * which further may lead to port clash */
- if (!port) {
- ceiling = port = GF_CLNT_INSECURE_PORT_CEILING;
- for (i = 0; i <= ceiling; i++)
- BIT_CLEAR(ports, i);
- goto loop;
- }
-
- return ret;
-#endif /* GF_DISABLE_PRIVPORT_TRACKING */
-}
-
-#if 0
-static int32_t
-af_unix_client_bind (rpc_transport_t *this, struct sockaddr *sockaddr,
- socklen_t sockaddr_len, struct rdma_cm_id *cm_id)
-{
- data_t *path_data = NULL;
- struct sockaddr_un *addr = NULL;
- int32_t ret = -1;
-
- path_data = dict_get (this->options,
- "transport.rdma.bind-path");
- if (path_data) {
- char *path = data_to_str (path_data);
- if (!path || strlen (path) > UNIX_PATH_MAX) {
- gf_msg_debug (this->name, 0,
- "transport.rdma.bind-path not specified "
- "for unix socket, letting connect to "
- "assign default value");
- goto err;
- }
-
- addr = (struct sockaddr_un *) sockaddr;
- strcpy (addr->sun_path, path);
- ret = bind (sock, (struct sockaddr *)addr, sockaddr_len);
- if (ret == -1) {
- gf_msg (this->name, GF_LOG_ERROR, errno,
- TRANS_MSG_SOCKET_BIND_ERROR,
- "cannot bind to unix-domain socket %d ",
- sock);
- goto err;
- }
- }
-
-err:
- return ret;
-}
-#endif
-
-static int32_t
-client_fill_address_family(rpc_transport_t *this, struct sockaddr *sockaddr)
-{
- data_t *address_family_data = NULL;
-
- address_family_data = dict_get(this->options, "transport.address-family");
- if (!address_family_data) {
- data_t *remote_host_data = NULL, *connect_path_data = NULL;
- remote_host_data = dict_get(this->options, "remote-host");
- connect_path_data = dict_get(this->options,
- "transport.rdma.connect-path");
-
- if (!(remote_host_data || connect_path_data) ||
- (remote_host_data && connect_path_data)) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- TRANS_MSG_ADDR_FAMILY_NOT_SPECIFIED,
- "address-family not specified and not able to "
- "determine the same from other options "
- "(remote-host:%s and connect-path:%s)",
- data_to_str(remote_host_data),
- data_to_str(connect_path_data));
- return -1;
- }
-
- if (remote_host_data) {
- gf_msg_debug(this->name, 0,
- "address-family not "
- "specified, guessing it to be "
- "inet/inet6");
- sockaddr->sa_family = AF_UNSPEC;
- } else {
- gf_msg_debug(this->name, 0,
- "address-family not "
- "specified, guessing it to be unix");
- sockaddr->sa_family = AF_UNIX;
- }
-
- } else {
- char *address_family = data_to_str(address_family_data);
- if (!strcasecmp(address_family, "unix")) {
- sockaddr->sa_family = AF_UNIX;
- } else if (!strcasecmp(address_family, "inet")) {
- sockaddr->sa_family = AF_INET;
- } else if (!strcasecmp(address_family, "inet6")) {
- sockaddr->sa_family = AF_INET6;
- } else if (!strcasecmp(address_family, "inet-sdp")) {
- sockaddr->sa_family = AF_INET_SDP;
- } else {
- gf_msg(this->name, GF_LOG_ERROR, 0, TRANS_MSG_UNKNOWN_ADDR_FAMILY,
- "unknown address-family (%s) specified", address_family);
- sockaddr->sa_family = AF_UNSPEC;
- return -1;
- }
- }
-
- return 0;
-}
-
-static int32_t
-af_inet_client_get_remote_sockaddr(rpc_transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len, int16_t remote_port)
-{
- dict_t *options = this->options;
- data_t *remote_host_data = NULL;
- data_t *remote_port_data = NULL;
- char *remote_host = NULL;
- struct addrinfo *addr_info = NULL;
- int32_t ret = 0;
-
- remote_host_data = dict_get(options, "remote-host");
- if (remote_host_data == NULL) {
- gf_msg(this->name, GF_LOG_ERROR, 0, TRANS_MSG_REMOTE_HOST_ERROR,
- "option remote-host "
- "missing in volume %s",
- this->name);
- ret = -1;
- goto err;
- }
-
- remote_host = data_to_str(remote_host_data);
- if (remote_host == NULL) {
- gf_msg(this->name, GF_LOG_ERROR, 0, TRANS_MSG_REMOTE_HOST_ERROR,
- "option remote-host "
- "has data NULL in volume %s",
- this->name);
- ret = -1;
- goto err;
- }
-
- if (remote_port == 0) {
- remote_port_data = dict_get(options, "remote-port");
- if (remote_port_data == NULL) {
- gf_msg_debug(this->name, 0,
- "option remote-port "
- "missing in volume %s. Defaulting to %d",
- this->name, GF_DEFAULT_RDMA_LISTEN_PORT);
-
- remote_port = GF_DEFAULT_RDMA_LISTEN_PORT;
- } else {
- remote_port = data_to_uint16(remote_port_data);
- }
- }
-
- if (remote_port == -1) {
- gf_msg(this->name, GF_LOG_ERROR, EINVAL, RDMA_MSG_INVALID_ENTRY,
- "option remote-port has "
- "invalid port in volume %s",
- this->name);
- ret = -1;
- goto err;
- }
-
- /* TODO: gf_resolve is a blocking call. kick in some
- non blocking dns techniques */
- ret = gf_resolve_ip6(remote_host, remote_port, sockaddr->sa_family,
- &this->dnscache, &addr_info);
- if (ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, 0, TRANS_MSG_DNS_RESOL_FAILED,
- "DNS resolution failed on host %s", remote_host);
- goto err;
- }
-
- memcpy(sockaddr, addr_info->ai_addr, addr_info->ai_addrlen);
- *sockaddr_len = addr_info->ai_addrlen;
-
-err:
- return ret;
-}
-
-static int32_t
-af_unix_client_get_remote_sockaddr(rpc_transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len)
-{
- struct sockaddr_un *sockaddr_un = NULL;
- char *connect_path = NULL;
- data_t *connect_path_data = NULL;
- int32_t ret = 0;
-
- connect_path_data = dict_get(this->options, "transport.rdma.connect-path");
- if (!connect_path_data) {
- gf_msg(this->name, GF_LOG_ERROR, 0, TRANS_MSG_CONNECT_PATH_ERROR,
- "option "
- "transport.rdma.connect-path not specified for "
- "address-family unix");
- ret = -1;
- goto err;
- }
-
- connect_path = data_to_str(connect_path_data);
- if (!connect_path) {
- gf_msg(this->name, GF_LOG_ERROR, EINVAL, RDMA_MSG_INVALID_ENTRY,
- "connect-path is null-string");
- ret = -1;
- goto err;
- }
-
- if (strlen(connect_path) > UNIX_PATH_MAX) {
- gf_msg(this->name, GF_LOG_ERROR, 0, TRANS_MSG_CONNECT_PATH_ERROR,
- "connect-path value length %" GF_PRI_SIZET
- " > "
- "%d octets",
- strlen(connect_path), UNIX_PATH_MAX);
- ret = -1;
- goto err;
- }
-
- gf_msg_debug(this->name, 0, "using connect-path %s", connect_path);
- sockaddr_un = (struct sockaddr_un *)sockaddr;
- strcpy(sockaddr_un->sun_path, connect_path);
- *sockaddr_len = sizeof(struct sockaddr_un);
-
-err:
- return ret;
-}
-
-static int32_t
-af_unix_server_get_local_sockaddr(rpc_transport_t *this, struct sockaddr *addr,
- socklen_t *addr_len)
-{
- data_t *listen_path_data = NULL;
- char *listen_path = NULL;
- int32_t ret = 0;
- struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
-
- listen_path_data = dict_get(this->options, "transport.rdma.listen-path");
- if (!listen_path_data) {
- gf_msg(this->name, GF_LOG_ERROR, 0, TRANS_MSG_LISTEN_PATH_ERROR,
- "missing option listen-path");
- ret = -1;
- goto err;
- }
-
- listen_path = data_to_str(listen_path_data);
-
-#ifndef UNIX_PATH_MAX
-#define UNIX_PATH_MAX 108
-#endif
-
- if (strlen(listen_path) > UNIX_PATH_MAX) {
- gf_msg(this->name, GF_LOG_ERROR, 0, TRANS_MSG_LISTEN_PATH_ERROR,
- "option listen-path has "
- "value length %" GF_PRI_SIZET " > %d",
- strlen(listen_path), UNIX_PATH_MAX);
- ret = -1;
- goto err;
- }
-
- sunaddr->sun_family = AF_UNIX;
- strcpy(sunaddr->sun_path, listen_path);
- *addr_len = sizeof(struct sockaddr_un);
-
-err:
- return ret;
-}
-
-static int32_t
-af_inet_server_get_local_sockaddr(rpc_transport_t *this, struct sockaddr *addr,
- socklen_t *addr_len)
-{
- struct addrinfo hints, *res = 0;
- data_t *listen_port_data = NULL, *listen_host_data = NULL;
- uint16_t listen_port = 0;
- char service[NI_MAXSERV], *listen_host = NULL;
- dict_t *options = NULL;
- int32_t ret = 0;
-
- options = this->options;
-
- listen_port_data = dict_get(options, "transport.rdma.listen-port");
- if (listen_port_data) {
- listen_port = data_to_uint16(listen_port_data);
- }
-
- listen_host_data = dict_get(options, "transport.rdma.bind-address");
- if (listen_host_data) {
- listen_host = data_to_str(listen_host_data);
- } else {
- if (addr->sa_family == AF_INET6) {
- struct sockaddr_in6 *in = (struct sockaddr_in6 *)addr;
- in->sin6_addr = in6addr_any;
- in->sin6_port = htons(listen_port);
- *addr_len = sizeof(struct sockaddr_in6);
- goto out;
- } else if (addr->sa_family == AF_INET) {
- struct sockaddr_in *in = (struct sockaddr_in *)addr;
- in->sin_addr.s_addr = htonl(INADDR_ANY);
- in->sin_port = htons(listen_port);
- *addr_len = sizeof(struct sockaddr_in);
- goto out;
- }
- }
-
- sprintf(service, "%d", listen_port);
-
- memset(&hints, 0, sizeof(hints));
- hints.ai_family = addr->sa_family;
- hints.ai_socktype = SOCK_STREAM;
- hints.ai_flags = AI_ADDRCONFIG | AI_PASSIVE;
-
- ret = getaddrinfo(listen_host, service, &hints, &res);
- if (ret != 0) {
- gf_msg(this->name, GF_LOG_ERROR, ret, TRANS_MSG_GET_ADDR_INFO_FAILED,
- "getaddrinfo failed for host %s, service %s", listen_host,
- service);
- ret = -1;
- goto out;
- }
-
- memcpy(addr, res->ai_addr, res->ai_addrlen);
- *addr_len = res->ai_addrlen;
-
- freeaddrinfo(res);
-
-out:
- return ret;
-}
-
-int32_t
-gf_rdma_client_bind(rpc_transport_t *this, struct sockaddr *sockaddr,
- socklen_t *sockaddr_len, struct rdma_cm_id *cm_id)
-{
- int ret = 0;
-
- *sockaddr_len = sizeof(struct sockaddr_in6);
- switch (sockaddr->sa_family) {
- case AF_INET_SDP:
- case AF_INET:
- *sockaddr_len = sizeof(struct sockaddr_in);
- /* Fall through */
- case AF_INET6:
- if (!this->bind_insecure) {
- ret = af_inet_bind_to_port_lt_ceiling(
- cm_id, sockaddr, *sockaddr_len, GF_CLIENT_PORT_CEILING);
- if (ret == -1) {
- gf_msg(this->name, GF_LOG_WARNING, errno,
- RDMA_MSG_PORT_BIND_FAILED,
- "cannot bind rdma_cm_id to port "
- "less than %d",
- GF_CLIENT_PORT_CEILING);
- }
- } else {
- ret = af_inet_bind_to_port_lt_ceiling(
- cm_id, sockaddr, *sockaddr_len, GF_IANA_PRIV_PORTS_START);
- if (ret == -1) {
- gf_msg(this->name, GF_LOG_WARNING, errno,
- RDMA_MSG_PORT_BIND_FAILED,
- "cannot bind rdma_cm_id to port "
- "less than %d",
- GF_IANA_PRIV_PORTS_START);
- }
- }
- break;
-
- case AF_UNIX:
- *sockaddr_len = sizeof(struct sockaddr_un);
-#if 0
- ret = af_unix_client_bind (this, (struct sockaddr *)sockaddr,
- *sockaddr_len, sock);
-#endif
- break;
-
- default:
- gf_msg(this->name, GF_LOG_ERROR, 0, TRANS_MSG_UNKNOWN_ADDR_FAMILY,
- "unknown address family %d", sockaddr->sa_family);
- ret = -1;
- break;
- }
-
- return ret;
-}
-
-int32_t
-gf_rdma_client_get_remote_sockaddr(rpc_transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len, int16_t remote_port)
-{
- int32_t ret = 0;
- char is_inet_sdp = 0;
-
- ret = client_fill_address_family(this, sockaddr);
- if (ret) {
- ret = -1;
- goto err;
- }
-
- switch (sockaddr->sa_family) {
- case AF_INET_SDP:
- sockaddr->sa_family = AF_INET;
- is_inet_sdp = 1;
- /* Fall through */
- case AF_INET:
- case AF_INET6:
- case AF_UNSPEC:
- ret = af_inet_client_get_remote_sockaddr(this, sockaddr,
- sockaddr_len, remote_port);
-
- if (is_inet_sdp) {
- sockaddr->sa_family = AF_INET_SDP;
- }
-
- break;
-
- case AF_UNIX:
- ret = af_unix_client_get_remote_sockaddr(this, sockaddr,
- sockaddr_len);
- break;
-
- default:
- gf_msg(this->name, GF_LOG_ERROR, 0, TRANS_MSG_UNKNOWN_ADDR_FAMILY,
- "unknown address-family %d", sockaddr->sa_family);
- ret = -1;
- }
-
-err:
- return ret;
-}
-
-int32_t
-gf_rdma_server_get_local_sockaddr(rpc_transport_t *this, struct sockaddr *addr,
- socklen_t *addr_len)
-{
- data_t *address_family_data = NULL;
- int32_t ret = 0;
- char is_inet_sdp = 0;
-
- address_family_data = dict_get(this->options, "transport.address-family");
- if (address_family_data) {
- char *address_family = NULL;
- address_family = data_to_str(address_family_data);
-
- if (!strcasecmp(address_family, "inet")) {
- addr->sa_family = AF_INET;
- } else if (!strcasecmp(address_family, "inet6")) {
- addr->sa_family = AF_INET6;
- } else if (!strcasecmp(address_family, "inet-sdp")) {
- addr->sa_family = AF_INET_SDP;
- } else if (!strcasecmp(address_family, "unix")) {
- addr->sa_family = AF_UNIX;
- } else {
- gf_msg(this->name, GF_LOG_ERROR, 0, TRANS_MSG_UNKNOWN_ADDR_FAMILY,
- "unknown address"
- " family (%s) specified",
- address_family);
- addr->sa_family = AF_UNSPEC;
- ret = -1;
- goto err;
- }
- } else {
- gf_msg_debug(this->name, 0,
- "option address-family not "
- "specified, defaulting to inet");
- addr->sa_family = AF_INET;
- }
-
- switch (addr->sa_family) {
- case AF_INET_SDP:
- is_inet_sdp = 1;
- addr->sa_family = AF_INET;
- /* Fall through */
- case AF_INET:
- case AF_INET6:
- case AF_UNSPEC:
- ret = af_inet_server_get_local_sockaddr(this, addr, addr_len);
- if (is_inet_sdp && !ret) {
- addr->sa_family = AF_INET_SDP;
- }
- break;
-
- case AF_UNIX:
- ret = af_unix_server_get_local_sockaddr(this, addr, addr_len);
- break;
- }
-
-err:
- return ret;
-}
-
-int32_t
-fill_inet6_inet_identifiers(rpc_transport_t *this,
- struct sockaddr_storage *addr, int32_t addr_len,
- char *identifier)
-{
- int32_t ret = 0, tmpaddr_len = 0;
- char service[NI_MAXSERV], host[NI_MAXHOST];
- union gf_sock_union sock_union;
-
- memset(&sock_union, 0, sizeof(sock_union));
- sock_union.storage = *addr;
- tmpaddr_len = addr_len;
-
- if (sock_union.sa.sa_family == AF_INET6) {
- int32_t one_to_four, four_to_eight, twelve_to_sixteen;
- int16_t eight_to_ten, ten_to_twelve;
-
- one_to_four = sock_union.sin6.sin6_addr.s6_addr32[0];
- four_to_eight = sock_union.sin6.sin6_addr.s6_addr32[1];
-#ifdef GF_SOLARIS_HOST_OS
- eight_to_ten = S6_ADDR16(sock_union.sin6.sin6_addr)[4];
- ten_to_twelve = S6_ADDR16(sock_union.sin6.sin6_addr)[5];
-#else
- eight_to_ten = sock_union.sin6.sin6_addr.s6_addr16[4];
- ten_to_twelve = sock_union.sin6.sin6_addr.s6_addr16[5];
-#endif
-
- twelve_to_sixteen = sock_union.sin6.sin6_addr.s6_addr32[3];
-
- /* ipv4 mapped ipv6 address has
- bits 0-80: 0
- bits 80-96: 0xffff
- bits 96-128: ipv4 address
- */
-
- if (one_to_four == 0 && four_to_eight == 0 && eight_to_ten == 0 &&
- ten_to_twelve == -1) {
- struct sockaddr_in *in_ptr = &sock_union.sin;
- memset(&sock_union, 0, sizeof(sock_union));
-
- in_ptr->sin_family = AF_INET;
- in_ptr->sin_port = ((struct sockaddr_in6 *)addr)->sin6_port;
- in_ptr->sin_addr.s_addr = twelve_to_sixteen;
- tmpaddr_len = sizeof(*in_ptr);
- }
- }
-
- ret = getnameinfo(&sock_union.sa, tmpaddr_len, host, sizeof(host), service,
- sizeof(service), NI_NUMERICHOST | NI_NUMERICSERV);
- if (ret != 0) {
- gf_msg(this->name, GF_LOG_ERROR, ret, TRANS_MSG_GET_NAME_INFO_FAILED,
- "getnameinfo failed");
- }
-
- sprintf(identifier, "%s:%s", host, service);
-
- return ret;
-}
-
-int32_t
-gf_rdma_get_transport_identifiers(rpc_transport_t *this)
-{
- int32_t ret = 0;
- char is_inet_sdp = 0;
-
- switch (((struct sockaddr *)&this->myinfo.sockaddr)->sa_family) {
- case AF_INET_SDP:
- is_inet_sdp = 1;
- ((struct sockaddr *)&this->peerinfo.sockaddr)
- ->sa_family = ((struct sockaddr *)&this->myinfo.sockaddr)
- ->sa_family = AF_INET;
- /* Fall through */
- case AF_INET:
- case AF_INET6: {
- ret = fill_inet6_inet_identifiers(this, &this->myinfo.sockaddr,
- this->myinfo.sockaddr_len,
- this->myinfo.identifier);
- if (ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, 0, TRANS_MSG_INET_ERROR,
- "can't fill inet/inet6 identifier for server");
- goto err;
- }
-
- ret = fill_inet6_inet_identifiers(this, &this->peerinfo.sockaddr,
- this->peerinfo.sockaddr_len,
- this->peerinfo.identifier);
- if (ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, 0, TRANS_MSG_INET_ERROR,
- "can't fill inet/inet6 identifier for client");
- goto err;
- }
-
- if (is_inet_sdp) {
- ((struct sockaddr *)&this->peerinfo.sockaddr)
- ->sa_family = ((struct sockaddr *)&this->myinfo.sockaddr)
- ->sa_family = AF_INET_SDP;
- }
- } break;
-
- case AF_UNIX: {
- struct sockaddr_un *sunaddr = NULL;
-
- sunaddr = (struct sockaddr_un *)&this->myinfo.sockaddr;
- strcpy(this->myinfo.identifier, sunaddr->sun_path);
-
- sunaddr = (struct sockaddr_un *)&this->peerinfo.sockaddr;
- strcpy(this->peerinfo.identifier, sunaddr->sun_path);
- } break;
-
- default:
- gf_msg(this->name, GF_LOG_ERROR, 0, TRANS_MSG_UNKNOWN_ADDR_FAMILY,
- "unknown address family (%d)",
- ((struct sockaddr *)&this->myinfo.sockaddr)->sa_family);
- ret = -1;
- break;
- }
-
-err:
- return ret;
-}
diff --git a/rpc/rpc-transport/rdma/src/name.h b/rpc/rpc-transport/rdma/src/name.h
deleted file mode 100644
index 6c782b52db5..00000000000
--- a/rpc/rpc-transport/rdma/src/name.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _IB_VERBS_NAME_H
-#define _IB_VERBS_NAME_H
-
-#include <rdma/rdma_cma.h>
-
-#include "compat.h"
-
-int32_t
-gf_rdma_client_bind(rpc_transport_t *this, struct sockaddr *sockaddr,
- socklen_t *sockaddr_len, struct rdma_cm_id *cm_id);
-
-int32_t
-gf_rdma_client_get_remote_sockaddr(rpc_transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len,
- int16_t remote_port);
-
-int32_t
-gf_rdma_server_get_local_sockaddr(rpc_transport_t *this, struct sockaddr *addr,
- socklen_t *addr_len);
-
-int32_t
-gf_rdma_get_transport_identifiers(rpc_transport_t *this);
-
-#endif /* _IB_VERBS_NAME_H */
diff --git a/rpc/rpc-transport/rdma/src/rdma.c b/rpc/rpc-transport/rdma/src/rdma.c
deleted file mode 100644
index ebf54885f1a..00000000000
--- a/rpc/rpc-transport/rdma/src/rdma.c
+++ /dev/null
@@ -1,4907 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include "dict.h"
-#include "glusterfs.h"
-#include "iobuf.h"
-#include "logging.h"
-#include "rdma.h"
-#include "name.h"
-#include "byte-order.h"
-#include "xlator.h"
-#include "xdr-rpc.h"
-#include "rpc-lib-messages.h"
-#include "rpc-trans-rdma-messages.h"
-#include <signal.h>
-
-#define GF_RDMA_LOG_NAME "rpc-transport/rdma"
-
-static int32_t
-__gf_rdma_ioq_churn(gf_rdma_peer_t *peer);
-
-gf_rdma_post_t *
-gf_rdma_post_ref(gf_rdma_post_t *post);
-
-int
-gf_rdma_post_unref(gf_rdma_post_t *post);
-
-static void *
-gf_rdma_send_completion_proc(void *data);
-
-static void *
-gf_rdma_recv_completion_proc(void *data);
-
-void *
-gf_rdma_async_event_thread(void *context);
-
-static int32_t
-gf_rdma_create_qp(rpc_transport_t *this);
-
-static int32_t
-__gf_rdma_teardown(rpc_transport_t *this);
-
-static int32_t
-gf_rdma_teardown(rpc_transport_t *this);
-
-static int32_t
-gf_rdma_disconnect(rpc_transport_t *this, gf_boolean_t wait);
-
-static void
-gf_rdma_cm_handle_disconnect(rpc_transport_t *this);
-
-static int
-gf_rdma_cm_handle_connect_init(struct rdma_cm_event *event);
-
-static void
-gf_rdma_put_post(gf_rdma_queue_t *queue, gf_rdma_post_t *post)
-{
- post->ctx.is_request = 0;
-
- pthread_mutex_lock(&queue->lock);
- {
- if (post->prev) {
- queue->active_count--;
- post->prev->next = post->next;
- }
-
- if (post->next) {
- post->next->prev = post->prev;
- }
-
- post->prev = &queue->passive_posts;
- post->next = post->prev->next;
- post->prev->next = post;
- post->next->prev = post;
- queue->passive_count++;
- }
- pthread_mutex_unlock(&queue->lock);
-}
-
-static gf_rdma_post_t *
-gf_rdma_new_post(rpc_transport_t *this, gf_rdma_device_t *device, int32_t len,
- gf_rdma_post_type_t type)
-{
- gf_rdma_post_t *post = NULL;
- int ret = -1;
-
- post = (gf_rdma_post_t *)GF_CALLOC(1, sizeof(*post),
- gf_common_mt_rdma_post_t);
- if (post == NULL) {
- goto out;
- }
-
- pthread_mutex_init(&post->lock, NULL);
-
- post->buf_size = len;
-
- post->buf = valloc(len);
- if (!post->buf) {
- gf_msg_nomem(GF_RDMA_LOG_NAME, GF_LOG_ERROR, len);
- goto out;
- }
-
- post->mr = ibv_reg_mr(device->pd, post->buf, post->buf_size,
- IBV_ACCESS_LOCAL_WRITE);
- if (!post->mr) {
- gf_msg(this->name, GF_LOG_WARNING, errno, RDMA_MSG_MR_ALOC_FAILED,
- "memory registration failed");
- goto out;
- }
-
- post->device = device;
- post->type = type;
-
- ret = 0;
-out:
- if (ret != 0 && post) {
- free(post->buf);
-
- GF_FREE(post);
- post = NULL;
- }
-
- return post;
-}
-
-static gf_rdma_post_t *
-gf_rdma_get_post(gf_rdma_queue_t *queue)
-{
- gf_rdma_post_t *post = NULL;
-
- pthread_mutex_lock(&queue->lock);
- {
- post = queue->passive_posts.next;
- if (post == &queue->passive_posts)
- post = NULL;
-
- if (post) {
- if (post->prev)
- post->prev->next = post->next;
- if (post->next)
- post->next->prev = post->prev;
- post->prev = &queue->active_posts;
- post->next = post->prev->next;
- post->prev->next = post;
- post->next->prev = post;
- post->reused++;
- queue->active_count++;
- }
- }
- pthread_mutex_unlock(&queue->lock);
-
- return post;
-}
-
-void
-gf_rdma_destroy_post(gf_rdma_post_t *post)
-{
- ibv_dereg_mr(post->mr);
- free(post->buf);
- GF_FREE(post);
-}
-
-static int32_t
-__gf_rdma_quota_get(gf_rdma_peer_t *peer)
-{
- int32_t ret = -1;
- gf_rdma_private_t *priv = NULL;
-
- priv = peer->trans->private;
-
- if (priv->connected && peer->quota > 0) {
- ret = peer->quota--;
- }
-
- return ret;
-}
-
-static void
-__gf_rdma_ioq_entry_free(gf_rdma_ioq_t *entry)
-{
- list_del_init(&entry->list);
-
- if (entry->iobref) {
- iobref_unref(entry->iobref);
- entry->iobref = NULL;
- }
-
- if (entry->msg.request.rsp_iobref) {
- iobref_unref(entry->msg.request.rsp_iobref);
- entry->msg.request.rsp_iobref = NULL;
- }
-
- mem_put(entry);
-}
-
-static void
-__gf_rdma_ioq_flush(gf_rdma_peer_t *peer)
-{
- gf_rdma_ioq_t *entry = NULL, *dummy = NULL;
-
- list_for_each_entry_safe(entry, dummy, &peer->ioq, list)
- {
- __gf_rdma_ioq_entry_free(entry);
- }
-}
-
-static int32_t
-__gf_rdma_disconnect(rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
-
- priv = this->private;
-
- if (priv->connected) {
- rdma_disconnect(priv->peer.cm_id);
- }
-
- return 0;
-}
-
-static void
-gf_rdma_queue_init(gf_rdma_queue_t *queue)
-{
- pthread_mutex_init(&queue->lock, NULL);
-
- queue->active_posts.next = &queue->active_posts;
- queue->active_posts.prev = &queue->active_posts;
- queue->passive_posts.next = &queue->passive_posts;
- queue->passive_posts.prev = &queue->passive_posts;
-}
-
-static void
-__gf_rdma_destroy_queue(gf_rdma_post_t *post)
-{
- gf_rdma_post_t *tmp = NULL;
-
- while (post->next != post) {
- tmp = post->next;
-
- post->next = post->next->next;
- post->next->prev = post;
-
- gf_rdma_destroy_post(tmp);
- }
-}
-
-static void
-gf_rdma_destroy_queue(gf_rdma_queue_t *queue)
-{
- if (queue == NULL) {
- goto out;
- }
-
- pthread_mutex_lock(&queue->lock);
- {
- if (queue->passive_count > 0) {
- __gf_rdma_destroy_queue(&queue->passive_posts);
- queue->passive_count = 0;
- }
-
- if (queue->active_count > 0) {
- __gf_rdma_destroy_queue(&queue->active_posts);
- queue->active_count = 0;
- }
- }
- pthread_mutex_unlock(&queue->lock);
-
-out:
- return;
-}
-
-static void
-gf_rdma_destroy_posts(rpc_transport_t *this)
-{
- gf_rdma_device_t *device = NULL;
- gf_rdma_private_t *priv = NULL;
-
- if (this == NULL) {
- goto out;
- }
-
- priv = this->private;
- device = priv->device;
-
- gf_rdma_destroy_queue(&device->sendq);
- gf_rdma_destroy_queue(&device->recvq);
-
-out:
- return;
-}
-
-static int32_t
-__gf_rdma_create_posts(rpc_transport_t *this, int32_t count, int32_t size,
- gf_rdma_queue_t *q, gf_rdma_post_type_t type)
-{
- int32_t i = 0;
- int32_t ret = 0;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_device_t *device = NULL;
-
- priv = this->private;
- device = priv->device;
-
- for (i = 0; i < count; i++) {
- gf_rdma_post_t *post = NULL;
-
- post = gf_rdma_new_post(this, device, size + 2048, type);
- if (!post) {
- gf_msg(this->name, GF_LOG_ERROR, 0, RDMA_MSG_POST_CREATION_FAILED,
- "post creation failed");
- ret = -1;
- break;
- }
-
- gf_rdma_put_post(q, post);
- }
- return ret;
-}
-
-static int32_t
-gf_rdma_post_recv(struct ibv_srq *srq, gf_rdma_post_t *post)
-{
- struct ibv_sge list = {.addr = (unsigned long)post->buf,
- .length = post->buf_size,
- .lkey = post->mr->lkey};
-
- struct ibv_recv_wr wr =
- {
- .wr_id = (unsigned long)post,
- .sg_list = &list,
- .num_sge = 1,
- },
- *bad_wr;
-
- gf_rdma_post_ref(post);
-
- return ibv_post_srq_recv(srq, &wr, &bad_wr);
-}
-
-static void
-gf_rdma_deregister_iobuf_pool(gf_rdma_device_t *device)
-{
- gf_rdma_arena_mr *arena_mr = NULL;
- gf_rdma_arena_mr *tmp = NULL;
-
- while (device) {
- pthread_mutex_lock(&device->all_mr_lock);
- {
- if (!list_empty(&device->all_mr)) {
- list_for_each_entry_safe(arena_mr, tmp, &device->all_mr, list)
- {
- if (ibv_dereg_mr(arena_mr->mr)) {
- gf_msg("rdma", GF_LOG_WARNING, 0,
- RDMA_MSG_DEREGISTER_ARENA_FAILED,
- "deallocation of memory region "
- "failed");
- pthread_mutex_unlock(&device->all_mr_lock);
- return;
- }
- list_del(&arena_mr->list);
- GF_FREE(arena_mr);
- }
- }
- }
- pthread_mutex_unlock(&device->all_mr_lock);
-
- device = device->next;
- }
-}
-
-int
-gf_rdma_deregister_arena(struct list_head **mr_list,
- struct iobuf_arena *iobuf_arena)
-{
- gf_rdma_arena_mr *tmp = NULL;
- gf_rdma_arena_mr *dummy = NULL;
- gf_rdma_device_t *device = NULL;
- int count = 0, i = 0;
-
- count = iobuf_arena->iobuf_pool->rdma_device_count;
- for (i = 0; i < count; i++) {
- device = iobuf_arena->iobuf_pool->device[i];
- pthread_mutex_lock(&device->all_mr_lock);
- {
- list_for_each_entry_safe(tmp, dummy, mr_list[i], list)
- {
- if (tmp->iobuf_arena == iobuf_arena) {
- if (ibv_dereg_mr(tmp->mr)) {
- gf_msg("rdma", GF_LOG_WARNING, 0,
- RDMA_MSG_DEREGISTER_ARENA_FAILED,
- "deallocation of memory region "
- "failed");
- pthread_mutex_unlock(&device->all_mr_lock);
- return -1;
- }
- list_del(&tmp->list);
- GF_FREE(tmp);
- break;
- }
- }
- }
- pthread_mutex_unlock(&device->all_mr_lock);
- }
-
- return 0;
-}
-
-int
-gf_rdma_register_arena(void **arg1, void *arg2)
-{
- struct ibv_mr *mr = NULL;
- gf_rdma_arena_mr *new = NULL;
- struct iobuf_pool *iobuf_pool = NULL;
- gf_rdma_device_t **device = (gf_rdma_device_t **)arg1;
- struct iobuf_arena *iobuf_arena = arg2;
- int count = 0, i = 0;
-
- iobuf_pool = iobuf_arena->iobuf_pool;
- count = iobuf_pool->rdma_device_count;
- for (i = 0; i < count; i++) {
- new = GF_CALLOC(1, sizeof(gf_rdma_arena_mr),
- gf_common_mt_rdma_arena_mr);
- if (new == NULL) {
- gf_msg("rdma", GF_LOG_INFO, ENOMEM, RDMA_MSG_MR_ALOC_FAILED,
- "Out of "
- "memory: registering pre allocated buffer "
- "with rdma device failed.");
- return -1;
- }
- INIT_LIST_HEAD(&new->list);
- new->iobuf_arena = iobuf_arena;
-
- mr = ibv_reg_mr(device[i]->pd, iobuf_arena->mem_base,
- iobuf_arena->arena_size,
- IBV_ACCESS_REMOTE_READ | IBV_ACCESS_LOCAL_WRITE |
- IBV_ACCESS_REMOTE_WRITE);
- if (!mr)
- gf_msg("rdma", GF_LOG_WARNING, 0, RDMA_MSG_MR_ALOC_FAILED,
- "allocation of mr "
- "failed");
-
- new->mr = mr;
- pthread_mutex_lock(&device[i]->all_mr_lock);
- {
- list_add(&new->list, &device[i]->all_mr);
- }
- pthread_mutex_unlock(&device[i]->all_mr_lock);
- new = NULL;
- }
-
- return 0;
-}
-
-static void
-gf_rdma_register_iobuf_pool(gf_rdma_device_t *device,
- struct iobuf_pool *iobuf_pool)
-{
- struct iobuf_arena *tmp = NULL;
- struct iobuf_arena *dummy = NULL;
- struct ibv_mr *mr = NULL;
- gf_rdma_arena_mr *new = NULL;
-
- if (!list_empty(&iobuf_pool->all_arenas)) {
- list_for_each_entry_safe(tmp, dummy, &iobuf_pool->all_arenas, all_list)
- {
- new = GF_CALLOC(1, sizeof(gf_rdma_arena_mr),
- gf_common_mt_rdma_arena_mr);
- if (new == NULL) {
- gf_msg("rdma", GF_LOG_INFO, ENOMEM, RDMA_MSG_MR_ALOC_FAILED,
- "Out of "
- "memory: registering pre allocated "
- "buffer with rdma device failed.");
- return;
- }
- INIT_LIST_HEAD(&new->list);
- new->iobuf_arena = tmp;
-
- mr = ibv_reg_mr(device->pd, tmp->mem_base, tmp->arena_size,
- IBV_ACCESS_REMOTE_READ | IBV_ACCESS_LOCAL_WRITE |
- IBV_ACCESS_REMOTE_WRITE);
- if (!mr) {
- gf_msg("rdma", GF_LOG_WARNING, 0, RDMA_MSG_MR_ALOC_FAILED,
- "failed"
- " to pre register buffers with rdma "
- "devices.");
- }
- new->mr = mr;
- pthread_mutex_lock(&device->all_mr_lock);
- {
- list_add(&new->list, &device->all_mr);
- }
- pthread_mutex_unlock(&device->all_mr_lock);
-
- new = NULL;
- }
- }
-
- return;
-}
-
-static void
-gf_rdma_register_iobuf_pool_with_device(gf_rdma_device_t *device,
- struct iobuf_pool *iobuf_pool)
-{
- while (device) {
- gf_rdma_register_iobuf_pool(device, iobuf_pool);
- device = device->next;
- }
-}
-
-static struct ibv_mr *
-gf_rdma_get_pre_registred_mr(rpc_transport_t *this, void *ptr, int size)
-{
- gf_rdma_arena_mr *tmp = NULL;
- gf_rdma_arena_mr *dummy = NULL;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_device_t *device = NULL;
-
- priv = this->private;
- device = priv->device;
-
- pthread_mutex_lock(&device->all_mr_lock);
- {
- if (!list_empty(&device->all_mr)) {
- list_for_each_entry_safe(tmp, dummy, &device->all_mr, list)
- {
- if (tmp->iobuf_arena->mem_base <= ptr &&
- ptr < tmp->iobuf_arena->mem_base +
- tmp->iobuf_arena->arena_size) {
- pthread_mutex_unlock(&device->all_mr_lock);
- return tmp->mr;
- }
- }
- }
- }
- pthread_mutex_unlock(&device->all_mr_lock);
-
- return NULL;
-}
-
-static int32_t
-gf_rdma_create_posts(rpc_transport_t *this)
-{
- int32_t i = 0, ret = 0;
- gf_rdma_post_t *post = NULL;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_options_t *options = NULL;
- gf_rdma_device_t *device = NULL;
-
- priv = this->private;
- options = &priv->options;
- device = priv->device;
-
- ret = __gf_rdma_create_posts(this, options->send_count, options->send_size,
- &device->sendq, GF_RDMA_SEND_POST);
- if (!ret)
- ret = __gf_rdma_create_posts(this, options->recv_count,
- options->recv_size, &device->recvq,
- GF_RDMA_RECV_POST);
-
- if (!ret) {
- for (i = 0; i < options->recv_count; i++) {
- post = gf_rdma_get_post(&device->recvq);
- if (gf_rdma_post_recv(device->srq, post) != 0) {
- ret = -1;
- break;
- }
- }
- }
-
- if (ret)
- gf_rdma_destroy_posts(this);
-
- return ret;
-}
-
-static void
-gf_rdma_destroy_cq(rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
- gf_rdma_device_t *device = NULL;
-
- priv = this->private;
- device = priv->device;
-
- if (device->recv_cq)
- ibv_destroy_cq(device->recv_cq);
- device->recv_cq = NULL;
-
- if (device->send_cq)
- ibv_destroy_cq(device->send_cq);
- device->send_cq = NULL;
-
- return;
-}
-
-static int32_t
-gf_rdma_create_cq(rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
- gf_rdma_options_t *options = NULL;
- gf_rdma_device_t *device = NULL;
- uint64_t send_cqe = 0;
- int32_t ret = 0;
- struct ibv_device_attr device_attr = {
- {0},
- };
-
- priv = this->private;
- options = &priv->options;
- device = priv->device;
-
- device->recv_cq = ibv_create_cq(priv->device->context,
- options->recv_count * 2, device,
- device->recv_chan, 0);
- if (!device->recv_cq) {
- gf_msg(this->name, GF_LOG_ERROR, 0, RDMA_MSG_CQ_CREATION_FAILED,
- "creation of CQ for "
- "device %s failed",
- device->device_name);
- ret = -1;
- goto out;
- } else if (ibv_req_notify_cq(device->recv_cq, 0)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, RDMA_MSG_REQ_NOTIFY_CQ_REVQ_FAILED,
- "ibv_req_notify_"
- "cq on recv CQ of device %s failed",
- device->device_name);
- ret = -1;
- goto out;
- }
-
- do {
- ret = ibv_query_device(priv->device->context, &device_attr);
- if (ret != 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, RDMA_MSG_QUERY_DEVICE_FAILED,
- "ibv_query_"
- "device on %s returned %d (%s)",
- priv->device->device_name, ret,
- (ret > 0) ? strerror(ret) : "");
- ret = -1;
- goto out;
- }
-
- send_cqe = (uint64_t)options->send_count * 128;
- send_cqe = (send_cqe > device_attr.max_cqe) ? device_attr.max_cqe
- : send_cqe;
-
- /* TODO: make send_cq size dynamically adaptive */
- device->send_cq = ibv_create_cq(priv->device->context, send_cqe, device,
- device->send_chan, 0);
- if (!device->send_cq) {
- gf_msg(this->name, GF_LOG_ERROR, 0, RDMA_MSG_CQ_CREATION_FAILED,
- "creation of send_cq "
- "for device %s failed",
- device->device_name);
- ret = -1;
- goto out;
- }
-
- if (ibv_req_notify_cq(device->send_cq, 0)) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- RDMA_MSG_REQ_NOTIFY_CQ_SENDQ_FAILED,
- "ibv_req_notify_cq on send_cq for device %s"
- " failed",
- device->device_name);
- ret = -1;
- goto out;
- }
- } while (0);
-
-out:
- if (ret != 0)
- gf_rdma_destroy_cq(this);
-
- return ret;
-}
-
-static gf_rdma_device_t *
-gf_rdma_get_device(rpc_transport_t *this, struct ibv_context *ibctx,
- char *device_name)
-{
- glusterfs_ctx_t *ctx = NULL;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_options_t *options = NULL;
- int32_t ret = 0;
- int32_t i = 0;
- gf_rdma_device_t *trav = NULL, *device = NULL;
- gf_rdma_ctx_t *rdma_ctx = NULL;
- struct iobuf_pool *iobuf_pool = NULL;
-
- priv = this->private;
- options = &priv->options;
- ctx = this->ctx;
- rdma_ctx = ctx->ib;
- iobuf_pool = ctx->iobuf_pool;
-
- trav = rdma_ctx->device;
-
- while (trav) {
- if (!strcmp(trav->device_name, device_name))
- break;
- trav = trav->next;
- }
-
- if (!trav) {
- trav = GF_CALLOC(1, sizeof(*trav), gf_common_mt_rdma_device_t);
- if (trav == NULL) {
- goto out;
- }
- priv->device = trav;
- trav->context = ibctx;
-
- trav->next = rdma_ctx->device;
- rdma_ctx->device = trav;
-
- iobuf_pool->device[iobuf_pool->rdma_device_count] = trav;
- iobuf_pool->mr_list[iobuf_pool->rdma_device_count++] = &trav->all_mr;
- trav->request_ctx_pool = mem_pool_new(gf_rdma_request_context_t,
- GF_RDMA_POOL_SIZE);
- if (trav->request_ctx_pool == NULL) {
- goto out;
- }
-
- trav->ioq_pool = mem_pool_new(gf_rdma_ioq_t, GF_RDMA_POOL_SIZE);
- if (trav->ioq_pool == NULL) {
- goto out;
- }
-
- trav->reply_info_pool = mem_pool_new(gf_rdma_reply_info_t,
- GF_RDMA_POOL_SIZE);
- if (trav->reply_info_pool == NULL) {
- goto out;
- }
-
- trav->device_name = gf_strdup(device_name);
-
- trav->send_chan = ibv_create_comp_channel(trav->context);
- if (!trav->send_chan) {
- gf_msg(this->name, GF_LOG_ERROR, 0, RDMA_MSG_SEND_COMP_CHAN_FAILED,
- "could not "
- "create send completion channel for "
- "device (%s)",
- device_name);
- goto out;
- }
-
- trav->recv_chan = ibv_create_comp_channel(trav->context);
- if (!trav->recv_chan) {
- gf_msg(this->name, GF_LOG_ERROR, 0, RDMA_MSG_RECV_COMP_CHAN_FAILED,
- "could not "
- "create recv completion channel for "
- "device (%s)",
- device_name);
-
- /* TODO: cleanup current mess */
- goto out;
- }
-
- if (gf_rdma_create_cq(this) < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, RDMA_MSG_CQ_CREATION_FAILED,
- "could not create CQ for device (%s)", device_name);
- goto out;
- }
-
- /* protection domain */
- trav->pd = ibv_alloc_pd(trav->context);
-
- if (!trav->pd) {
- gf_msg(this->name, GF_LOG_ERROR, 0, RDMA_MSG_ALOC_PROT_DOM_FAILED,
- "could not "
- "allocate protection domain for device (%s)",
- device_name);
- goto out;
- }
-
- struct ibv_srq_init_attr attr = {.attr = {.max_wr = options->recv_count,
- .max_sge = 1,
- .srq_limit = 10}};
- trav->srq = ibv_create_srq(trav->pd, &attr);
-
- if (!trav->srq) {
- gf_msg(this->name, GF_LOG_ERROR, 0, RDMA_MSG_CRE_SRQ_FAILED,
- "could not create SRQ"
- " for device (%s)",
- device_name);
- goto out;
- }
-
- /* queue init */
- gf_rdma_queue_init(&trav->sendq);
- gf_rdma_queue_init(&trav->recvq);
-
- INIT_LIST_HEAD(&trav->all_mr);
- pthread_mutex_init(&trav->all_mr_lock, NULL);
- gf_rdma_register_iobuf_pool(trav, iobuf_pool);
-
- if (gf_rdma_create_posts(this) < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, RDMA_MSG_ALOC_POST_FAILED,
- "could not allocate"
- "posts for device (%s)",
- device_name);
- goto out;
- }
-
- /* completion threads */
- ret = gf_thread_create(&trav->send_thread, NULL,
- gf_rdma_send_completion_proc, trav->send_chan,
- "rdmascom");
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- RDMA_MSG_SEND_COMP_THREAD_FAILED,
- "could not create send completion thread for "
- "device (%s)",
- device_name);
- goto out;
- }
-
- ret = gf_thread_create(&trav->recv_thread, NULL,
- gf_rdma_recv_completion_proc, trav->recv_chan,
- "rdmarcom");
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- RDMA_MSG_RECV_COMP_THREAD_FAILED,
- "could not create recv completion thread "
- "for device (%s)",
- device_name);
- return NULL;
- }
-
- ret = gf_thread_create(&trav->async_event_thread, NULL,
- gf_rdma_async_event_thread, ibctx, "rdmaAsyn");
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- RDMA_MSG_ASYNC_EVENT_THEAD_FAILED,
- "could not create async_event_thread");
- return NULL;
- }
-
- /* qpreg */
- pthread_mutex_init(&trav->qpreg.lock, NULL);
- for (i = 0; i < 42; i++) {
- trav->qpreg.ents[i].next = &trav->qpreg.ents[i];
- trav->qpreg.ents[i].prev = &trav->qpreg.ents[i];
- }
- }
-
- device = trav;
- trav = NULL;
-out:
-
- if (trav != NULL) {
- rdma_ctx->device = trav->next;
- gf_rdma_destroy_posts(this);
- mem_pool_destroy(trav->ioq_pool);
- mem_pool_destroy(trav->request_ctx_pool);
- mem_pool_destroy(trav->reply_info_pool);
- if (trav->pd != NULL) {
- ibv_dealloc_pd(trav->pd);
- }
- gf_rdma_destroy_cq(this);
- ibv_destroy_comp_channel(trav->recv_chan);
- ibv_destroy_comp_channel(trav->send_chan);
- GF_FREE((char *)trav->device_name);
- GF_FREE(trav);
- }
-
- return device;
-}
-
-static rpc_transport_t *
-gf_rdma_transport_new(rpc_transport_t *listener, struct rdma_cm_id *cm_id)
-{
- gf_rdma_private_t *listener_priv = NULL, *priv = NULL;
- rpc_transport_t *this = NULL, *new = NULL;
- gf_rdma_options_t *options = NULL;
- char *device_name = NULL;
-
- listener_priv = listener->private;
-
- this = GF_CALLOC(1, sizeof(rpc_transport_t), gf_common_mt_rpc_transport_t);
- if (this == NULL) {
- goto out;
- }
-
- this->listener = listener;
-
- priv = GF_CALLOC(1, sizeof(gf_rdma_private_t), gf_common_mt_rdma_private_t);
- if (priv == NULL) {
- goto out;
- }
-
- this->private = priv;
- priv->options = listener_priv->options;
-
- priv->listener = listener;
- priv->entity = GF_RDMA_SERVER;
-
- options = &priv->options;
-
- this->ops = listener->ops;
- this->init = listener->init;
- this->fini = listener->fini;
- this->ctx = listener->ctx;
- this->name = gf_strdup(listener->name);
- this->notify = listener->notify;
- this->mydata = listener->mydata;
- this->xl = listener->xl;
-
- this->myinfo.sockaddr_len = sizeof(cm_id->route.addr.src_addr);
- memcpy(&this->myinfo.sockaddr, &cm_id->route.addr.src_addr,
- this->myinfo.sockaddr_len);
-
- this->peerinfo.sockaddr_len = sizeof(cm_id->route.addr.dst_addr);
- memcpy(&this->peerinfo.sockaddr, &cm_id->route.addr.dst_addr,
- this->peerinfo.sockaddr_len);
-
- priv->peer.trans = this;
- gf_rdma_get_transport_identifiers(this);
-
- device_name = (char *)ibv_get_device_name(cm_id->verbs->device);
- if (device_name == NULL) {
- gf_msg(listener->name, GF_LOG_WARNING, 0,
- RDMA_MSG_GET_DEVICE_NAME_FAILED,
- "cannot get device "
- "name (peer:%s me:%s)",
- this->peerinfo.identifier, this->myinfo.identifier);
- goto out;
- }
-
- priv->device = gf_rdma_get_device(this, cm_id->verbs, device_name);
- if (priv->device == NULL) {
- gf_msg(listener->name, GF_LOG_WARNING, 0, RDMA_MSG_GET_IB_DEVICE_FAILED,
- "cannot get infiniband"
- " device %s (peer:%s me:%s)",
- device_name, this->peerinfo.identifier, this->myinfo.identifier);
- goto out;
- }
-
- priv->peer.send_count = options->send_count;
- priv->peer.recv_count = options->recv_count;
- priv->peer.send_size = options->send_size;
- priv->peer.recv_size = options->recv_size;
- priv->peer.cm_id = cm_id;
- INIT_LIST_HEAD(&priv->peer.ioq);
-
- pthread_mutex_init(&priv->write_mutex, NULL);
- pthread_mutex_init(&priv->recv_mutex, NULL);
-
- cm_id->context = this;
-
- new = rpc_transport_ref(this);
- this = NULL;
-out:
- if (this != NULL) {
- if (this->private != NULL) {
- GF_FREE(this->private);
- }
-
- if (this->name != NULL) {
- GF_FREE(this->name);
- }
-
- GF_FREE(this);
- }
-
- return new;
-}
-
-static int
-gf_rdma_cm_handle_connect_request(struct rdma_cm_event *event)
-{
- int ret = -1;
- rpc_transport_t *this = NULL, *listener = NULL;
- struct rdma_cm_id *child_cm_id = NULL, *listener_cm_id = NULL;
- struct rdma_conn_param conn_param = {
- 0,
- };
- gf_rdma_private_t *priv = NULL;
- gf_rdma_options_t *options = NULL;
-
- child_cm_id = event->id;
- listener_cm_id = event->listen_id;
-
- listener = listener_cm_id->context;
- priv = listener->private;
- options = &priv->options;
-
- this = gf_rdma_transport_new(listener, child_cm_id);
- if (this == NULL) {
- gf_msg(listener->name, GF_LOG_WARNING, 0,
- RDMA_MSG_CREAT_INC_TRANS_FAILED,
- "could not create "
- "a transport for incoming connection"
- " (me.name:%s me.identifier:%s)",
- listener->name, listener->myinfo.identifier);
- rdma_destroy_id(child_cm_id);
- goto out;
- }
-
- gf_msg_trace(listener->name, 0,
- "got a connect request (me:%s peer:"
- "%s)",
- listener->myinfo.identifier, this->peerinfo.identifier);
-
- ret = gf_rdma_create_qp(this);
- if (ret < 0) {
- gf_msg(listener->name, GF_LOG_WARNING, 0, RDMA_MSG_CREAT_QP_FAILED,
- "could not create QP "
- "(peer:%s me:%s)",
- this->peerinfo.identifier, this->myinfo.identifier);
- gf_rdma_cm_handle_disconnect(this);
- goto out;
- }
-
- conn_param.responder_resources = 1;
- conn_param.initiator_depth = 1;
- conn_param.retry_count = options->attr_retry_cnt;
- conn_param.rnr_retry_count = options->attr_rnr_retry;
-
- ret = rdma_accept(child_cm_id, &conn_param);
- if (ret < 0) {
- gf_msg(listener->name, GF_LOG_WARNING, errno, RDMA_MSG_ACCEPT_FAILED,
- "rdma_accept failed peer:%s "
- "me:%s",
- this->peerinfo.identifier, this->myinfo.identifier);
- gf_rdma_cm_handle_disconnect(this);
- goto out;
- }
- gf_rdma_cm_handle_connect_init(event);
- ret = 0;
-
-out:
- return ret;
-}
-
-static int
-gf_rdma_cm_handle_route_resolved(struct rdma_cm_event *event)
-{
- struct rdma_conn_param conn_param = {
- 0,
- };
- int ret = 0;
- rpc_transport_t *this = NULL;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_peer_t *peer = NULL;
- gf_rdma_options_t *options = NULL;
-
- if (event == NULL) {
- goto out;
- }
-
- this = event->id->context;
-
- priv = this->private;
- peer = &priv->peer;
- options = &priv->options;
-
- ret = gf_rdma_create_qp(this);
- if (ret != 0) {
- gf_msg(this->name, GF_LOG_WARNING, 0, RDMA_MSG_CREAT_QP_FAILED,
- "could not create QP "
- "(peer:%s me:%s)",
- this->peerinfo.identifier, this->myinfo.identifier);
- gf_rdma_cm_handle_disconnect(this);
- goto out;
- }
-
- memset(&conn_param, 0, sizeof conn_param);
- conn_param.responder_resources = 1;
- conn_param.initiator_depth = 1;
- conn_param.retry_count = options->attr_retry_cnt;
- conn_param.rnr_retry_count = options->attr_rnr_retry;
-
- ret = rdma_connect(peer->cm_id, &conn_param);
- if (ret != 0) {
- gf_msg(this->name, GF_LOG_WARNING, errno, RDMA_MSG_CONNECT_FAILED,
- "rdma_connect failed");
- gf_rdma_cm_handle_disconnect(this);
- goto out;
- }
-
- gf_msg_trace(this->name, 0, "route resolved (me:%s peer:%s)",
- this->myinfo.identifier, this->peerinfo.identifier);
-
- ret = 0;
-out:
- return ret;
-}
-
-static int
-gf_rdma_cm_handle_addr_resolved(struct rdma_cm_event *event)
-{
- rpc_transport_t *this = NULL;
- gf_rdma_peer_t *peer = NULL;
- gf_rdma_private_t *priv = NULL;
- int ret = 0;
-
- this = event->id->context;
-
- priv = this->private;
- peer = &priv->peer;
-
- GF_ASSERT(peer->cm_id == event->id);
-
- this->myinfo.sockaddr_len = sizeof(peer->cm_id->route.addr.src_addr);
- memcpy(&this->myinfo.sockaddr, &peer->cm_id->route.addr.src_addr,
- this->myinfo.sockaddr_len);
-
- this->peerinfo.sockaddr_len = sizeof(peer->cm_id->route.addr.dst_addr);
- memcpy(&this->peerinfo.sockaddr, &peer->cm_id->route.addr.dst_addr,
- this->peerinfo.sockaddr_len);
-
- gf_rdma_get_transport_identifiers(this);
-
- ret = rdma_resolve_route(peer->cm_id, 2000);
- if (ret != 0) {
- gf_msg(this->name, GF_LOG_WARNING, errno, RDMA_MSG_ROUTE_RESOLVE_FAILED,
- "rdma_resolve_route "
- "failed (me:%s peer:%s)",
- this->myinfo.identifier, this->peerinfo.identifier);
- gf_rdma_cm_handle_disconnect(this);
- return ret;
- }
-
- gf_msg_trace(this->name, 0, "Address resolved (me:%s peer:%s)",
- this->myinfo.identifier, this->peerinfo.identifier);
-
- return ret;
-}
-
-static void
-gf_rdma_cm_handle_disconnect(rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
- char need_unref = 0;
-
- priv = this->private;
- gf_msg_debug(this->name, 0, "peer disconnected, cleaning up");
-
- pthread_mutex_lock(&priv->write_mutex);
- {
- if (priv->peer.cm_id != NULL) {
- need_unref = 1;
- priv->connected = 0;
- }
-
- __gf_rdma_teardown(this);
- }
- pthread_mutex_unlock(&priv->write_mutex);
-
- rpc_transport_notify(this, RPC_TRANSPORT_DISCONNECT, this);
-
- if (need_unref)
- rpc_transport_unref(this);
-}
-
-static int
-gf_rdma_cm_handle_connect_init(struct rdma_cm_event *event)
-{
- rpc_transport_t *this = NULL;
- gf_rdma_private_t *priv = NULL;
- struct rdma_cm_id *cm_id = NULL;
- int ret = 0;
-
- cm_id = event->id;
- this = cm_id->context;
- priv = this->private;
-
- if (priv->connected == 1) {
- gf_msg_trace(this->name, 0,
- "received event "
- "RDMA_CM_EVENT_ESTABLISHED (me:%s peer:%s)",
- this->myinfo.identifier, this->peerinfo.identifier);
- return ret;
- }
-
- priv->connected = 1;
-
- pthread_mutex_lock(&priv->write_mutex);
- {
- priv->peer.quota = 1;
- priv->peer.quota_set = 0;
- }
- pthread_mutex_unlock(&priv->write_mutex);
-
- if (priv->entity == GF_RDMA_CLIENT) {
- gf_msg_trace(this->name, 0,
- "received event "
- "RDMA_CM_EVENT_ESTABLISHED (me:%s peer:%s)",
- this->myinfo.identifier, this->peerinfo.identifier);
- ret = rpc_transport_notify(this, RPC_TRANSPORT_CONNECT, this);
-
- } else if (priv->entity == GF_RDMA_SERVER) {
- ret = rpc_transport_notify(priv->listener, RPC_TRANSPORT_ACCEPT, this);
- }
-
- if (ret < 0) {
- gf_rdma_disconnect(this, _gf_false);
- }
-
- return ret;
-}
-
-static int
-gf_rdma_cm_handle_event_error(rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
-
- priv = this->private;
-
- if (priv->entity != GF_RDMA_SERVER_LISTENER) {
- gf_rdma_cm_handle_disconnect(this);
- }
-
- return 0;
-}
-
-static int
-gf_rdma_cm_handle_device_removal(struct rdma_cm_event *event)
-{
- return 0;
-}
-
-static void *
-gf_rdma_cm_event_handler(void *data)
-{
- struct rdma_cm_event *event = NULL;
- int ret = 0;
- rpc_transport_t *this = NULL;
- struct rdma_event_channel *event_channel = NULL;
-
- event_channel = data;
-
- while (1) {
- ret = rdma_get_cm_event(event_channel, &event);
- if (ret != 0) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, errno,
- RDMA_MSG_CM_EVENT_FAILED, "rdma_cm_get_event failed");
- break;
- }
-
- switch (event->event) {
- case RDMA_CM_EVENT_ADDR_RESOLVED:
- gf_rdma_cm_handle_addr_resolved(event);
- break;
-
- case RDMA_CM_EVENT_ROUTE_RESOLVED:
- gf_rdma_cm_handle_route_resolved(event);
- break;
-
- case RDMA_CM_EVENT_CONNECT_REQUEST:
- gf_rdma_cm_handle_connect_request(event);
- break;
-
- case RDMA_CM_EVENT_ESTABLISHED:
- gf_rdma_cm_handle_connect_init(event);
- break;
-
- case RDMA_CM_EVENT_ADDR_ERROR:
- case RDMA_CM_EVENT_ROUTE_ERROR:
- case RDMA_CM_EVENT_CONNECT_ERROR:
- case RDMA_CM_EVENT_UNREACHABLE:
- case RDMA_CM_EVENT_REJECTED:
- this = event->id->context;
-
- gf_msg(this->name, GF_LOG_WARNING, 0, RDMA_MSG_CM_EVENT_FAILED,
- "cma event %s, "
- "error %d (me:%s peer:%s)\n",
- rdma_event_str(event->event), event->status,
- this->myinfo.identifier, this->peerinfo.identifier);
-
- rdma_ack_cm_event(event);
- event = NULL;
-
- gf_rdma_cm_handle_event_error(this);
- continue;
-
- case RDMA_CM_EVENT_DISCONNECTED:
- this = event->id->context;
-
- gf_msg_debug(this->name, 0,
- "received disconnect "
- "(me:%s peer:%s)\n",
- this->myinfo.identifier,
- this->peerinfo.identifier);
-
- rdma_ack_cm_event(event);
- event = NULL;
-
- gf_rdma_cm_handle_disconnect(this);
- continue;
-
- case RDMA_CM_EVENT_DEVICE_REMOVAL:
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_CM_EVENT_FAILED,
- "device "
- "removed");
- gf_rdma_cm_handle_device_removal(event);
- break;
-
- default:
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_CM_EVENT_FAILED,
- "unhandled event: %s, ignoring",
- rdma_event_str(event->event));
- break;
- }
-
- rdma_ack_cm_event(event);
- }
-
- return NULL;
-}
-
-static int32_t
-gf_rdma_post_send(struct ibv_qp *qp, gf_rdma_post_t *post, int32_t len)
-{
- struct ibv_sge list = {.addr = (unsigned long)post->buf,
- .length = len,
- .lkey = post->mr->lkey};
-
- struct ibv_send_wr wr =
- {
- .wr_id = (unsigned long)post,
- .sg_list = &list,
- .num_sge = 1,
- .opcode = IBV_WR_SEND,
- .send_flags = IBV_SEND_SIGNALED,
- },
- *bad_wr;
-
- if (!qp)
- return EINVAL;
-
- return ibv_post_send(qp, &wr, &bad_wr);
-}
-
-int
-__gf_rdma_encode_error(gf_rdma_peer_t *peer, gf_rdma_reply_info_t *reply_info,
- struct iovec *rpchdr, gf_rdma_header_t *hdr,
- gf_rdma_errcode_t err)
-{
- struct rpc_msg *rpc_msg = NULL;
-
- if (reply_info != NULL) {
- hdr->rm_xid = hton32(reply_info->rm_xid);
- } else {
- rpc_msg = rpchdr[0].iov_base; /* assume rpchdr contains
- * only one vector.
- * (which is true)
- */
- hdr->rm_xid = rpc_msg->rm_xid;
- }
-
- hdr->rm_vers = hton32(GF_RDMA_VERSION);
- hdr->rm_credit = hton32(peer->send_count);
- hdr->rm_type = hton32(GF_RDMA_ERROR);
- hdr->rm_body.rm_error.rm_type = hton32(err);
- if (err == ERR_VERS) {
- hdr->rm_body.rm_error.rm_version.gf_rdma_vers_low = hton32(
- GF_RDMA_VERSION);
- hdr->rm_body.rm_error.rm_version.gf_rdma_vers_high = hton32(
- GF_RDMA_VERSION);
- }
-
- return sizeof(*hdr);
-}
-
-int32_t
-__gf_rdma_send_error(gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry,
- gf_rdma_post_t *post, gf_rdma_reply_info_t *reply_info,
- gf_rdma_errcode_t err)
-{
- int32_t ret = -1, len = 0;
-
- len = __gf_rdma_encode_error(peer, reply_info, entry->rpchdr,
- (gf_rdma_header_t *)post->buf, err);
- if (len == -1) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_ERROR, 0, RDMA_MSG_ENCODE_ERROR,
- "encode error returned -1");
- goto out;
- }
-
- gf_rdma_post_ref(post);
-
- ret = gf_rdma_post_send(peer->qp, post, len);
- if (!ret) {
- ret = len;
- } else {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0, RDMA_MSG_POST_SEND_FAILED,
- "gf_rdma_post_send (to %s) failed with ret = %d (%s)",
- peer->trans->peerinfo.identifier, ret,
- (ret > 0) ? strerror(ret) : "");
- gf_rdma_post_unref(post);
- __gf_rdma_disconnect(peer->trans);
- ret = -1;
- }
-
-out:
- return ret;
-}
-
-int32_t
-__gf_rdma_create_read_chunks_from_vector(gf_rdma_peer_t *peer,
- gf_rdma_read_chunk_t **readch_ptr,
- int32_t *pos, struct iovec *vector,
- int count,
- gf_rdma_request_context_t *request_ctx)
-{
- int i = 0;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_device_t *device = NULL;
- struct ibv_mr *mr = NULL;
- gf_rdma_read_chunk_t *readch = NULL;
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO(GF_RDMA_LOG_NAME, peer, out);
- GF_VALIDATE_OR_GOTO(GF_RDMA_LOG_NAME, readch_ptr, out);
- GF_VALIDATE_OR_GOTO(GF_RDMA_LOG_NAME, *readch_ptr, out);
- GF_VALIDATE_OR_GOTO(GF_RDMA_LOG_NAME, request_ctx, out);
- GF_VALIDATE_OR_GOTO(GF_RDMA_LOG_NAME, vector, out);
-
- priv = peer->trans->private;
- device = priv->device;
- readch = *readch_ptr;
-
- for (i = 0; i < count; i++) {
- readch->rc_discrim = hton32(1);
- readch->rc_position = hton32(*pos);
-
- mr = gf_rdma_get_pre_registred_mr(
- peer->trans, (void *)vector[i].iov_base, vector[i].iov_len);
- if (!mr) {
- mr = ibv_reg_mr(device->pd, vector[i].iov_base, vector[i].iov_len,
- IBV_ACCESS_REMOTE_READ);
- }
- if (!mr) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, errno,
- RDMA_MSG_MR_ALOC_FAILED,
- "memory registration failed (peer:%s)",
- peer->trans->peerinfo.identifier);
- goto out;
- }
-
- request_ctx->mr[request_ctx->mr_count++] = mr;
-
- readch->rc_target.rs_handle = hton32(mr->rkey);
- readch->rc_target.rs_length = hton32(vector[i].iov_len);
- readch->rc_target.rs_offset = hton64(
- (uint64_t)(unsigned long)vector[i].iov_base);
-
- *pos = *pos + vector[i].iov_len;
- readch++;
- }
-
- *readch_ptr = readch;
-
- ret = 0;
-out:
- return ret;
-}
-
-int32_t
-__gf_rdma_create_read_chunks(gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry,
- gf_rdma_chunktype_t type, uint32_t **ptr,
- gf_rdma_request_context_t *request_ctx)
-{
- int32_t ret = -1;
- int pos = 0;
-
- GF_VALIDATE_OR_GOTO(GF_RDMA_LOG_NAME, peer, out);
- GF_VALIDATE_OR_GOTO(GF_RDMA_LOG_NAME, entry, out);
- GF_VALIDATE_OR_GOTO(GF_RDMA_LOG_NAME, ptr, out);
- GF_VALIDATE_OR_GOTO(GF_RDMA_LOG_NAME, *ptr, out);
- GF_VALIDATE_OR_GOTO(GF_RDMA_LOG_NAME, request_ctx, out);
-
- request_ctx->iobref = iobref_ref(entry->iobref);
-
- if (type == gf_rdma_areadch) {
- pos = 0;
- ret = __gf_rdma_create_read_chunks_from_vector(
- peer, (gf_rdma_read_chunk_t **)ptr, &pos, entry->rpchdr,
- entry->rpchdr_count, request_ctx);
- if (ret == -1) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_READ_CHUNK_VECTOR_FAILED,
- "cannot create read chunks from vector "
- "entry->rpchdr");
- goto out;
- }
-
- ret = __gf_rdma_create_read_chunks_from_vector(
- peer, (gf_rdma_read_chunk_t **)ptr, &pos, entry->proghdr,
- entry->proghdr_count, request_ctx);
- if (ret == -1) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_READ_CHUNK_VECTOR_FAILED,
- "cannot create read chunks from vector "
- "entry->proghdr");
- }
-
- if (entry->prog_payload_count != 0) {
- ret = __gf_rdma_create_read_chunks_from_vector(
- peer, (gf_rdma_read_chunk_t **)ptr, &pos, entry->prog_payload,
- entry->prog_payload_count, request_ctx);
- if (ret == -1) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_READ_CHUNK_VECTOR_FAILED,
- "cannot create read chunks from vector"
- " entry->prog_payload");
- }
- }
- } else {
- pos = iov_length(entry->rpchdr, entry->rpchdr_count);
- ret = __gf_rdma_create_read_chunks_from_vector(
- peer, (gf_rdma_read_chunk_t **)ptr, &pos, entry->prog_payload,
- entry->prog_payload_count, request_ctx);
- if (ret == -1) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_READ_CHUNK_VECTOR_FAILED,
- "cannot create read chunks from vector "
- "entry->prog_payload");
- }
- }
-
- /* terminate read-chunk list*/
- **ptr = 0;
- *ptr = *ptr + 1;
-out:
- return ret;
-}
-
-int32_t
-__gf_rdma_create_write_chunks_from_vector(
- gf_rdma_peer_t *peer, gf_rdma_write_chunk_t **writech_ptr,
- struct iovec *vector, int count, gf_rdma_request_context_t *request_ctx)
-{
- int i = 0;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_device_t *device = NULL;
- struct ibv_mr *mr = NULL;
- gf_rdma_write_chunk_t *writech = NULL;
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO(GF_RDMA_LOG_NAME, peer, out);
- GF_VALIDATE_OR_GOTO(GF_RDMA_LOG_NAME, writech_ptr, out);
- GF_VALIDATE_OR_GOTO(GF_RDMA_LOG_NAME, *writech_ptr, out);
- GF_VALIDATE_OR_GOTO(GF_RDMA_LOG_NAME, request_ctx, out);
- GF_VALIDATE_OR_GOTO(GF_RDMA_LOG_NAME, vector, out);
-
- writech = *writech_ptr;
-
- priv = peer->trans->private;
- device = priv->device;
-
- for (i = 0; i < count; i++) {
- mr = gf_rdma_get_pre_registred_mr(
- peer->trans, (void *)vector[i].iov_base, vector[i].iov_len);
- if (!mr) {
- mr = ibv_reg_mr(device->pd, vector[i].iov_base, vector[i].iov_len,
- IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE);
- }
-
- if (!mr) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, errno,
- RDMA_MSG_MR_ALOC_FAILED,
- "memory "
- "registration failed (peer:%s)",
- peer->trans->peerinfo.identifier);
- goto out;
- }
-
- request_ctx->mr[request_ctx->mr_count++] = mr;
-
- writech->wc_target.rs_handle = hton32(mr->rkey);
- writech->wc_target.rs_length = hton32(vector[i].iov_len);
- writech->wc_target.rs_offset = hton64(
- ((uint64_t)(unsigned long)vector[i].iov_base));
-
- writech++;
- }
-
- *writech_ptr = writech;
-
- ret = 0;
-out:
- return ret;
-}
-
-int32_t
-__gf_rdma_create_write_chunks(gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry,
- gf_rdma_chunktype_t chunk_type, uint32_t **ptr,
- gf_rdma_request_context_t *request_ctx)
-{
- int32_t ret = -1;
- gf_rdma_write_array_t *warray = NULL;
-
- GF_VALIDATE_OR_GOTO(GF_RDMA_LOG_NAME, peer, out);
- GF_VALIDATE_OR_GOTO(GF_RDMA_LOG_NAME, ptr, out);
- GF_VALIDATE_OR_GOTO(GF_RDMA_LOG_NAME, *ptr, out);
- GF_VALIDATE_OR_GOTO(GF_RDMA_LOG_NAME, request_ctx, out);
- GF_VALIDATE_OR_GOTO(GF_RDMA_LOG_NAME, entry, out);
-
- if ((chunk_type == gf_rdma_replych) &&
- ((entry->msg.request.rsphdr_count != 1) ||
- (entry->msg.request.rsphdr_vec[0].iov_base == NULL))) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0, RDMA_MSG_BUFFER_ERROR,
- (entry->msg.request.rsphdr_count == 1)
- ? "chunktype specified as reply chunk but the vector "
- "specifying the buffer to be used for holding reply"
- " header is not correct"
- : "chunktype specified as reply chunk, but more than one "
- "buffer provided for holding reply");
- goto out;
- }
-
- /*
- if ((chunk_type == gf_rdma_writech)
- && ((entry->msg.request.rsphdr_count == 0)
- || (entry->msg.request.rsphdr_vec[0].iov_base == NULL))) {
- gf_msg_debug (GF_RDMA_LOG_NAME, 0,
- "vector specifying buffer to hold the program's reply "
- "header should also be provided when buffers are "
- "provided for holding the program's payload in reply");
- goto out;
- }
- */
-
- if (chunk_type == gf_rdma_writech) {
- warray = (gf_rdma_write_array_t *)*ptr;
- warray->wc_discrim = hton32(1);
- warray->wc_nchunks = hton32(entry->msg.request.rsp_payload_count);
-
- *ptr = (uint32_t *)&warray->wc_array[0];
-
- ret = __gf_rdma_create_write_chunks_from_vector(
- peer, (gf_rdma_write_chunk_t **)ptr, entry->msg.request.rsp_payload,
- entry->msg.request.rsp_payload_count, request_ctx);
- if (ret == -1) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_WRITE_CHUNK_VECTOR_FAILED,
- "cannot create write chunks from vector "
- "entry->rpc_payload");
- goto out;
- }
-
- /* terminate write chunklist */
- **ptr = 0;
- *ptr = *ptr + 1;
-
- /* no reply chunklist */
- **ptr = 0;
- *ptr = *ptr + 1;
- } else {
- /* no write chunklist */
- **ptr = 0;
- *ptr = *ptr + 1;
-
- warray = (gf_rdma_write_array_t *)*ptr;
- warray->wc_discrim = hton32(1);
- warray->wc_nchunks = hton32(entry->msg.request.rsphdr_count);
-
- *ptr = (uint32_t *)&warray->wc_array[0];
-
- ret = __gf_rdma_create_write_chunks_from_vector(
- peer, (gf_rdma_write_chunk_t **)ptr, entry->msg.request.rsphdr_vec,
- entry->msg.request.rsphdr_count, request_ctx);
- if (ret == -1) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_WRITE_CHUNK_VECTOR_FAILED,
- "cannot create write chunks from vector "
- "entry->rpchdr");
- goto out;
- }
-
- /* terminate reply chunklist */
- **ptr = 0;
- *ptr = *ptr + 1;
- }
-
-out:
- return ret;
-}
-
-static void
-__gf_rdma_deregister_mr(gf_rdma_device_t *device, struct ibv_mr **mr, int count)
-{
- gf_rdma_arena_mr *tmp = NULL;
- gf_rdma_arena_mr *dummy = NULL;
- int i = 0;
- int found = 0;
-
- if (mr == NULL) {
- goto out;
- }
-
- for (i = 0; i < count; i++) {
- found = 0;
- pthread_mutex_lock(&device->all_mr_lock);
- {
- if (!list_empty(&device->all_mr)) {
- list_for_each_entry_safe(tmp, dummy, &device->all_mr, list)
- {
- if (tmp->mr == mr[i]) {
- found = 1;
- break;
- }
- }
- }
- }
- pthread_mutex_unlock(&device->all_mr_lock);
- if (!found)
- ibv_dereg_mr(mr[i]);
- }
-
-out:
- return;
-}
-
-static int32_t
-__gf_rdma_quota_put(gf_rdma_peer_t *peer)
-{
- int32_t ret = 0;
-
- peer->quota++;
- ret = peer->quota;
-
- if (!list_empty(&peer->ioq)) {
- ret = __gf_rdma_ioq_churn(peer);
- }
-
- return ret;
-}
-
-static int32_t
-gf_rdma_quota_put(gf_rdma_peer_t *peer)
-{
- int32_t ret = 0;
- gf_rdma_private_t *priv = NULL;
-
- priv = peer->trans->private;
- pthread_mutex_lock(&priv->write_mutex);
- {
- ret = __gf_rdma_quota_put(peer);
- }
- pthread_mutex_unlock(&priv->write_mutex);
-
- return ret;
-}
-
-/* to be called with priv->mutex held */
-void
-__gf_rdma_request_context_destroy(gf_rdma_request_context_t *context)
-{
- gf_rdma_peer_t *peer = NULL;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_device_t *device = NULL;
- int32_t ret = 0;
-
- if (context == NULL) {
- goto out;
- }
-
- peer = context->peer;
-
- priv = peer->trans->private;
- device = priv->device;
- __gf_rdma_deregister_mr(device, context->mr, context->mr_count);
-
- if (priv->connected) {
- ret = __gf_rdma_quota_put(peer);
- if (ret < 0) {
- gf_msg_debug("rdma", 0, "failed to send message");
- mem_put(context);
- __gf_rdma_disconnect(peer->trans);
- goto out;
- }
- }
-
- if (context->iobref != NULL) {
- iobref_unref(context->iobref);
- context->iobref = NULL;
- }
-
- if (context->rsp_iobref != NULL) {
- iobref_unref(context->rsp_iobref);
- context->rsp_iobref = NULL;
- }
-
- mem_put(context);
-
-out:
- return;
-}
-
-void
-gf_rdma_post_context_destroy(gf_rdma_device_t *device,
- gf_rdma_post_context_t *ctx)
-{
- if (ctx == NULL) {
- goto out;
- }
-
- __gf_rdma_deregister_mr(device, ctx->mr, ctx->mr_count);
-
- if (ctx->iobref != NULL) {
- iobref_unref(ctx->iobref);
- }
-
- if (ctx->hdr_iobuf != NULL) {
- iobuf_unref(ctx->hdr_iobuf);
- }
-
- memset(ctx, 0, sizeof(*ctx));
-out:
- return;
-}
-
-int
-gf_rdma_post_unref(gf_rdma_post_t *post)
-{
- int refcount = -1;
-
- if (post == NULL) {
- goto out;
- }
-
- pthread_mutex_lock(&post->lock);
- {
- refcount = --post->refcount;
- }
- pthread_mutex_unlock(&post->lock);
-
- if (refcount == 0) {
- gf_rdma_post_context_destroy(post->device, &post->ctx);
- if (post->type == GF_RDMA_SEND_POST) {
- gf_rdma_put_post(&post->device->sendq, post);
- } else {
- gf_rdma_post_recv(post->device->srq, post);
- }
- }
-out:
- return refcount;
-}
-
-int
-gf_rdma_post_get_refcount(gf_rdma_post_t *post)
-{
- int refcount = -1;
-
- if (post == NULL) {
- goto out;
- }
-
- pthread_mutex_lock(&post->lock);
- {
- refcount = post->refcount;
- }
- pthread_mutex_unlock(&post->lock);
-
-out:
- return refcount;
-}
-
-gf_rdma_post_t *
-gf_rdma_post_ref(gf_rdma_post_t *post)
-{
- if (post == NULL) {
- goto out;
- }
-
- pthread_mutex_lock(&post->lock);
- {
- post->refcount++;
- }
- pthread_mutex_unlock(&post->lock);
-
-out:
- return post;
-}
-
-int32_t
-__gf_rdma_ioq_churn_request(gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry,
- gf_rdma_post_t *post)
-{
- gf_rdma_chunktype_t rtype = gf_rdma_noch;
- gf_rdma_chunktype_t wtype = gf_rdma_noch;
- uint64_t send_size = 0;
- gf_rdma_header_t *hdr = NULL;
- struct rpc_msg *rpc_msg = NULL;
- uint32_t *chunkptr = NULL;
- char *buf = NULL;
- int32_t ret = 0;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_device_t *device = NULL;
- int chunk_count = 0;
- gf_rdma_request_context_t *request_ctx = NULL;
- uint32_t prog_payload_length = 0, len = 0;
- struct rpc_req *rpc_req = NULL;
-
- GF_VALIDATE_OR_GOTO(GF_RDMA_LOG_NAME, peer, out);
- GF_VALIDATE_OR_GOTO(GF_RDMA_LOG_NAME, entry, out);
- GF_VALIDATE_OR_GOTO(GF_RDMA_LOG_NAME, post, out);
-
- if ((entry->msg.request.rsphdr_count != 0) &&
- (entry->msg.request.rsp_payload_count != 0)) {
- ret = -1;
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_WRITE_REPLY_CHUNCK_CONFLICT,
- "both write-chunklist and reply-chunk cannot be "
- "present");
- goto out;
- }
-
- post->ctx.is_request = 1;
- priv = peer->trans->private;
- device = priv->device;
-
- hdr = (gf_rdma_header_t *)post->buf;
-
- send_size = iov_length(entry->rpchdr, entry->rpchdr_count) +
- iov_length(entry->proghdr, entry->proghdr_count) +
- GLUSTERFS_RDMA_MAX_HEADER_SIZE;
-
- if (entry->prog_payload_count != 0) {
- prog_payload_length = iov_length(entry->prog_payload,
- entry->prog_payload_count);
- }
-
- if (send_size > GLUSTERFS_RDMA_INLINE_THRESHOLD) {
- rtype = gf_rdma_areadch;
- } else if ((send_size + prog_payload_length) <
- GLUSTERFS_RDMA_INLINE_THRESHOLD) {
- rtype = gf_rdma_noch;
- } else if (entry->prog_payload_count != 0) {
- rtype = gf_rdma_readch;
- }
-
- if (entry->msg.request.rsphdr_count != 0) {
- wtype = gf_rdma_replych;
- } else if (entry->msg.request.rsp_payload_count != 0) {
- wtype = gf_rdma_writech;
- }
-
- if (rtype == gf_rdma_readch) {
- chunk_count += entry->prog_payload_count;
- } else if (rtype == gf_rdma_areadch) {
- chunk_count += entry->rpchdr_count;
- chunk_count += entry->proghdr_count;
- }
-
- if (wtype == gf_rdma_writech) {
- chunk_count += entry->msg.request.rsp_payload_count;
- } else if (wtype == gf_rdma_replych) {
- chunk_count += entry->msg.request.rsphdr_count;
- }
-
- if (chunk_count > GF_RDMA_MAX_SEGMENTS) {
- ret = -1;
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_CHUNK_COUNT_GREAT_MAX_SEGMENTS,
- "chunk count(%d) exceeding maximum allowed RDMA "
- "segment count(%d)",
- chunk_count, GF_RDMA_MAX_SEGMENTS);
- goto out;
- }
-
- if ((wtype != gf_rdma_noch) || (rtype != gf_rdma_noch)) {
- request_ctx = mem_get(device->request_ctx_pool);
- if (request_ctx == NULL) {
- ret = -1;
- goto out;
- }
-
- memset(request_ctx, 0, sizeof(*request_ctx));
-
- request_ctx->pool = device->request_ctx_pool;
- request_ctx->peer = peer;
-
- entry->msg.request.rpc_req->conn_private = request_ctx;
-
- if (entry->msg.request.rsp_iobref != NULL) {
- request_ctx->rsp_iobref = iobref_ref(entry->msg.request.rsp_iobref);
- }
- }
-
- rpc_msg = (struct rpc_msg *)entry->rpchdr[0].iov_base;
-
- hdr->rm_xid = rpc_msg->rm_xid; /* no need of hton32(rpc_msg->rm_xid),
- * since rpc_msg->rm_xid is already
- * hton32ed value of actual xid
- */
- hdr->rm_vers = hton32(GF_RDMA_VERSION);
- hdr->rm_credit = hton32(peer->send_count);
- if (rtype == gf_rdma_areadch) {
- hdr->rm_type = hton32(GF_RDMA_NOMSG);
- } else {
- hdr->rm_type = hton32(GF_RDMA_MSG);
- }
-
- chunkptr = &hdr->rm_body.rm_chunks[0];
- if (rtype != gf_rdma_noch) {
- ret = __gf_rdma_create_read_chunks(peer, entry, rtype, &chunkptr,
- request_ctx);
- if (ret != 0) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_CREATE_READ_CHUNK_FAILED,
- "creation of read chunks failed");
- goto out;
- }
- } else {
- *chunkptr++ = 0; /* no read chunks */
- }
-
- if (wtype != gf_rdma_noch) {
- ret = __gf_rdma_create_write_chunks(peer, entry, wtype, &chunkptr,
- request_ctx);
- if (ret != 0) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_CREATE_WRITE_REPLAY_FAILED,
- "creation of write/reply chunk failed");
- goto out;
- }
- } else {
- *chunkptr++ = 0; /* no write chunks */
- *chunkptr++ = 0; /* no reply chunk */
- }
-
- buf = (char *)chunkptr;
-
- if (rtype != gf_rdma_areadch) {
- iov_unload(buf, entry->rpchdr, entry->rpchdr_count);
- buf += iov_length(entry->rpchdr, entry->rpchdr_count);
-
- iov_unload(buf, entry->proghdr, entry->proghdr_count);
- buf += iov_length(entry->proghdr, entry->proghdr_count);
-
- if (rtype != gf_rdma_readch) {
- iov_unload(buf, entry->prog_payload, entry->prog_payload_count);
- buf += iov_length(entry->prog_payload, entry->prog_payload_count);
- }
- }
-
- len = buf - post->buf;
-
- gf_rdma_post_ref(post);
-
- ret = gf_rdma_post_send(peer->qp, post, len);
- if (!ret) {
- ret = len;
- } else {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0, RDMA_MSG_POST_SEND_FAILED,
- "gf_rdma_post_send (to %s) failed with ret = %d (%s)",
- peer->trans->peerinfo.identifier, ret,
- (ret > 0) ? strerror(ret) : "");
- gf_rdma_post_unref(post);
- __gf_rdma_disconnect(peer->trans);
- ret = -1;
- }
-
-out:
- if (ret == -1) {
- rpc_req = entry->msg.request.rpc_req;
-
- if (request_ctx != NULL) {
- __gf_rdma_request_context_destroy(rpc_req->conn_private);
- }
-
- rpc_req->conn_private = NULL;
- }
-
- return ret;
-}
-
-static void
-__gf_rdma_fill_reply_header(gf_rdma_header_t *header, struct iovec *rpchdr,
- gf_rdma_reply_info_t *reply_info, int credits)
-{
- struct rpc_msg *rpc_msg = NULL;
-
- if (reply_info != NULL) {
- header->rm_xid = hton32(reply_info->rm_xid);
- } else {
- rpc_msg = rpchdr[0].iov_base; /* assume rpchdr contains
- * only one vector.
- * (which is true)
- */
- header->rm_xid = rpc_msg->rm_xid;
- }
-
- header->rm_type = hton32(GF_RDMA_MSG);
- header->rm_vers = hton32(GF_RDMA_VERSION);
- header->rm_credit = hton32(credits);
-
- header->rm_body.rm_chunks[0] = 0; /* no read chunks */
- header->rm_body.rm_chunks[1] = 0; /* no write chunks */
- header->rm_body.rm_chunks[2] = 0; /* no reply chunks */
-
- return;
-}
-
-int32_t
-__gf_rdma_send_reply_inline(gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry,
- gf_rdma_post_t *post,
- gf_rdma_reply_info_t *reply_info)
-{
- gf_rdma_header_t *header = NULL;
- int32_t send_size = 0, ret = 0;
- char *buf = NULL;
-
- send_size = iov_length(entry->rpchdr, entry->rpchdr_count) +
- iov_length(entry->proghdr, entry->proghdr_count) +
- iov_length(entry->prog_payload, entry->prog_payload_count) +
- sizeof(gf_rdma_header_t); /*
- * remember, no chunklists in the
- * reply
- */
-
- if (send_size > GLUSTERFS_RDMA_INLINE_THRESHOLD) {
- ret = __gf_rdma_send_error(peer, entry, post, reply_info, ERR_CHUNK);
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_SEND_SIZE_GREAT_INLINE_THRESHOLD,
- "msg size (%d) is greater than maximum size "
- "of msg that can be sent inlined (%d)",
- send_size, GLUSTERFS_RDMA_INLINE_THRESHOLD);
- goto out;
- }
-
- header = (gf_rdma_header_t *)post->buf;
-
- __gf_rdma_fill_reply_header(header, entry->rpchdr, reply_info,
- peer->send_count);
-
- buf = (char *)&header->rm_body.rm_chunks[3];
-
- if (entry->rpchdr_count != 0) {
- iov_unload(buf, entry->rpchdr, entry->rpchdr_count);
- buf += iov_length(entry->rpchdr, entry->rpchdr_count);
- }
-
- if (entry->proghdr_count != 0) {
- iov_unload(buf, entry->proghdr, entry->proghdr_count);
- buf += iov_length(entry->proghdr, entry->proghdr_count);
- }
-
- if (entry->prog_payload_count != 0) {
- iov_unload(buf, entry->prog_payload, entry->prog_payload_count);
- buf += iov_length(entry->prog_payload, entry->prog_payload_count);
- }
-
- gf_rdma_post_ref(post);
-
- ret = gf_rdma_post_send(peer->qp, post, (buf - post->buf));
- if (!ret) {
- ret = send_size;
- } else {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0, RDMA_MSG_POST_SEND_FAILED,
- "posting send (to %s) "
- "failed with ret = %d (%s)",
- peer->trans->peerinfo.identifier, ret,
- (ret > 0) ? strerror(ret) : "");
- gf_rdma_post_unref(post);
- __gf_rdma_disconnect(peer->trans);
- ret = -1;
- }
-
-out:
- return ret;
-}
-
-int32_t
-__gf_rdma_reply_encode_write_chunks(gf_rdma_peer_t *peer, uint32_t payload_size,
- gf_rdma_post_t *post,
- gf_rdma_reply_info_t *reply_info,
- uint32_t **ptr)
-{
- uint32_t chunk_size = 0;
- int32_t ret = -1;
- gf_rdma_write_array_t *target_array = NULL;
- int i = 0;
-
- target_array = (gf_rdma_write_array_t *)*ptr;
-
- for (i = 0; i < reply_info->wc_array->wc_nchunks; i++) {
- chunk_size += reply_info->wc_array->wc_array[i].wc_target.rs_length;
- }
-
- if (chunk_size < payload_size) {
- gf_msg_debug(GF_RDMA_LOG_NAME, 0,
- "length of payload (%d) is "
- "exceeding the total write chunk length (%d)",
- payload_size, chunk_size);
- goto out;
- }
-
- target_array->wc_discrim = hton32(1);
- for (i = 0; (i < reply_info->wc_array->wc_nchunks) && (payload_size != 0);
- i++) {
- target_array->wc_array[i].wc_target.rs_offset = hton64(
- reply_info->wc_array->wc_array[i].wc_target.rs_offset);
-
- target_array->wc_array[i].wc_target.rs_length = hton32(
- min(payload_size,
- reply_info->wc_array->wc_array[i].wc_target.rs_length));
- }
-
- target_array->wc_nchunks = hton32(i);
- target_array->wc_array[i].wc_target.rs_handle = 0; /* terminate
- chunklist */
-
- ret = 0;
-
- *ptr = &target_array->wc_array[i].wc_target.rs_length;
-out:
- return ret;
-}
-
-static int32_t
-__gf_rdma_register_local_mr_for_rdma(gf_rdma_peer_t *peer, struct iovec *vector,
- int count, gf_rdma_post_context_t *ctx)
-{
- int i = 0;
- int32_t ret = -1;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_device_t *device = NULL;
-
- GF_VALIDATE_OR_GOTO(GF_RDMA_LOG_NAME, ctx, out);
- GF_VALIDATE_OR_GOTO(GF_RDMA_LOG_NAME, vector, out);
-
- priv = peer->trans->private;
- device = priv->device;
-
- for (i = 0; i < count; i++) {
- /* what if the memory is registered more than once?
- * Assume that a single write buffer is passed to afr, which
- * then passes it to its children. If more than one children
- * happen to use rdma, then the buffer is registered more than
- * once.
- * Ib-verbs specification says that multiple registrations of
- * same memory location is allowed. Refer to 10.6.3.8 of
- * Infiniband Architecture Specification Volume 1
- * (Release 1.2.1)
- */
- ctx->mr[ctx->mr_count] = gf_rdma_get_pre_registred_mr(
- peer->trans, (void *)vector[i].iov_base, vector[i].iov_len);
-
- if (!ctx->mr[ctx->mr_count]) {
- ctx->mr[ctx->mr_count] = ibv_reg_mr(device->pd, vector[i].iov_base,
- vector[i].iov_len,
- IBV_ACCESS_LOCAL_WRITE);
- }
- if (ctx->mr[ctx->mr_count] == NULL) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, errno,
- RDMA_MSG_MR_ALOC_FAILED,
- "registering memory for IBV_ACCESS_LOCAL_WRITE"
- " failed");
- goto out;
- }
-
- ctx->mr_count++;
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-/* 1. assumes xfer_len of data is pointed by vector(s) starting from vec[*idx]
- * 2. modifies vec
- */
-int32_t
-__gf_rdma_write(gf_rdma_peer_t *peer, gf_rdma_post_t *post, struct iovec *vec,
- uint32_t xfer_len, int *idx, gf_rdma_write_chunk_t *writech)
-{
- int size = 0, num_sge = 0, i = 0;
- int32_t ret = -1;
- struct ibv_sge *sg_list = NULL;
- struct ibv_send_wr wr =
- {
- .opcode = IBV_WR_RDMA_WRITE,
- .send_flags = IBV_SEND_SIGNALED,
- },
- *bad_wr;
-
- if ((peer == NULL) || (writech == NULL) || (idx == NULL) ||
- (post == NULL) || (vec == NULL) || (xfer_len == 0)) {
- goto out;
- }
-
- for (i = *idx; size < xfer_len; i++) {
- size += vec[i].iov_len;
- }
-
- num_sge = i - *idx;
-
- sg_list = GF_CALLOC(num_sge, sizeof(struct ibv_sge), gf_common_mt_sge);
- if (sg_list == NULL) {
- ret = -1;
- goto out;
- }
-
- for ((i = *idx), (num_sge = 0); (xfer_len != 0); i++, num_sge++) {
- size = min(xfer_len, vec[i].iov_len);
-
- sg_list[num_sge].addr = (unsigned long)vec[i].iov_base;
- sg_list[num_sge].length = size;
- sg_list[num_sge].lkey = post->ctx.mr[i]->lkey;
-
- xfer_len -= size;
- }
-
- *idx = i;
-
- if (size < vec[i - 1].iov_len) {
- vec[i - 1].iov_base += size;
- vec[i - 1].iov_len -= size;
- *idx = i - 1;
- }
-
- wr.sg_list = sg_list;
- wr.num_sge = num_sge;
- wr.wr_id = (unsigned long)gf_rdma_post_ref(post);
- wr.wr.rdma.rkey = writech->wc_target.rs_handle;
- wr.wr.rdma.remote_addr = writech->wc_target.rs_offset;
-
- ret = ibv_post_send(peer->qp, &wr, &bad_wr);
- if (ret) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0, RDMA_MSG_WRITE_CLIENT_ERROR,
- "rdma write to "
- "client (%s) failed with ret = %d (%s)",
- peer->trans->peerinfo.identifier, ret,
- (ret > 0) ? strerror(ret) : "");
- ret = -1;
- }
-
- GF_FREE(sg_list);
-out:
- return ret;
-}
-
-int32_t
-__gf_rdma_do_gf_rdma_write(gf_rdma_peer_t *peer, gf_rdma_post_t *post,
- struct iovec *vector, int count,
- struct iobref *iobref,
- gf_rdma_reply_info_t *reply_info)
-{
- int i = 0, payload_idx = 0;
- uint32_t payload_size = 0, xfer_len = 0;
- int32_t ret = -1;
-
- if (count != 0) {
- payload_size = iov_length(vector, count);
- }
-
- if (payload_size == 0) {
- ret = 0;
- goto out;
- }
-
- ret = __gf_rdma_register_local_mr_for_rdma(peer, vector, count, &post->ctx);
- if (ret == -1) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0, RDMA_MSG_MR_ALOC_FAILED,
- "registering memory region for rdma failed");
- goto out;
- }
-
- post->ctx.iobref = iobref_ref(iobref);
-
- for (i = 0; (i < reply_info->wc_array->wc_nchunks) && (payload_size != 0);
- i++) {
- xfer_len = min(payload_size,
- reply_info->wc_array->wc_array[i].wc_target.rs_length);
-
- ret = __gf_rdma_write(peer, post, vector, xfer_len, &payload_idx,
- &reply_info->wc_array->wc_array[i]);
- if (ret == -1) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_WRITE_CLIENT_ERROR,
- "rdma write to "
- "client (%s) failed",
- peer->trans->peerinfo.identifier);
- goto out;
- }
-
- payload_size -= xfer_len;
- }
-
- ret = 0;
-out:
-
- return ret;
-}
-
-int32_t
-__gf_rdma_send_reply_type_nomsg(gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry,
- gf_rdma_post_t *post,
- gf_rdma_reply_info_t *reply_info)
-{
- gf_rdma_header_t *header = NULL;
- char *buf = NULL;
- uint32_t payload_size = 0;
- int count = 0, i = 0;
- int32_t ret = 0;
- struct iovec vector[MAX_IOVEC];
-
- header = (gf_rdma_header_t *)post->buf;
-
- __gf_rdma_fill_reply_header(header, entry->rpchdr, reply_info,
- peer->send_count);
-
- header->rm_type = hton32(GF_RDMA_NOMSG);
-
- payload_size = iov_length(entry->rpchdr, entry->rpchdr_count) +
- iov_length(entry->proghdr, entry->proghdr_count);
-
- /* encode reply chunklist */
- buf = (char *)&header->rm_body.rm_chunks[2];
- ret = __gf_rdma_reply_encode_write_chunks(peer, payload_size, post,
- reply_info, (uint32_t **)&buf);
- if (ret == -1) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0, RDMA_MSG_ENCODE_ERROR,
- "encoding write chunks failed");
- ret = __gf_rdma_send_error(peer, entry, post, reply_info, ERR_CHUNK);
- goto out;
- }
-
- gf_rdma_post_ref(post);
-
- for (i = 0; i < entry->rpchdr_count; i++) {
- vector[count++] = entry->rpchdr[i];
- }
-
- for (i = 0; i < entry->proghdr_count; i++) {
- vector[count++] = entry->proghdr[i];
- }
-
- ret = __gf_rdma_do_gf_rdma_write(peer, post, vector, count, entry->iobref,
- reply_info);
- if (ret == -1) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0, RDMA_MSG_WRITE_PEER_FAILED,
- "rdma write to peer "
- "(%s) failed",
- peer->trans->peerinfo.identifier);
- gf_rdma_post_unref(post);
- goto out;
- }
-
- ret = gf_rdma_post_send(peer->qp, post, (buf - post->buf));
- if (ret) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0, RDMA_MSG_POST_SEND_FAILED,
- "posting a send request "
- "to client (%s) failed with ret = %d (%s)",
- peer->trans->peerinfo.identifier, ret,
- (ret > 0) ? strerror(ret) : "");
- ret = -1;
- gf_rdma_post_unref(post);
- } else {
- ret = payload_size;
- }
-
-out:
- return ret;
-}
-
-int32_t
-__gf_rdma_send_reply_type_msg(gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry,
- gf_rdma_post_t *post,
- gf_rdma_reply_info_t *reply_info)
-{
- gf_rdma_header_t *header = NULL;
- int32_t send_size = 0, ret = 0;
- char *ptr = NULL;
- uint32_t payload_size = 0;
-
- send_size = iov_length(entry->rpchdr, entry->rpchdr_count) +
- iov_length(entry->proghdr, entry->proghdr_count) +
- GLUSTERFS_RDMA_MAX_HEADER_SIZE;
-
- if (send_size > GLUSTERFS_RDMA_INLINE_THRESHOLD) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_SEND_SIZE_GREAT_INLINE_THRESHOLD,
- "client has provided only write chunks, but the "
- "combined size of rpc and program header (%d) is "
- "exceeding the size of msg that can be sent using "
- "RDMA send (%d)",
- send_size, GLUSTERFS_RDMA_INLINE_THRESHOLD);
-
- ret = __gf_rdma_send_error(peer, entry, post, reply_info, ERR_CHUNK);
- goto out;
- }
-
- header = (gf_rdma_header_t *)post->buf;
-
- __gf_rdma_fill_reply_header(header, entry->rpchdr, reply_info,
- peer->send_count);
-
- payload_size = iov_length(entry->prog_payload, entry->prog_payload_count);
- ptr = (char *)&header->rm_body.rm_chunks[1];
-
- ret = __gf_rdma_reply_encode_write_chunks(peer, payload_size, post,
- reply_info, (uint32_t **)&ptr);
- if (ret == -1) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0, RDMA_MSG_ENCODE_ERROR,
- "encoding write chunks failed");
- ret = __gf_rdma_send_error(peer, entry, post, reply_info, ERR_CHUNK);
- goto out;
- }
-
- *(uint32_t *)ptr = 0; /* terminate reply chunklist */
- ptr += sizeof(uint32_t);
-
- gf_rdma_post_ref(post);
-
- ret = __gf_rdma_do_gf_rdma_write(peer, post, entry->prog_payload,
- entry->prog_payload_count, entry->iobref,
- reply_info);
- if (ret == -1) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0, RDMA_MSG_WRITE_PEER_FAILED,
- "rdma write to peer "
- "(%s) failed",
- peer->trans->peerinfo.identifier);
- gf_rdma_post_unref(post);
- goto out;
- }
-
- iov_unload(ptr, entry->rpchdr, entry->rpchdr_count);
- ptr += iov_length(entry->rpchdr, entry->rpchdr_count);
-
- iov_unload(ptr, entry->proghdr, entry->proghdr_count);
- ptr += iov_length(entry->proghdr, entry->proghdr_count);
-
- ret = gf_rdma_post_send(peer->qp, post, (ptr - post->buf));
- if (ret) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0, RDMA_MSG_SEND_CLIENT_ERROR,
- "rdma send to client (%s) failed with ret = %d (%s)",
- peer->trans->peerinfo.identifier, ret,
- (ret > 0) ? strerror(ret) : "");
- gf_rdma_post_unref(post);
- ret = -1;
- } else {
- ret = send_size + payload_size;
- }
-
-out:
- return ret;
-}
-
-void
-gf_rdma_reply_info_destroy(gf_rdma_reply_info_t *reply_info)
-{
- if (reply_info == NULL) {
- goto out;
- }
-
- if (reply_info->wc_array != NULL) {
- GF_FREE(reply_info->wc_array);
- reply_info->wc_array = NULL;
- }
-
- mem_put(reply_info);
-out:
- return;
-}
-
-gf_rdma_reply_info_t *
-gf_rdma_reply_info_alloc(gf_rdma_peer_t *peer)
-{
- gf_rdma_reply_info_t *reply_info = NULL;
- gf_rdma_private_t *priv = NULL;
-
- priv = peer->trans->private;
-
- reply_info = mem_get(priv->device->reply_info_pool);
- if (reply_info == NULL) {
- goto out;
- }
-
- memset(reply_info, 0, sizeof(*reply_info));
- reply_info->pool = priv->device->reply_info_pool;
-
-out:
- return reply_info;
-}
-
-int32_t
-__gf_rdma_ioq_churn_reply(gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry,
- gf_rdma_post_t *post)
-{
- gf_rdma_reply_info_t *reply_info = NULL;
- int32_t ret = -1;
- gf_rdma_chunktype_t type = gf_rdma_noch;
-
- GF_VALIDATE_OR_GOTO(GF_RDMA_LOG_NAME, peer, out);
- GF_VALIDATE_OR_GOTO(GF_RDMA_LOG_NAME, entry, out);
- GF_VALIDATE_OR_GOTO(GF_RDMA_LOG_NAME, post, out);
-
- reply_info = entry->msg.reply_info;
- if (reply_info != NULL) {
- type = reply_info->type;
- }
-
- switch (type) {
- case gf_rdma_noch:
- ret = __gf_rdma_send_reply_inline(peer, entry, post, reply_info);
- if (ret < 0) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_SEND_REPLY_FAILED,
- "failed to send reply to peer (%s) as an "
- "inlined rdma msg",
- peer->trans->peerinfo.identifier);
- }
- break;
-
- case gf_rdma_replych:
- ret = __gf_rdma_send_reply_type_nomsg(peer, entry, post,
- reply_info);
- if (ret < 0) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_SEND_REPLY_FAILED,
- "failed to send reply to peer (%s) as "
- "RDMA_NOMSG",
- peer->trans->peerinfo.identifier);
- }
- break;
-
- case gf_rdma_writech:
- ret = __gf_rdma_send_reply_type_msg(peer, entry, post, reply_info);
- if (ret < 0) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_SEND_REPLY_FAILED,
- "failed to send reply with write chunks "
- "to peer (%s)",
- peer->trans->peerinfo.identifier);
- }
- break;
-
- default:
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_INVALID_CHUNK_TYPE,
- "invalid chunktype (%d) specified for sending reply "
- " (peer:%s)",
- type, peer->trans->peerinfo.identifier);
- break;
- }
-
- if (reply_info != NULL) {
- gf_rdma_reply_info_destroy(reply_info);
- }
-out:
- return ret;
-}
-
-int32_t
-__gf_rdma_ioq_churn_entry(gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry)
-{
- int32_t ret = 0, quota = 0;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_device_t *device = NULL;
- gf_rdma_options_t *options = NULL;
- gf_rdma_post_t *post = NULL;
-
- priv = peer->trans->private;
- options = &priv->options;
- device = priv->device;
-
- quota = __gf_rdma_quota_get(peer);
- if (quota > 0) {
- post = gf_rdma_get_post(&device->sendq);
- if (post == NULL) {
- post = gf_rdma_new_post(peer->trans, device,
- (options->send_size + 2048),
- GF_RDMA_SEND_POST);
- }
-
- if (post == NULL) {
- ret = -1;
- gf_msg_callingfn(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_POST_SEND_FAILED,
- "not able to get a post to send msg");
- goto out;
- }
-
- if (entry->is_request) {
- ret = __gf_rdma_ioq_churn_request(peer, entry, post);
- if (ret < 0) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_PROC_IOQ_ENTRY_FAILED,
- "failed to process request ioq entry "
- "to peer(%s)",
- peer->trans->peerinfo.identifier);
- }
- } else {
- ret = __gf_rdma_ioq_churn_reply(peer, entry, post);
- if (ret < 0) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_PROC_IOQ_ENTRY_FAILED,
- "failed to process reply ioq entry "
- "to peer (%s)",
- peer->trans->peerinfo.identifier);
- }
- }
-
- if (ret != 0) {
- __gf_rdma_ioq_entry_free(entry);
- }
- } else {
- ret = 0;
- }
-
-out:
- return ret;
-}
-
-static int32_t
-__gf_rdma_ioq_churn(gf_rdma_peer_t *peer)
-{
- gf_rdma_ioq_t *entry = NULL;
- int32_t ret = 0;
-
- while (!list_empty(&peer->ioq)) {
- /* pick next entry */
- entry = peer->ioq_next;
-
- ret = __gf_rdma_ioq_churn_entry(peer, entry);
-
- if (ret <= 0)
- break;
- }
-
- /*
- list_for_each_entry_safe (entry, dummy, &peer->ioq, list) {
- ret = __gf_rdma_ioq_churn_entry (peer, entry);
- if (ret <= 0) {
- break;
- }
- }
- */
-
- return ret;
-}
-
-static int32_t
-gf_rdma_writev(rpc_transport_t *this, gf_rdma_ioq_t *entry)
-{
- int32_t ret = 0, need_append = 1;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_peer_t *peer = NULL;
-
- priv = this->private;
- pthread_mutex_lock(&priv->write_mutex);
- {
- if (!priv->connected) {
- gf_msg(this->name, GF_LOG_WARNING, 0, RDMA_MSG_PEER_DISCONNECTED,
- "rdma is not connected to peer (%s)",
- this->peerinfo.identifier);
- ret = -1;
- goto unlock;
- }
-
- peer = &priv->peer;
- if (list_empty(&peer->ioq)) {
- ret = __gf_rdma_ioq_churn_entry(peer, entry);
- if (ret != 0) {
- need_append = 0;
-
- if (ret < 0) {
- gf_msg(this->name, GF_LOG_WARNING, 0,
- RDMA_MSG_PROC_IOQ_ENTRY_FAILED,
- "processing ioq entry destined"
- " to (%s) failed",
- this->peerinfo.identifier);
- }
- }
- }
-
- if (need_append) {
- list_add_tail(&entry->list, &peer->ioq);
- }
- }
-unlock:
- pthread_mutex_unlock(&priv->write_mutex);
- return ret;
-}
-
-gf_rdma_ioq_t *
-gf_rdma_ioq_new(rpc_transport_t *this, rpc_transport_data_t *data)
-{
- gf_rdma_ioq_t *entry = NULL;
- int count = 0, i = 0;
- rpc_transport_msg_t *msg = NULL;
- gf_rdma_private_t *priv = NULL;
-
- if ((data == NULL) || (this == NULL)) {
- goto out;
- }
-
- priv = this->private;
-
- entry = mem_get(priv->device->ioq_pool);
- if (entry == NULL) {
- goto out;
- }
- memset(entry, 0, sizeof(*entry));
- entry->pool = priv->device->ioq_pool;
-
- if (data->is_request) {
- msg = &data->data.req.msg;
- if (data->data.req.rsp.rsphdr_count != 0) {
- for (i = 0; i < data->data.req.rsp.rsphdr_count; i++) {
- entry->msg.request.rsphdr_vec[i] = data->data.req.rsp.rsphdr[i];
- }
-
- entry->msg.request.rsphdr_count = data->data.req.rsp.rsphdr_count;
- }
-
- if (data->data.req.rsp.rsp_payload_count != 0) {
- for (i = 0; i < data->data.req.rsp.rsp_payload_count; i++) {
- entry->msg.request.rsp_payload[i] = data->data.req.rsp
- .rsp_payload[i];
- }
-
- entry->msg.request.rsp_payload_count = data->data.req.rsp
- .rsp_payload_count;
- }
-
- entry->msg.request.rpc_req = data->data.req.rpc_req;
-
- if (data->data.req.rsp.rsp_iobref != NULL) {
- entry->msg.request.rsp_iobref = iobref_ref(
- data->data.req.rsp.rsp_iobref);
- }
- } else {
- msg = &data->data.reply.msg;
- entry->msg.reply_info = data->data.reply.private;
- }
-
- entry->is_request = data->is_request;
-
- count = msg->rpchdrcount + msg->proghdrcount + msg->progpayloadcount;
-
- GF_ASSERT(count <= MAX_IOVEC);
-
- if (msg->rpchdr != NULL) {
- memcpy(&entry->rpchdr[0], msg->rpchdr,
- sizeof(struct iovec) * msg->rpchdrcount);
- entry->rpchdr_count = msg->rpchdrcount;
- }
-
- if (msg->proghdr != NULL) {
- memcpy(&entry->proghdr[0], msg->proghdr,
- sizeof(struct iovec) * msg->proghdrcount);
- entry->proghdr_count = msg->proghdrcount;
- }
-
- if (msg->progpayload != NULL) {
- memcpy(&entry->prog_payload[0], msg->progpayload,
- sizeof(struct iovec) * msg->progpayloadcount);
- entry->prog_payload_count = msg->progpayloadcount;
- }
-
- if (msg->iobref != NULL) {
- entry->iobref = iobref_ref(msg->iobref);
- }
-
- INIT_LIST_HEAD(&entry->list);
-
-out:
- return entry;
-}
-
-int32_t
-gf_rdma_submit_request(rpc_transport_t *this, rpc_transport_req_t *req)
-{
- int32_t ret = 0;
- gf_rdma_ioq_t *entry = NULL;
- rpc_transport_data_t data;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_peer_t *peer = NULL;
-
- if (req == NULL) {
- goto out;
- }
-
- priv = this->private;
- if (priv == NULL) {
- ret = -1;
- goto out;
- }
-
- peer = &priv->peer;
- data.is_request = 1;
- data.data.req = *req;
- /*
- * when fist message is received on a transport, quota variable will
- * initiaize and quota_set will set to one. In gluster code client
- * process with respect to transport is the one who sends the first
- * message. Before settng quota_set variable if a submit request is
- * came on server, then the message should not send.
- */
-
- if (priv->entity == GF_RDMA_SERVER && peer->quota_set == 0) {
- ret = 0;
- goto out;
- }
-
- entry = gf_rdma_ioq_new(this, &data);
- if (entry == NULL) {
- gf_msg(this->name, GF_LOG_WARNING, 0, RDMA_MSG_NEW_IOQ_ENTRY_FAILED,
- "getting a new ioq entry failed (peer:%s)",
- this->peerinfo.identifier);
- goto out;
- }
-
- ret = gf_rdma_writev(this, entry);
-
- if (ret > 0) {
- ret = 0;
- } else if (ret < 0) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0, RDMA_MSG_WRITE_PEER_FAILED,
- "sending request to peer (%s) failed",
- this->peerinfo.identifier);
- rpc_transport_disconnect(this, _gf_false);
- }
-
-out:
- return ret;
-}
-
-int32_t
-gf_rdma_submit_reply(rpc_transport_t *this, rpc_transport_reply_t *reply)
-{
- int32_t ret = 0;
- gf_rdma_ioq_t *entry = NULL;
- rpc_transport_data_t data;
-
- if (reply == NULL) {
- goto out;
- }
-
- data.data.reply = *reply;
-
- entry = gf_rdma_ioq_new(this, &data);
- if (entry == NULL) {
- gf_msg(this->name, GF_LOG_WARNING, 0, RDMA_MSG_NEW_IOQ_ENTRY_FAILED,
- "getting a new ioq entry failed (peer:%s)",
- this->peerinfo.identifier);
- goto out;
- }
-
- ret = gf_rdma_writev(this, entry);
- if (ret > 0) {
- ret = 0;
- } else if (ret < 0) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0, RDMA_MSG_WRITE_PEER_FAILED,
- "sending request to peer (%s) failed",
- this->peerinfo.identifier);
- rpc_transport_disconnect(this, _gf_false);
- }
-
-out:
- return ret;
-}
-
-static int
-gf_rdma_register_peer(gf_rdma_device_t *device, int32_t qp_num,
- gf_rdma_peer_t *peer)
-{
- struct _qpent *ent = NULL;
- gf_rdma_qpreg_t *qpreg = NULL;
- int32_t hash = 0;
- int ret = -1;
-
- qpreg = &device->qpreg;
- hash = qp_num % 42;
-
- pthread_mutex_lock(&qpreg->lock);
- {
- ent = qpreg->ents[hash].next;
- while ((ent != &qpreg->ents[hash]) && (ent->qp_num != qp_num)) {
- ent = ent->next;
- }
-
- if (ent->qp_num == qp_num) {
- ret = 0;
- goto unlock;
- }
-
- ent = (struct _qpent *)GF_CALLOC(1, sizeof(*ent), gf_common_mt_qpent);
- if (ent == NULL) {
- goto unlock;
- }
-
- /* TODO: ref reg->peer */
- ent->peer = peer;
- ent->next = &qpreg->ents[hash];
- ent->prev = ent->next->prev;
- ent->next->prev = ent;
- ent->prev->next = ent;
- ent->qp_num = qp_num;
- qpreg->count++;
- ret = 0;
- }
-unlock:
- pthread_mutex_unlock(&qpreg->lock);
-
- return ret;
-}
-
-static void
-gf_rdma_unregister_peer(gf_rdma_device_t *device, int32_t qp_num)
-{
- struct _qpent *ent = NULL;
- gf_rdma_qpreg_t *qpreg = NULL;
- int32_t hash = 0;
-
- qpreg = &device->qpreg;
- hash = qp_num % 42;
-
- pthread_mutex_lock(&qpreg->lock);
- {
- ent = qpreg->ents[hash].next;
- while ((ent != &qpreg->ents[hash]) && (ent->qp_num != qp_num))
- ent = ent->next;
- if ((ent->qp_num != qp_num) || (ent == &qpreg->ents[hash])) {
- pthread_mutex_unlock(&qpreg->lock);
- return;
- }
- ent->prev->next = ent->next;
- ent->next->prev = ent->prev;
- /* TODO: unref reg->peer */
- GF_FREE(ent);
- qpreg->count--;
- }
- pthread_mutex_unlock(&qpreg->lock);
-}
-
-static gf_rdma_peer_t *
-__gf_rdma_lookup_peer(gf_rdma_device_t *device, int32_t qp_num)
-{
- struct _qpent *ent = NULL;
- gf_rdma_peer_t *peer = NULL;
- gf_rdma_qpreg_t *qpreg = NULL;
- int32_t hash = 0;
-
- qpreg = &device->qpreg;
- hash = qp_num % 42;
- ent = qpreg->ents[hash].next;
- while ((ent != &qpreg->ents[hash]) && (ent->qp_num != qp_num))
- ent = ent->next;
-
- if (ent != &qpreg->ents[hash]) {
- peer = ent->peer;
- }
-
- return peer;
-}
-
-static void
-__gf_rdma_destroy_qp(rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
-
- priv = this->private;
- if (priv->peer.qp) {
- gf_rdma_unregister_peer(priv->device, priv->peer.qp->qp_num);
- rdma_destroy_qp(priv->peer.cm_id);
- }
- priv->peer.qp = NULL;
-
- return;
-}
-
-static int32_t
-gf_rdma_create_qp(rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
- gf_rdma_device_t *device = NULL;
- int32_t ret = 0;
- gf_rdma_peer_t *peer = NULL;
- char *device_name = NULL;
-
- priv = this->private;
-
- peer = &priv->peer;
-
- device_name = (char *)ibv_get_device_name(peer->cm_id->verbs->device);
- if (device_name == NULL) {
- ret = -1;
- gf_msg(this->name, GF_LOG_WARNING, 0, RDMA_MSG_GET_DEVICE_NAME_FAILED,
- "cannot get "
- "device_name");
- goto out;
- }
-
- device = gf_rdma_get_device(this, peer->cm_id->verbs, device_name);
- if (device == NULL) {
- ret = -1;
- gf_msg(this->name, GF_LOG_WARNING, 0, RDMA_MSG_GET_DEVICE_FAILED,
- "cannot get device for "
- "device %s",
- device_name);
- goto out;
- }
-
- if (priv->device == NULL) {
- priv->device = device;
- }
-
- struct ibv_qp_init_attr init_attr = {
- .send_cq = device->send_cq,
- .recv_cq = device->recv_cq,
- .srq = device->srq,
- .cap = {.max_send_wr = peer->send_count,
- .max_recv_wr = peer->recv_count,
- .max_send_sge = 2,
- .max_recv_sge = 1},
- .qp_type = IBV_QPT_RC};
-
- ret = rdma_create_qp(peer->cm_id, device->pd, &init_attr);
- if (ret != 0) {
- gf_msg(peer->trans->name, GF_LOG_CRITICAL, errno,
- RDMA_MSG_CREAT_QP_FAILED, "%s: could not create QP", this->name);
- ret = -1;
- goto out;
- }
-
- peer->qp = peer->cm_id->qp;
-
- ret = gf_rdma_register_peer(device, peer->qp->qp_num, peer);
-
-out:
- if (ret == -1)
- __gf_rdma_destroy_qp(this);
-
- return ret;
-}
-
-static int32_t
-__gf_rdma_teardown(rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
- gf_rdma_peer_t *peer = NULL;
-
- priv = this->private;
- peer = &priv->peer;
-
- if (peer->cm_id && peer->cm_id->qp != NULL) {
- __gf_rdma_destroy_qp(this);
- }
-
- if (!list_empty(&priv->peer.ioq)) {
- __gf_rdma_ioq_flush(peer);
- }
-
- if (peer->cm_id != NULL) {
- rdma_destroy_id(peer->cm_id);
- peer->cm_id = NULL;
- }
-
- /* TODO: decrement cq size */
- return 0;
-}
-
-static int32_t
-gf_rdma_teardown(rpc_transport_t *this)
-{
- int32_t ret = 0;
- gf_rdma_private_t *priv = NULL;
-
- if (this == NULL) {
- goto out;
- }
-
- priv = this->private;
-
- pthread_mutex_lock(&priv->write_mutex);
- {
- ret = __gf_rdma_teardown(this);
- }
- pthread_mutex_unlock(&priv->write_mutex);
-
-out:
- return ret;
-}
-
-/*
- * allocates new memory to hold write-chunklist. New memory is needed since
- * write-chunklist will be used while sending reply and the post holding initial
- * write-chunklist sent from client will be put back to srq before a pollin
- * event is sent to upper layers.
- */
-int32_t
-gf_rdma_get_write_chunklist(char **ptr, gf_rdma_write_array_t **write_ary)
-{
- gf_rdma_write_array_t *from = NULL, *to = NULL;
- int32_t ret = -1, size = 0, i = 0;
-
- from = (gf_rdma_write_array_t *)*ptr;
- if (from->wc_discrim == 0) {
- ret = 0;
- goto out;
- }
-
- from->wc_nchunks = ntoh32(from->wc_nchunks);
-
- size = sizeof(*from) + (sizeof(gf_rdma_write_chunk_t) * from->wc_nchunks);
-
- to = GF_CALLOC(1, size, gf_common_mt_char);
- if (to == NULL) {
- ret = -1;
- goto out;
- }
-
- to->wc_discrim = ntoh32(from->wc_discrim);
- to->wc_nchunks = from->wc_nchunks;
-
- for (i = 0; i < to->wc_nchunks; i++) {
- to->wc_array[i].wc_target.rs_handle = ntoh32(
- from->wc_array[i].wc_target.rs_handle);
- to->wc_array[i].wc_target.rs_length = ntoh32(
- from->wc_array[i].wc_target.rs_length);
- to->wc_array[i].wc_target.rs_offset = ntoh64(
- from->wc_array[i].wc_target.rs_offset);
- }
-
- *write_ary = to;
- ret = 0;
- *ptr = (char *)&from->wc_array[i].wc_target.rs_handle;
-out:
- return ret;
-}
-
-/*
- * does not allocate new memory to hold read-chunklist. New memory is not
- * needed, since post is not put back to srq till we've completed all the
- * rdma-reads and hence readchunk-list can point to memory held by post.
- */
-int32_t
-gf_rdma_get_read_chunklist(char **ptr, gf_rdma_read_chunk_t **readch)
-{
- int32_t ret = -1;
- gf_rdma_read_chunk_t *chunk = NULL;
- int i = 0;
-
- chunk = (gf_rdma_read_chunk_t *)*ptr;
- if (chunk[0].rc_discrim == 0) {
- ret = 0;
- goto out;
- }
-
- for (i = 0; chunk[i].rc_discrim != 0; i++) {
- chunk[i].rc_discrim = ntoh32(chunk[i].rc_discrim);
- chunk[i].rc_position = ntoh32(chunk[i].rc_position);
- chunk[i].rc_target.rs_handle = ntoh32(chunk[i].rc_target.rs_handle);
- chunk[i].rc_target.rs_length = ntoh32(chunk[i].rc_target.rs_length);
- chunk[i].rc_target.rs_offset = ntoh64(chunk[i].rc_target.rs_offset);
- }
-
- *readch = &chunk[0];
- ret = 0;
- *ptr = (char *)&chunk[i].rc_discrim;
-out:
- return ret;
-}
-
-static int32_t
-gf_rdma_decode_error_msg(gf_rdma_peer_t *peer, gf_rdma_post_t *post,
- size_t bytes_in_post)
-{
- gf_rdma_header_t *header = NULL;
- struct iobuf *iobuf = NULL;
- struct iobref *iobref = NULL;
- int32_t ret = -1;
- struct rpc_msg rpc_msg = {
- 0,
- };
-
- header = (gf_rdma_header_t *)post->buf;
- header->rm_body.rm_error.rm_type = ntoh32(header->rm_body.rm_error.rm_type);
- if (header->rm_body.rm_error.rm_type == ERR_VERS) {
- header->rm_body.rm_error.rm_version.gf_rdma_vers_low = ntoh32(
- header->rm_body.rm_error.rm_version.gf_rdma_vers_low);
- header->rm_body.rm_error.rm_version.gf_rdma_vers_high = ntoh32(
- header->rm_body.rm_error.rm_version.gf_rdma_vers_high);
- }
-
- rpc_msg.rm_xid = header->rm_xid;
- rpc_msg.rm_direction = REPLY;
- rpc_msg.rm_reply.rp_stat = MSG_DENIED;
-
- iobuf = iobuf_get2(peer->trans->ctx->iobuf_pool, bytes_in_post);
- if (iobuf == NULL) {
- ret = -1;
- goto out;
- }
-
- post->ctx.iobref = iobref = iobref_new();
- if (iobref == NULL) {
- ret = -1;
- goto out;
- }
-
- ret = rpc_reply_to_xdr(&rpc_msg, iobuf_ptr(iobuf), iobuf_pagesize(iobuf),
- &post->ctx.vector[0]);
- if (ret == -1) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_RPC_REPLY_CREATE_FAILED,
- "Failed to create "
- "RPC reply");
- goto out;
- }
-
- iobref_add(iobref, iobuf);
- iobuf_unref(iobuf);
-
- post->ctx.count = 1;
-
- iobuf = NULL;
- iobref = NULL;
-
-out:
- if (ret == -1) {
- if (iobuf != NULL) {
- iobuf_unref(iobuf);
- }
-
- if (iobref != NULL) {
- iobref_unref(iobref);
- }
- }
-
- return 0;
-}
-
-int32_t
-gf_rdma_decode_msg(gf_rdma_peer_t *peer, gf_rdma_post_t *post,
- gf_rdma_read_chunk_t **readch, size_t bytes_in_post)
-{
- int32_t ret = -1;
- gf_rdma_header_t *header = NULL;
- gf_rdma_reply_info_t *reply_info = NULL;
- char *ptr = NULL;
- gf_rdma_write_array_t *write_ary = NULL;
- size_t header_len = 0;
-
- header = (gf_rdma_header_t *)post->buf;
-
- ptr = (char *)&header->rm_body.rm_chunks[0];
-
- ret = gf_rdma_get_read_chunklist(&ptr, readch);
- if (ret == -1) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_GET_READ_CHUNK_FAILED,
- "cannot get read "
- "chunklist from msg");
- goto out;
- }
-
- /* skip terminator of read-chunklist */
- ptr = ptr + sizeof(uint32_t);
-
- ret = gf_rdma_get_write_chunklist(&ptr, &write_ary);
- if (ret == -1) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_GET_WRITE_CHUNK_FAILED,
- "cannot get write "
- "chunklist from msg");
- goto out;
- }
-
- /* skip terminator of write-chunklist */
- ptr = ptr + sizeof(uint32_t);
-
- if (write_ary != NULL) {
- reply_info = gf_rdma_reply_info_alloc(peer);
- if (reply_info == NULL) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_REPLY_INFO_ALLOC_FAILED, "reply_info_alloc failed");
- ret = -1;
- goto out;
- }
-
- reply_info->type = gf_rdma_writech;
- reply_info->wc_array = write_ary;
- reply_info->rm_xid = header->rm_xid;
- } else {
- ret = gf_rdma_get_write_chunklist(&ptr, &write_ary);
- if (ret == -1) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_CHUNKLIST_ERROR,
- "cannot get reply "
- "chunklist from msg");
- goto out;
- }
-
- if (write_ary != NULL) {
- reply_info = gf_rdma_reply_info_alloc(peer);
- if (reply_info == NULL) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_REPLY_INFO_ALLOC_FAILED,
- "reply_info_alloc_failed");
- ret = -1;
- goto out;
- }
-
- reply_info->type = gf_rdma_replych;
- reply_info->wc_array = write_ary;
- reply_info->rm_xid = header->rm_xid;
- }
- }
-
- /* skip terminator of reply chunk */
- ptr = ptr + sizeof(uint32_t);
- if (header->rm_type != GF_RDMA_NOMSG) {
- header_len = (long)ptr - (long)post->buf;
- post->ctx.vector[0].iov_len = (bytes_in_post - header_len);
-
- post->ctx.hdr_iobuf = iobuf_get2(peer->trans->ctx->iobuf_pool,
- (bytes_in_post - header_len));
- if (post->ctx.hdr_iobuf == NULL) {
- ret = -1;
- goto out;
- }
-
- post->ctx.vector[0].iov_base = iobuf_ptr(post->ctx.hdr_iobuf);
- memcpy(post->ctx.vector[0].iov_base, ptr, post->ctx.vector[0].iov_len);
- post->ctx.count = 1;
- }
-
- post->ctx.reply_info = reply_info;
-out:
- if (ret == -1) {
- if (*readch != NULL) {
- GF_FREE(*readch);
- *readch = NULL;
- }
- if (reply_info)
- GF_FREE(reply_info);
- GF_FREE(write_ary);
- }
-
- return ret;
-}
-
-/* Assumes only one of either write-chunklist or a reply chunk is present */
-int32_t
-gf_rdma_decode_header(gf_rdma_peer_t *peer, gf_rdma_post_t *post,
- gf_rdma_read_chunk_t **readch, size_t bytes_in_post)
-{
- int32_t ret = -1;
- gf_rdma_header_t *header = NULL;
-
- header = (gf_rdma_header_t *)post->buf;
-
- header->rm_xid = ntoh32(header->rm_xid);
- header->rm_vers = ntoh32(header->rm_vers);
- header->rm_credit = ntoh32(header->rm_credit);
- header->rm_type = ntoh32(header->rm_type);
-
- switch (header->rm_type) {
- case GF_RDMA_MSG:
- case GF_RDMA_NOMSG:
- ret = gf_rdma_decode_msg(peer, post, readch, bytes_in_post);
- if (ret < 0) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_ENCODE_ERROR,
- "cannot decode msg of "
- "type (%d)",
- header->rm_type);
- }
-
- break;
-
- case GF_RDMA_MSGP:
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0, RDMA_MSG_INVALID_ENTRY,
- "rdma msg of msg-type "
- "GF_RDMA_MSGP should not have been received");
- ret = -1;
- break;
-
- case GF_RDMA_DONE:
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0, RDMA_MSG_INVALID_ENTRY,
- "rdma msg of msg-type "
- "GF_RDMA_DONE should not have been received");
- ret = -1;
- break;
-
- case GF_RDMA_ERROR:
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_RDMA_ERROR_RECEIVED,
- "received a msg of type"
- " RDMA_ERROR");
- ret = gf_rdma_decode_error_msg(peer, post, bytes_in_post);
- break;
-
- default:
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0, RDMA_MSG_INVALID_ENTRY,
- "unknown rdma msg-type (%d)", header->rm_type);
- }
-
- return ret;
-}
-
-int32_t
-gf_rdma_do_reads(gf_rdma_peer_t *peer, gf_rdma_post_t *post,
- gf_rdma_read_chunk_t *readch)
-{
- int32_t ret = -1, i = 0, count = 0;
- size_t size = 0;
- char *ptr = NULL;
- struct iobuf *iobuf = NULL;
- gf_rdma_private_t *priv = NULL;
- struct ibv_sge *list = NULL;
- struct ibv_send_wr *wr = NULL, *bad_wr = NULL;
- int total_ref = 0;
- priv = peer->trans->private;
-
- for (i = 0; readch[i].rc_discrim != 0; i++) {
- size += readch[i].rc_target.rs_length;
- }
-
- if (i == 0) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0, RDMA_MSG_INVALID_CHUNK_TYPE,
- "message type specified "
- "as rdma-read but there are no rdma read-chunks "
- "present");
- goto out;
- }
-
- post->ctx.gf_rdma_reads = i;
- i = 0;
- iobuf = iobuf_get2(peer->trans->ctx->iobuf_pool, size);
- if (iobuf == NULL) {
- goto out;
- }
-
- if (post->ctx.iobref == NULL) {
- post->ctx.iobref = iobref_new();
- if (post->ctx.iobref == NULL) {
- iobuf_unref(iobuf);
- iobuf = NULL;
- goto out;
- }
- }
-
- ptr = iobuf_ptr(iobuf);
- iobref_add(post->ctx.iobref, iobuf);
- iobuf_unref(iobuf);
-
- iobuf = NULL;
-
- pthread_mutex_lock(&priv->write_mutex);
- {
- if (!priv->connected) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_PEER_DISCONNECTED,
- "transport not "
- "connected to peer (%s), not doing rdma reads",
- peer->trans->peerinfo.identifier);
- goto unlock;
- }
-
- list = GF_CALLOC(post->ctx.gf_rdma_reads, sizeof(struct ibv_sge),
- gf_common_mt_sge);
-
- if (list == NULL) {
- errno = ENOMEM;
- ret = -1;
- goto unlock;
- }
- wr = GF_CALLOC(post->ctx.gf_rdma_reads, sizeof(struct ibv_send_wr),
- gf_common_mt_wr);
- if (wr == NULL) {
- errno = ENOMEM;
- ret = -1;
- goto unlock;
- }
- for (i = 0; readch[i].rc_discrim != 0; i++) {
- count = post->ctx.count++;
- post->ctx.vector[count].iov_base = ptr;
- post->ctx.vector[count].iov_len = readch[i].rc_target.rs_length;
-
- ret = __gf_rdma_register_local_mr_for_rdma(
- peer, &post->ctx.vector[count], 1, &post->ctx);
- if (ret == -1) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_MR_ALOC_FAILED,
- "registering local memory"
- " for rdma read failed");
- goto unlock;
- }
-
- list[i].addr = (unsigned long)post->ctx.vector[count].iov_base;
- list[i].length = post->ctx.vector[count].iov_len;
- list[i].lkey = post->ctx.mr[post->ctx.mr_count - 1]->lkey;
-
- wr[i].wr_id = (unsigned long)gf_rdma_post_ref(post);
- wr[i].sg_list = &list[i];
- wr[i].next = &wr[i + 1];
- wr[i].num_sge = 1;
- wr[i].opcode = IBV_WR_RDMA_READ;
- wr[i].send_flags = IBV_SEND_SIGNALED;
- wr[i].wr.rdma.remote_addr = readch[i].rc_target.rs_offset;
- wr[i].wr.rdma.rkey = readch[i].rc_target.rs_handle;
-
- ptr += readch[i].rc_target.rs_length;
- total_ref++;
- }
- wr[i - 1].next = NULL;
- ret = ibv_post_send(peer->qp, wr, &bad_wr);
- if (ret) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_READ_CLIENT_ERROR,
- "rdma read from "
- "client (%s) failed with ret = %d (%s)",
- peer->trans->peerinfo.identifier, ret,
- (ret > 0) ? strerror(ret) : "");
-
- if (!bad_wr) {
- ret = -1;
- goto unlock;
- }
-
- for (i = 0; i < post->ctx.gf_rdma_reads; i++) {
- if (&wr[i] != bad_wr)
- total_ref--;
- else
- break;
- }
-
- ret = -1;
- }
- }
-unlock:
- pthread_mutex_unlock(&priv->write_mutex);
-out:
- if (list)
- GF_FREE(list);
- if (wr)
- GF_FREE(wr);
-
- if (ret == -1) {
- while (total_ref-- > 0)
- gf_rdma_post_unref(post);
- }
-
- return ret;
-}
-
-int32_t
-gf_rdma_pollin_notify(gf_rdma_peer_t *peer, gf_rdma_post_t *post)
-{
- int32_t ret = -1;
- enum msg_type msg_type = 0;
- struct rpc_req *rpc_req = NULL;
- gf_rdma_request_context_t *request_context = NULL;
- rpc_request_info_t request_info = {
- 0,
- };
- gf_rdma_private_t *priv = NULL;
- uint32_t *ptr = NULL;
- rpc_transport_pollin_t *pollin = NULL;
-
- if ((peer == NULL) || (post == NULL)) {
- goto out;
- }
-
- if (post->ctx.iobref == NULL) {
- post->ctx.iobref = iobref_new();
- if (post->ctx.iobref == NULL) {
- goto out;
- }
-
- /* handling the case where both hdr and payload of
- * GF_FOP_READ_CBK were received in a single iobuf
- * because of server sending entire msg as inline without
- * doing rdma writes.
- */
- if (post->ctx.hdr_iobuf)
- iobref_add(post->ctx.iobref, post->ctx.hdr_iobuf);
- }
-
- pollin = rpc_transport_pollin_alloc(peer->trans, post->ctx.vector,
- post->ctx.count, post->ctx.hdr_iobuf,
- post->ctx.iobref, post->ctx.reply_info);
- if (pollin == NULL) {
- goto out;
- }
-
- ptr = (uint32_t *)pollin->vector[0].iov_base;
-
- request_info.xid = ntoh32(*ptr);
- msg_type = ntoh32(*(ptr + 1));
-
- if (msg_type == REPLY) {
- ret = rpc_transport_notify(peer->trans, RPC_TRANSPORT_MAP_XID_REQUEST,
- &request_info);
- if (ret == -1) {
- gf_msg_debug(GF_RDMA_LOG_NAME, 0,
- "cannot get request"
- "information from rpc layer");
- goto out;
- }
-
- rpc_req = request_info.rpc_req;
- if (rpc_req == NULL) {
- gf_msg_debug(GF_RDMA_LOG_NAME, 0,
- "rpc request "
- "structure not found");
- ret = -1;
- goto out;
- }
-
- request_context = rpc_req->conn_private;
- rpc_req->conn_private = NULL;
-
- priv = peer->trans->private;
- if (request_context != NULL) {
- pthread_mutex_lock(&priv->write_mutex);
- {
- __gf_rdma_request_context_destroy(request_context);
- }
- pthread_mutex_unlock(&priv->write_mutex);
- } else {
- gf_rdma_quota_put(peer);
- }
-
- pollin->is_reply = 1;
- }
-
- ret = rpc_transport_notify(peer->trans, RPC_TRANSPORT_MSG_RECEIVED, pollin);
- if (ret < 0) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0, TRANS_MSG_TRANSPORT_ERROR,
- "transport_notify failed");
- }
-
-out:
- if (pollin != NULL) {
- pollin->private = NULL;
- rpc_transport_pollin_destroy(pollin);
- }
-
- return ret;
-}
-
-int32_t
-gf_rdma_recv_reply(gf_rdma_peer_t *peer, gf_rdma_post_t *post)
-{
- int32_t ret = -1;
- gf_rdma_header_t *header = NULL;
- gf_rdma_reply_info_t *reply_info = NULL;
- gf_rdma_write_array_t *wc_array = NULL;
- int i = 0;
- uint32_t *ptr = NULL;
- gf_rdma_request_context_t *ctx = NULL;
- rpc_request_info_t request_info = {
- 0,
- };
- struct rpc_req *rpc_req = NULL;
-
- header = (gf_rdma_header_t *)post->buf;
- reply_info = post->ctx.reply_info;
-
- /* no write chunklist, just notify upper layers */
- if (reply_info == NULL) {
- ret = 0;
- goto out;
- }
-
- wc_array = reply_info->wc_array;
-
- if (header->rm_type == GF_RDMA_NOMSG) {
- post->ctx.vector[0].iov_base = (void *)(long)wc_array->wc_array[0]
- .wc_target.rs_offset;
- post->ctx.vector[0].iov_len = wc_array->wc_array[0].wc_target.rs_length;
-
- post->ctx.count = 1;
- } else {
- for (i = 0; i < wc_array->wc_nchunks; i++) {
- post->ctx.vector[i + 1].iov_base =
- (void *)(long)wc_array->wc_array[i].wc_target.rs_offset;
- post->ctx.vector[i + 1].iov_len = wc_array->wc_array[i]
- .wc_target.rs_length;
- }
-
- post->ctx.count += wc_array->wc_nchunks;
- }
-
- ptr = (uint32_t *)post->ctx.vector[0].iov_base;
- request_info.xid = ntoh32(*ptr);
-
- ret = rpc_transport_notify(peer->trans, RPC_TRANSPORT_MAP_XID_REQUEST,
- &request_info);
- if (ret == -1) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0, TRANS_MSG_TRANSPORT_ERROR,
- "cannot get request "
- "information (peer:%s) from rpc layer",
- peer->trans->peerinfo.identifier);
- goto out;
- }
-
- rpc_req = request_info.rpc_req;
- if (rpc_req == NULL) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0, RDMA_MSG_RPC_ST_ERROR,
- "rpc request structure not "
- "found");
- ret = -1;
- goto out;
- }
-
- ctx = rpc_req->conn_private;
- if ((post->ctx.iobref == NULL) && ctx->rsp_iobref) {
- post->ctx.iobref = iobref_ref(ctx->rsp_iobref);
- }
-
- ret = 0;
-
- gf_rdma_reply_info_destroy(reply_info);
-
-out:
- if (ret == 0) {
- ret = gf_rdma_pollin_notify(peer, post);
- if (ret < 0) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_POLL_IN_NOTIFY_FAILED, "pollin notify failed");
- }
- }
-
- return ret;
-}
-
-static int32_t
-gf_rdma_recv_request(gf_rdma_peer_t *peer, gf_rdma_post_t *post,
- gf_rdma_read_chunk_t *readch)
-{
- int32_t ret = -1;
-
- if (readch != NULL) {
- ret = gf_rdma_do_reads(peer, post, readch);
- if (ret < 0) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_PEER_READ_FAILED, "rdma read from peer (%s) failed",
- peer->trans->peerinfo.identifier);
- }
- } else {
- ret = gf_rdma_pollin_notify(peer, post);
- if (ret == -1) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_POLL_IN_NOTIFY_FAILED,
- "pollin notification failed");
- }
- }
-
- return ret;
-}
-
-void
-gf_rdma_process_recv(gf_rdma_peer_t *peer, struct ibv_wc *wc)
-{
- gf_rdma_post_t *post = NULL;
- gf_rdma_read_chunk_t *readch = NULL;
- int ret = -1;
- uint32_t *ptr = NULL;
- enum msg_type msg_type = 0;
- gf_rdma_header_t *header = NULL;
- gf_rdma_private_t *priv = NULL;
-
- post = (gf_rdma_post_t *)(long)wc->wr_id;
- if (post == NULL) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0, RDMA_MSG_POST_MISSING,
- "no post found in successful "
- "work completion element");
- goto out;
- }
-
- ret = gf_rdma_decode_header(peer, post, &readch, wc->byte_len);
- if (ret == -1) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_HEADER_DECODE_FAILED,
- "decoding of header "
- "failed");
- goto out;
- }
-
- header = (gf_rdma_header_t *)post->buf;
-
- priv = peer->trans->private;
-
- pthread_mutex_lock(&priv->write_mutex);
- {
- if (!priv->peer.quota_set) {
- priv->peer.quota_set = 1;
-
- /* Initially peer.quota is set to 1 as per RFC 5666. We
- * have to account for the quota used while sending
- * first msg (which may or may not be returned to pool
- * at this point) while deriving peer.quota from
- * header->rm_credit. Hence the arithmetic below,
- * instead of directly setting it to header->rm_credit.
- */
- priv->peer.quota = header->rm_credit - (1 - priv->peer.quota);
- }
- }
- pthread_mutex_unlock(&priv->write_mutex);
-
- switch (header->rm_type) {
- case GF_RDMA_MSG:
- ptr = (uint32_t *)post->ctx.vector[0].iov_base;
- msg_type = ntoh32(*(ptr + 1));
- break;
-
- case GF_RDMA_NOMSG:
- if (readch != NULL) {
- msg_type = CALL;
- } else {
- msg_type = REPLY;
- }
- break;
-
- case GF_RDMA_ERROR:
- if (header->rm_body.rm_error.rm_type == ERR_CHUNK) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_RDMA_ERROR_RECEIVED,
- "peer (%s), couldn't encode or decode the msg "
- "properly or write chunks were not provided "
- "for replies that were bigger than "
- "RDMA_INLINE_THRESHOLD (%d)",
- peer->trans->peerinfo.identifier,
- GLUSTERFS_RDMA_INLINE_THRESHOLD);
- ret = gf_rdma_pollin_notify(peer, post);
- if (ret == -1) {
- gf_msg_debug(GF_RDMA_LOG_NAME, 0,
- "pollin "
- "notification failed");
- }
- goto out;
- } else {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_ERROR, 0,
- TRANS_MSG_TRANSPORT_ERROR,
- "an error has "
- "happened while transmission of msg, "
- "disconnecting the transport");
- ret = -1;
- goto out;
- }
-
- default:
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0, RDMA_MSG_INVALID_ENTRY,
- "invalid rdma msg-type (%d)", header->rm_type);
- goto out;
- }
-
- if (msg_type == CALL) {
- ret = gf_rdma_recv_request(peer, post, readch);
- if (ret < 0) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_PEER_REQ_FAILED,
- "receiving a request"
- " from peer (%s) failed",
- peer->trans->peerinfo.identifier);
- }
- } else {
- ret = gf_rdma_recv_reply(peer, post);
- if (ret < 0) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_PEER_REP_FAILED,
- "receiving a reply "
- "from peer (%s) failed",
- peer->trans->peerinfo.identifier);
- }
- }
-
-out:
- if (ret == -1) {
- rpc_transport_disconnect(peer->trans, _gf_false);
- }
-
- return;
-}
-
-void *
-gf_rdma_async_event_thread(void *context)
-{
- struct ibv_async_event event;
- int ret;
-
- while (1) {
- do {
- ret = ibv_get_async_event((struct ibv_context *)context, &event);
-
- if (ret && errno != EINTR) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, errno,
- RDMA_MSG_EVENT_ERROR,
- "Error getting "
- "event");
- }
- } while (ret && errno == EINTR);
-
- switch (event.event_type) {
- case IBV_EVENT_SRQ_LIMIT_REACHED:
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_EVENT_SRQ_LIMIT_REACHED,
- "received "
- "srq_limit reached");
- break;
-
- default:
- gf_msg_debug(GF_RDMA_LOG_NAME, 0,
- "event (%d) "
- "received",
- event.event_type);
- break;
- }
-
- ibv_ack_async_event(&event);
- }
-
- return 0;
-}
-
-static void *
-gf_rdma_recv_completion_proc(void *data)
-{
- struct ibv_comp_channel *chan = NULL;
- gf_rdma_device_t *device = NULL;
- ;
- gf_rdma_post_t *post = NULL;
- gf_rdma_peer_t *peer = NULL;
- struct ibv_cq *event_cq = NULL;
- struct ibv_wc wc[10] = {
- {0},
- };
- void *event_ctx = NULL;
- int32_t ret = 0;
- int32_t num_wr = 0, index = 0;
- uint8_t failed = 0;
-
- chan = data;
-
- while (1) {
- failed = 0;
- ret = ibv_get_cq_event(chan, &event_cq, &event_ctx);
- if (ret) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_ERROR, errno,
- RDMA_MSG_IBV_GET_CQ_FAILED,
- "ibv_get_cq_event failed, terminating recv "
- "thread %d (%d)",
- ret, errno);
- continue;
- }
-
- device = event_ctx;
-
- ret = ibv_req_notify_cq(event_cq, 0);
- if (ret) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_ERROR, errno,
- RDMA_MSG_IBV_REQ_NOTIFY_CQ_FAILED,
- "ibv_req_notify_cq on %s failed, terminating "
- "recv thread: %d (%d)",
- device->device_name, ret, errno);
- continue;
- }
-
- device = (gf_rdma_device_t *)event_ctx;
-
- while (!failed && (num_wr = ibv_poll_cq(event_cq, 10, wc)) > 0) {
- for (index = 0; index < num_wr && !failed; index++) {
- post = (gf_rdma_post_t *)(long)wc[index].wr_id;
-
- pthread_mutex_lock(&device->qpreg.lock);
- {
- peer = __gf_rdma_lookup_peer(device, wc[index].qp_num);
-
- /*
- * keep a refcount on transport so that it
- * does not get freed because of some error
- * indicated by wc.status till we are done
- * with usage of peer and thereby that of
- * trans.
- */
- if (peer != NULL) {
- rpc_transport_ref(peer->trans);
- }
- }
- pthread_mutex_unlock(&device->qpreg.lock);
-
- if (wc[index].status != IBV_WC_SUCCESS) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_ERROR, 0,
- RDMA_MSG_RECV_ERROR,
- "recv work "
- "request on `%s' returned error (%d)",
- device->device_name, wc[index].status);
- failed = 1;
- if (peer) {
- ibv_ack_cq_events(event_cq, num_wr);
- rpc_transport_disconnect(peer->trans, _gf_false);
- rpc_transport_unref(peer->trans);
- }
-
- if (post) {
- gf_rdma_post_unref(post);
- }
-
- continue;
- }
-
- if (peer) {
- gf_rdma_process_recv(peer, &wc[index]);
- rpc_transport_unref(peer->trans);
- } else {
- gf_msg_debug(GF_RDMA_LOG_NAME, 0,
- "could not lookup peer "
- "for qp_num: %d",
- wc[index].qp_num);
- }
-
- gf_rdma_post_unref(post);
- }
- }
-
- if (ret < 0) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_ERROR, errno,
- RDMA_MSG_IBV_POLL_CQ_ERROR,
- "ibv_poll_cq on `%s' returned error "
- "(ret = %d, errno = %d)",
- device->device_name, ret, errno);
- continue;
- }
- if (!failed)
- ibv_ack_cq_events(event_cq, num_wr);
- }
-
- return NULL;
-}
-
-void
-gf_rdma_handle_failed_send_completion(gf_rdma_peer_t *peer, struct ibv_wc *wc)
-{
- gf_rdma_post_t *post = NULL;
- gf_rdma_device_t *device = NULL;
- gf_rdma_private_t *priv = NULL;
-
- if (peer != NULL) {
- priv = peer->trans->private;
- if (priv != NULL) {
- device = priv->device;
- }
- }
-
- post = (gf_rdma_post_t *)(long)wc->wr_id;
-
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0, RDMA_MSG_RDMA_HANDLE_FAILED,
- "send work request on `%s' returned error "
- "wc.status = %d, wc.vendor_err = %d, post->buf = %p, "
- "wc.byte_len = %d, post->reused = %d",
- (device != NULL) ? device->device_name : NULL, wc->status,
- wc->vendor_err, post->buf, wc->byte_len, post->reused);
-
- if (wc->status == IBV_WC_RETRY_EXC_ERR) {
- gf_msg("rdma", GF_LOG_ERROR, 0, TRANS_MSG_TIMEOUT_EXCEEDED,
- "connection between client and server not working. "
- "check by running 'ibv_srq_pingpong'. also make sure "
- "subnet manager is running (eg: 'opensm'), or check "
- "if rdma port is valid (or active) by running "
- "'ibv_devinfo'. contact Gluster Support Team if the "
- "problem persists.");
- }
-
- if (peer) {
- rpc_transport_disconnect(peer->trans, _gf_false);
- }
-
- return;
-}
-
-void
-gf_rdma_handle_successful_send_completion(gf_rdma_peer_t *peer,
- struct ibv_wc *wc)
-{
- gf_rdma_post_t *post = NULL;
- int reads = 0, ret = 0;
- gf_rdma_header_t *header = NULL;
-
- if (wc->opcode != IBV_WC_RDMA_READ) {
- goto out;
- }
-
- post = (gf_rdma_post_t *)(long)wc->wr_id;
-
- pthread_mutex_lock(&post->lock);
- {
- reads = --post->ctx.gf_rdma_reads;
- }
- pthread_mutex_unlock(&post->lock);
-
- if (reads != 0) {
- /* if it is not the last rdma read, we've got nothing to do */
- goto out;
- }
-
- header = (gf_rdma_header_t *)post->buf;
-
- if (header->rm_type == GF_RDMA_NOMSG) {
- post->ctx.count = 1;
- post->ctx.vector[0].iov_len += post->ctx.vector[1].iov_len;
- }
- /*
- * if reads performed as vectored, then all the buffers are actually
- * contiguous memory, so that we can use it as single vector, instead
- * of multiple.
- */
- while (post->ctx.count > 2) {
- post->ctx.vector[1].iov_len += post->ctx.vector[post->ctx.count - 1]
- .iov_len;
- post->ctx.count--;
- }
-
- ret = gf_rdma_pollin_notify(peer, post);
- if ((ret == -1) && (peer != NULL)) {
- rpc_transport_disconnect(peer->trans, _gf_false);
- }
-
-out:
- return;
-}
-
-static void *
-gf_rdma_send_completion_proc(void *data)
-{
- struct ibv_comp_channel *chan = NULL;
- gf_rdma_post_t *post = NULL;
- gf_rdma_peer_t *peer = NULL;
- struct ibv_cq *event_cq = NULL;
- void *event_ctx = NULL;
- gf_rdma_device_t *device = NULL;
- struct ibv_wc wc[10] = {
- {0},
- };
- char is_request = 0;
- int32_t ret = 0, quota_ret = 0, num_wr = 0;
- int32_t index = 0, failed = 0;
- chan = data;
- while (1) {
- failed = 0;
- ret = ibv_get_cq_event(chan, &event_cq, &event_ctx);
- if (ret) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_ERROR, errno,
- RDMA_MSG_IBV_GET_CQ_FAILED,
- "ibv_get_cq_event on failed, terminating "
- "send thread: %d (%d)",
- ret, errno);
- continue;
- }
-
- device = event_ctx;
-
- ret = ibv_req_notify_cq(event_cq, 0);
- if (ret) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_ERROR, errno,
- RDMA_MSG_IBV_REQ_NOTIFY_CQ_FAILED,
- "ibv_req_notify_cq on %s failed, terminating "
- "send thread: %d (%d)",
- device->device_name, ret, errno);
- continue;
- }
-
- while (!failed && (num_wr = ibv_poll_cq(event_cq, 10, wc)) > 0) {
- for (index = 0; index < num_wr && !failed; index++) {
- post = (gf_rdma_post_t *)(long)wc[index].wr_id;
-
- pthread_mutex_lock(&device->qpreg.lock);
- {
- peer = __gf_rdma_lookup_peer(device, wc[index].qp_num);
-
- /*
- * keep a refcount on transport so that it
- * does not get freed because of some error
- * indicated by wc.status, till we are done
- * with usage of peer and thereby that of trans.
- */
- if (peer != NULL) {
- rpc_transport_ref(peer->trans);
- }
- }
- pthread_mutex_unlock(&device->qpreg.lock);
-
- if (wc[index].status != IBV_WC_SUCCESS) {
- ibv_ack_cq_events(event_cq, num_wr);
- failed = 1;
- gf_rdma_handle_failed_send_completion(peer, &wc[index]);
- } else {
- gf_rdma_handle_successful_send_completion(peer, &wc[index]);
- }
-
- if (post) {
- is_request = post->ctx.is_request;
-
- ret = gf_rdma_post_unref(post);
- if ((ret == 0) && (wc[index].status == IBV_WC_SUCCESS) &&
- !is_request && (post->type == GF_RDMA_SEND_POST) &&
- (peer != NULL)) {
- /* An GF_RDMA_RECV_POST can end up in
- * gf_rdma_send_completion_proc for
- * rdma-reads, and we do not take
- * quota for getting an GF_RDMA_RECV_POST.
- */
-
- /*
- * if it is request, quota is returned
- * after reply has come.
- */
- quota_ret = gf_rdma_quota_put(peer);
- if (quota_ret < 0) {
- gf_msg_debug("rdma", 0,
- "failed to send "
- "message");
- }
- }
- }
-
- if (peer) {
- rpc_transport_unref(peer->trans);
- } else {
- gf_msg_debug(GF_RDMA_LOG_NAME, 0,
- "could not lookup peer for qp_num: %d",
- wc[index].qp_num);
- }
- }
- }
-
- if (ret < 0) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_ERROR, errno,
- RDMA_MSG_IBV_POLL_CQ_ERROR,
- "ibv_poll_cq on `%s' returned error (ret = %d,"
- " errno = %d)",
- device->device_name, ret, errno);
- continue;
- }
- if (!failed)
- ibv_ack_cq_events(event_cq, num_wr);
- }
-
- return NULL;
-}
-
-static void
-gf_rdma_options_init(rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
- gf_rdma_options_t *options = NULL;
- int32_t mtu = 0;
- data_t *temp = NULL;
-
- /* TODO: validate arguments from options below */
-
- priv = this->private;
- options = &priv->options;
- options->send_size =
- GLUSTERFS_RDMA_INLINE_THRESHOLD; /*this->ctx->page_size * 4; 512 KB*/
- options->recv_size =
- GLUSTERFS_RDMA_INLINE_THRESHOLD; /*this->ctx->page_size * 4; 512 KB*/
- options->send_count = 4096;
- options->recv_count = 4096;
- options->attr_timeout = GF_RDMA_TIMEOUT;
- options->attr_retry_cnt = GF_RDMA_RETRY_CNT;
- options->attr_rnr_retry = GF_RDMA_RNR_RETRY;
-
- temp = dict_get(this->options, "transport.listen-backlog");
- if (temp)
- options->backlog = data_to_uint32(temp);
- else
- options->backlog = GLUSTERFS_SOCKET_LISTEN_BACKLOG;
-
- temp = dict_get(this->options, "transport.rdma.work-request-send-count");
- if (temp)
- options->send_count = data_to_int32(temp);
-
- temp = dict_get(this->options, "transport.rdma.work-request-recv-count");
- if (temp)
- options->recv_count = data_to_int32(temp);
-
- temp = dict_get(this->options, "transport.rdma.attr-timeout");
-
- if (temp)
- options->attr_timeout = data_to_uint8(temp);
-
- temp = dict_get(this->options, "transport.rdma.attr-retry-cnt");
-
- if (temp)
- options->attr_retry_cnt = data_to_uint8(temp);
-
- temp = dict_get(this->options, "transport.rdma.attr-rnr-retry");
-
- if (temp)
- options->attr_rnr_retry = data_to_uint8(temp);
-
- options->port = 1;
- temp = dict_get(this->options, "transport.rdma.port");
- if (temp)
- options->port = data_to_uint64(temp);
-
- options->mtu = mtu = IBV_MTU_2048;
- temp = dict_get(this->options, "transport.rdma.mtu");
- if (temp)
- mtu = data_to_int32(temp);
- switch (mtu) {
- case 256:
- options->mtu = IBV_MTU_256;
- break;
-
- case 512:
- options->mtu = IBV_MTU_512;
- break;
-
- case 1024:
- options->mtu = IBV_MTU_1024;
- break;
-
- case 2048:
- options->mtu = IBV_MTU_2048;
- break;
-
- case 4096:
- options->mtu = IBV_MTU_4096;
- break;
- default:
- if (temp)
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, 0,
- RDMA_MSG_UNRECG_MTU_VALUE,
- "%s: unrecognized "
- "MTU value '%s', defaulting to '2048'",
- this->name, data_to_str(temp));
- else
- gf_msg_trace(GF_RDMA_LOG_NAME, 0,
- "%s: defaulting "
- "MTU to '2048'",
- this->name);
- options->mtu = IBV_MTU_2048;
- break;
- }
-
- temp = dict_get(this->options, "transport.rdma.device-name");
- if (temp)
- options->device_name = gf_strdup(temp->data);
-
- return;
-}
-
-gf_rdma_ctx_t *
-__gf_rdma_ctx_create(void)
-{
- gf_rdma_ctx_t *rdma_ctx = NULL;
- int ret = -1;
-
- rdma_ctx = GF_CALLOC(1, sizeof(*rdma_ctx), gf_common_mt_char);
- if (rdma_ctx == NULL) {
- goto out;
- }
- pthread_mutex_init(&rdma_ctx->lock, NULL);
- rdma_ctx->rdma_cm_event_channel = rdma_create_event_channel();
- if (rdma_ctx->rdma_cm_event_channel == NULL) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, errno,
- RDMA_MSG_CM_EVENT_FAILED,
- "rdma_cm event channel "
- "creation failed");
- goto out;
- }
-
- ret = gf_thread_create(&rdma_ctx->rdma_cm_thread, NULL,
- gf_rdma_cm_event_handler,
- rdma_ctx->rdma_cm_event_channel, "rdmaehan");
- if (ret != 0) {
- gf_msg(GF_RDMA_LOG_NAME, GF_LOG_WARNING, ret, RDMA_MSG_CM_EVENT_FAILED,
- "creation of thread to "
- "handle rdma-cm events failed");
- goto out;
- }
-
-out:
- if (ret < 0 && rdma_ctx) {
- if (rdma_ctx->rdma_cm_event_channel != NULL) {
- rdma_destroy_event_channel(rdma_ctx->rdma_cm_event_channel);
- }
-
- GF_FREE(rdma_ctx);
- rdma_ctx = NULL;
- }
-
- return rdma_ctx;
-}
-
-static int32_t
-gf_rdma_init(rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
- int32_t ret = 0;
- glusterfs_ctx_t *ctx = NULL;
- gf_rdma_options_t *options = NULL;
-
- ctx = this->ctx;
-
- priv = this->private;
-
- ibv_fork_init();
- gf_rdma_options_init(this);
-
- options = &priv->options;
- priv->peer.send_count = options->send_count;
- priv->peer.recv_count = options->recv_count;
- priv->peer.send_size = options->send_size;
- priv->peer.recv_size = options->recv_size;
- priv->backlog = options->backlog;
-
- priv->peer.trans = this;
- INIT_LIST_HEAD(&priv->peer.ioq);
-
- pthread_mutex_init(&priv->write_mutex, NULL);
- pthread_mutex_init(&priv->recv_mutex, NULL);
- pthread_cond_init(&priv->recv_cond, NULL);
-
- LOCK(&ctx->lock);
- {
- if (ctx->ib == NULL) {
- ctx->ib = __gf_rdma_ctx_create();
- if (ctx->ib == NULL) {
- ret = -1;
- }
- }
- }
- UNLOCK(&ctx->lock);
-
- return ret;
-}
-
-static int32_t
-gf_rdma_disconnect(rpc_transport_t *this, gf_boolean_t wait)
-{
- gf_rdma_private_t *priv = NULL;
- int32_t ret = 0;
-
- priv = this->private;
- gf_msg_callingfn(this->name, GF_LOG_DEBUG, 0, RDMA_MSG_PEER_DISCONNECTED,
- "disconnect called (peer:%s)", this->peerinfo.identifier);
-
- pthread_mutex_lock(&priv->write_mutex);
- {
- ret = __gf_rdma_disconnect(this);
- }
- pthread_mutex_unlock(&priv->write_mutex);
-
- return ret;
-}
-
-static int32_t
-gf_rdma_connect(struct rpc_transport *this, int port)
-{
- gf_rdma_private_t *priv = NULL;
- int32_t ret = 0;
- union gf_sock_union sock_union = {
- {
- 0,
- },
- };
- socklen_t sockaddr_len = 0;
- gf_rdma_peer_t *peer = NULL;
- gf_rdma_ctx_t *rdma_ctx = NULL;
- gf_boolean_t connected = _gf_false;
-
- priv = this->private;
-
- peer = &priv->peer;
-
- rpc_transport_ref(this);
-
- ret = gf_rdma_client_get_remote_sockaddr(this, &sock_union.sa,
- &sockaddr_len, port);
- if (ret != 0) {
- gf_msg_debug(this->name, 0,
- "cannot get remote address to "
- "connect");
- goto out;
- }
-
- rdma_ctx = this->ctx->ib;
-
- pthread_mutex_lock(&priv->write_mutex);
- {
- if (peer->cm_id != NULL) {
- ret = -1;
- errno = EINPROGRESS;
- connected = _gf_true;
- goto unlock;
- }
-
- priv->entity = GF_RDMA_CLIENT;
-
- ret = rdma_create_id(rdma_ctx->rdma_cm_event_channel, &peer->cm_id,
- this, RDMA_PS_TCP);
- if (ret != 0) {
- gf_msg(this->name, GF_LOG_ERROR, errno, RDMA_MSG_CM_EVENT_FAILED,
- "creation of "
- "rdma_cm_id failed");
- ret = -errno;
- goto unlock;
- }
-
- memcpy(&this->peerinfo.sockaddr, &sock_union.storage, sockaddr_len);
- this->peerinfo.sockaddr_len = sockaddr_len;
-
- if (port > 0)
- sock_union.sin.sin_port = htons(port);
-
- ((struct sockaddr *)&this->myinfo.sockaddr)->sa_family =
- ((struct sockaddr *)&this->peerinfo.sockaddr)->sa_family;
-
- ret = gf_rdma_client_bind(this,
- (struct sockaddr *)&this->myinfo.sockaddr,
- &this->myinfo.sockaddr_len, peer->cm_id);
- if (ret != 0) {
- gf_msg(this->name, GF_LOG_WARNING, errno,
- RDMA_MSG_CLIENT_BIND_FAILED, "client bind failed");
- goto unlock;
- }
-
- ret = rdma_resolve_addr(peer->cm_id, NULL, &sock_union.sa, 2000);
- if (ret != 0) {
- gf_msg(this->name, GF_LOG_WARNING, errno,
- RDMA_MSG_RDMA_RESOLVE_ADDR_FAILED,
- "rdma_resolve_addr failed");
- goto unlock;
- }
-
- priv->connected = 0;
- }
-unlock:
- pthread_mutex_unlock(&priv->write_mutex);
-
-out:
- if (ret != 0) {
- if (!connected) {
- gf_rdma_teardown(this);
- }
-
- rpc_transport_unref(this);
- }
-
- return ret;
-}
-
-static int32_t
-gf_rdma_listen(rpc_transport_t *this)
-{
- union gf_sock_union sock_union = {
- {
- 0,
- },
- };
- socklen_t sockaddr_len = 0;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_peer_t *peer = NULL;
- int ret = 0;
- gf_rdma_ctx_t *rdma_ctx = NULL;
- cmd_args_t *cmd_args = NULL;
- char service[NI_MAXSERV], host[NI_MAXHOST];
- int optval = 2;
-
- priv = this->private;
- peer = &priv->peer;
-
- priv->entity = GF_RDMA_SERVER_LISTENER;
-
- rdma_ctx = this->ctx->ib;
-
- ret = gf_rdma_server_get_local_sockaddr(this, &sock_union.sa,
- &sockaddr_len);
- if (ret != 0) {
- gf_msg(this->name, GF_LOG_WARNING, 0, RDMA_MSG_NW_ADDR_UNKNOWN,
- "cannot find network address of server to bind to");
- goto err;
- }
-
- ret = rdma_create_id(rdma_ctx->rdma_cm_event_channel, &peer->cm_id, this,
- RDMA_PS_TCP);
- if (ret != 0) {
- gf_msg(this->name, GF_LOG_WARNING, errno, RDMA_MSG_CM_EVENT_FAILED,
- "creation of rdma_cm_id "
- "failed");
- goto err;
- }
-
- memcpy(&this->myinfo.sockaddr, &sock_union.storage, sockaddr_len);
- this->myinfo.sockaddr_len = sockaddr_len;
-
- ret = getnameinfo((struct sockaddr *)&this->myinfo.sockaddr,
- this->myinfo.sockaddr_len, host, sizeof(host), service,
- sizeof(service), NI_NUMERICHOST);
- if (ret != 0) {
- gf_msg(this->name, GF_LOG_ERROR, ret, TRANS_MSG_GET_NAME_INFO_FAILED,
- "getnameinfo failed");
- goto err;
- }
-
- sprintf(this->myinfo.identifier, "%s:%s", host, service);
-
- ret = rdma_set_option(peer->cm_id, RDMA_OPTION_ID, RDMA_OPTION_ID_REUSEADDR,
- (void *)&optval, sizeof(optval));
- if (ret != 0) {
- gf_msg(this->name, GF_LOG_WARNING, errno, RDMA_MSG_OPTION_SET_FAILED,
- "rdma option set failed");
- goto err;
- }
-
- ret = rdma_bind_addr(peer->cm_id, &sock_union.sa);
- if (ret != 0) {
- gf_msg(this->name, GF_LOG_WARNING, errno,
- RDMA_MSG_RDMA_BIND_ADDR_FAILED, "rdma_bind_addr failed");
- goto err;
- }
- ret = rdma_listen(peer->cm_id, priv->backlog);
- if (ret != 0) {
- gf_msg(this->name, GF_LOG_WARNING, errno, RDMA_MSG_LISTEN_FAILED,
- "rdma_listen failed");
- goto err;
- }
-
- cmd_args = &(this->ctx->cmd_args);
- if (!cmd_args->brick_port2) {
- cmd_args->brick_port2 = rdma_get_src_port(peer->cm_id);
- gf_log(this->name, GF_LOG_INFO,
- "process started listening on port (%d)", cmd_args->brick_port2);
- }
-
- rpc_transport_ref(this);
-
- ret = 0;
-err:
- if (ret < 0) {
- if (peer->cm_id != NULL) {
- rdma_destroy_id(peer->cm_id);
- peer->cm_id = NULL;
- }
- }
-
- return ret;
-}
-
-struct rpc_transport_ops tops = {
- .submit_request = gf_rdma_submit_request,
- .submit_reply = gf_rdma_submit_reply,
- .connect = gf_rdma_connect,
- .disconnect = gf_rdma_disconnect,
- .listen = gf_rdma_listen,
-};
-
-int32_t
-init(rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
- gf_rdma_ctx_t *rdma_ctx = NULL;
- struct iobuf_pool *iobuf_pool = NULL;
-
- priv = GF_CALLOC(1, sizeof(*priv), gf_common_mt_rdma_private_t);
- if (!priv)
- return -1;
-
- this->private = priv;
-
- if (gf_rdma_init(this)) {
- gf_msg(this->name, GF_LOG_WARNING, 0, RDMA_MSG_INIT_IB_DEVICE_FAILED,
- "Failed to initialize IB Device");
- this->private = NULL;
- GF_FREE(priv);
- return -1;
- }
- rdma_ctx = this->ctx->ib;
- if (!rdma_ctx)
- return -1;
-
- pthread_mutex_lock(&rdma_ctx->lock);
- {
- if (this->dl_handle && (++(rdma_ctx->dlcount)) == 1) {
- iobuf_pool = this->ctx->iobuf_pool;
- iobuf_pool->rdma_registration = gf_rdma_register_arena;
- iobuf_pool->rdma_deregistration = gf_rdma_deregister_arena;
- gf_rdma_register_iobuf_pool_with_device(rdma_ctx->device,
- iobuf_pool);
- }
- }
- pthread_mutex_unlock(&rdma_ctx->lock);
-
- return 0;
-}
-
-int
-reconfigure(rpc_transport_t *this, dict_t *options)
-{
- gf_rdma_private_t *priv = NULL;
- uint32_t backlog = 0;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO("rdma", this, out);
- GF_VALIDATE_OR_GOTO("rdma", this->private, out);
-
- priv = this->private;
-
- if (dict_get_uint32(options, "transport.listen-backlog", &backlog) == 0) {
- priv->backlog = backlog;
- gf_log(this->name, GF_LOG_DEBUG,
- "Reconfigued "
- "transport.listen-backlog=%d",
- priv->backlog);
- }
- ret = 0;
-out:
- return ret;
-}
-void
-fini(struct rpc_transport *this)
-{
- /* TODO: verify this function does graceful finish */
- gf_rdma_private_t *priv = NULL;
- struct iobuf_pool *iobuf_pool = NULL;
- gf_rdma_ctx_t *rdma_ctx = NULL;
-
- priv = this->private;
-
- this->private = NULL;
-
- if (priv) {
- pthread_mutex_destroy(&priv->recv_mutex);
- pthread_mutex_destroy(&priv->write_mutex);
-
- gf_msg_trace(this->name, 0, "called fini on transport: %p", this);
- GF_FREE(priv);
- }
-
- rdma_ctx = this->ctx->ib;
- if (!rdma_ctx)
- return;
-
- pthread_mutex_lock(&rdma_ctx->lock);
- {
- if (this->dl_handle && (--(rdma_ctx->dlcount)) == 0) {
- iobuf_pool = this->ctx->iobuf_pool;
- gf_rdma_deregister_iobuf_pool(rdma_ctx->device);
- iobuf_pool->rdma_registration = NULL;
- iobuf_pool->rdma_deregistration = NULL;
- }
- }
- pthread_mutex_unlock(&rdma_ctx->lock);
-
- return;
-}
-
-/* TODO: expand each option */
-struct volume_options options[] = {
- {.key = {"transport.rdma.port", "rdma-port"},
- .type = GF_OPTION_TYPE_INT,
- .min = 1,
- .max = 4,
- .description = "check the option by 'ibv_devinfo'"},
- {
- .key = {"transport.rdma.mtu", "rdma-mtu"},
- .type = GF_OPTION_TYPE_INT,
- },
- {.key = {"transport.rdma.device-name", "rdma-device-name"},
- .type = GF_OPTION_TYPE_ANY,
- .description = "check by 'ibv_devinfo'"},
- {
- .key = {"transport.rdma.work-request-send-count",
- "rdma-work-request-send-count"},
- .type = GF_OPTION_TYPE_INT,
- },
- {
- .key = {"transport.rdma.work-request-recv-count",
- "rdma-work-request-recv-count"},
- .type = GF_OPTION_TYPE_INT,
- },
- {.key = {"remote-port", "transport.remote-port",
- "transport.rdma.remote-port"},
- .type = GF_OPTION_TYPE_INT},
- {.key = {"transport.rdma.attr-timeout", "rdma-attr-timeout"},
- .type = GF_OPTION_TYPE_INT},
- {.key = {"transport.rdma.attr-retry-cnt", "rdma-attr-retry-cnt"},
- .type = GF_OPTION_TYPE_INT},
- {.key = {"transport.rdma.attr-rnr-retry", "rdma-attr-rnr-retry"},
- .type = GF_OPTION_TYPE_INT},
- {.key = {"transport.rdma.listen-port", "listen-port"},
- .type = GF_OPTION_TYPE_INT},
- {.key = {"transport.rdma.connect-path", "connect-path"},
- .type = GF_OPTION_TYPE_ANY},
- {.key = {"transport.rdma.bind-path", "bind-path"},
- .type = GF_OPTION_TYPE_ANY},
- {.key = {"transport.rdma.listen-path", "listen-path"},
- .type = GF_OPTION_TYPE_ANY},
- {.key = {"transport.address-family", "address-family"},
- .value = {"inet", "inet6", "inet/inet6", "inet6/inet", "unix", "inet-sdp"},
- .type = GF_OPTION_TYPE_STR},
- {.key = {"transport.socket.lowlat"}, .type = GF_OPTION_TYPE_BOOL},
- {.key = {NULL}}};
diff --git a/rpc/rpc-transport/rdma/src/rdma.h b/rpc/rpc-transport/rdma/src/rdma.h
deleted file mode 100644
index 84c1397054b..00000000000
--- a/rpc/rpc-transport/rdma/src/rdma.h
+++ /dev/null
@@ -1,384 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _XPORT_RDMA_H
-#define _XPORT_RDMA_H
-
-#ifndef MAX_IOVEC
-#define MAX_IOVEC 16
-#endif /* MAX_IOVEC */
-
-#include "rpc-clnt.h"
-#include "rpc-transport.h"
-#include "xlator.h"
-#include "gf-event.h"
-#include <stdio.h>
-#include <list.h>
-#include <arpa/inet.h>
-#include <infiniband/verbs.h>
-#include <rdma/rdma_cma.h>
-
-/* FIXME: give appropriate values to these macros */
-#define GF_DEFAULT_RDMA_LISTEN_PORT (GF_DEFAULT_BASE_PORT + 1)
-
-/* If you are changing GF_RDMA_MAX_SEGMENTS, please make sure to update
- * GLUSTERFS_GF_RDMA_MAX_HEADER_SIZE defined in glusterfs.h .
- */
-#define GF_RDMA_MAX_SEGMENTS 8
-
-#define GF_RDMA_VERSION 1
-#define GF_RDMA_POOL_SIZE 512
-
-/* Additional attributes */
-#define GF_RDMA_TIMEOUT 14
-#define GF_RDMA_RETRY_CNT 7
-#define GF_RDMA_RNR_RETRY 7
-
-typedef enum gf_rdma_errcode { ERR_VERS = 1, ERR_CHUNK = 2 } gf_rdma_errcode_t;
-
-struct gf_rdma_err_vers {
- uint32_t gf_rdma_vers_low; /* Version range supported by peer */
- uint32_t gf_rdma_vers_high;
-} __attribute__((packed));
-typedef struct gf_rdma_err_vers gf_rdma_err_vers_t;
-
-typedef enum gf_rdma_proc {
- GF_RDMA_MSG = 0, /* An RPC call or reply msg */
- GF_RDMA_NOMSG = 1, /* An RPC call or reply msg - separate body */
- GF_RDMA_MSGP = 2, /* An RPC call or reply msg with padding */
- GF_RDMA_DONE = 3, /* Client signals reply completion */
- GF_RDMA_ERROR = 4 /* An RPC RDMA encoding error */
-} gf_rdma_proc_t;
-
-typedef enum gf_rdma_chunktype {
- gf_rdma_noch = 0, /* no chunk */
- gf_rdma_readch, /* some argument through rdma read */
- gf_rdma_areadch, /* entire request through rdma read */
- gf_rdma_writech, /* some result through rdma write */
- gf_rdma_replych /* entire reply through rdma write */
-} gf_rdma_chunktype_t;
-
-/* If you are modifying __gf_rdma_header, please make sure to change
- * GLUSTERFS_GF_RDMA_MAX_HEADER_SIZE defined in glusterfs.h to reflect your
- * changes
- */
-struct __gf_rdma_header {
- uint32_t rm_xid; /* Mirrors the RPC header xid */
- uint32_t rm_vers; /* Version of this protocol */
- uint32_t rm_credit; /* Buffers requested/granted */
- uint32_t rm_type; /* Type of message (enum gf_rdma_proc) */
- union {
- struct { /* no chunks */
- uint32_t rm_empty[3]; /* 3 empty chunk lists */
- } __attribute__((packed)) rm_nochunks;
-
- struct { /* no chunks and padded */
- uint32_t rm_align; /* Padding alignment */
- uint32_t rm_thresh; /* Padding threshold */
- uint32_t rm_pempty[3]; /* 3 empty chunk lists */
- } __attribute__((packed)) rm_padded;
-
- struct {
- uint32_t rm_type;
- gf_rdma_err_vers_t rm_version;
- } __attribute__((packed)) rm_error;
-
- uint32_t rm_chunks[0]; /* read, write and reply chunks */
- } __attribute__((packed)) rm_body;
-} __attribute__((packed));
-typedef struct __gf_rdma_header gf_rdma_header_t;
-
-/* If you are modifying __gf_rdma_segment or __gf_rdma_read_chunk, please make
- * sure to change GLUSTERFS_GF_RDMA_MAX_HEADER_SIZE defined in glusterfs.h to
- * reflect your changes.
- */
-struct __gf_rdma_segment {
- uint32_t rs_handle; /* Registered memory handle */
- uint32_t rs_length; /* Length of the chunk in bytes */
- uint64_t rs_offset; /* Chunk virtual address or offset */
-} __attribute__((packed));
-typedef struct __gf_rdma_segment gf_rdma_segment_t;
-
-/* read chunk(s), encoded as a linked list. */
-struct __gf_rdma_read_chunk {
- uint32_t rc_discrim; /* 1 indicates presence */
- uint32_t rc_position; /* Position in XDR stream */
- gf_rdma_segment_t rc_target;
-} __attribute__((packed));
-typedef struct __gf_rdma_read_chunk gf_rdma_read_chunk_t;
-
-/* write chunk, and reply chunk. */
-struct __gf_rdma_write_chunk {
- gf_rdma_segment_t wc_target;
-} __attribute__((packed));
-typedef struct __gf_rdma_write_chunk gf_rdma_write_chunk_t;
-
-/* write chunk(s), encoded as a counted array. */
-struct __gf_rdma_write_array {
- uint32_t wc_discrim; /* 1 indicates presence */
- uint32_t wc_nchunks; /* Array count */
- struct __gf_rdma_write_chunk wc_array[0];
-} __attribute__((packed));
-typedef struct __gf_rdma_write_array gf_rdma_write_array_t;
-
-/* options per transport end point */
-struct __gf_rdma_options {
- int32_t port;
- char *device_name;
- enum ibv_mtu mtu;
- int32_t send_count;
- int32_t recv_count;
- uint64_t recv_size;
- uint64_t send_size;
- uint8_t attr_timeout;
- uint8_t attr_retry_cnt;
- uint8_t attr_rnr_retry;
- uint32_t backlog;
-};
-typedef struct __gf_rdma_options gf_rdma_options_t;
-
-struct __gf_rdma_reply_info {
- uint32_t rm_xid; /* xid in network endian */
- gf_rdma_chunktype_t type; /*
- * can be either gf_rdma_replych
- * or gf_rdma_writech.
- */
- gf_rdma_write_array_t *wc_array;
- struct mem_pool *pool;
-};
-typedef struct __gf_rdma_reply_info gf_rdma_reply_info_t;
-
-struct __gf_rdma_ioq {
- union {
- struct list_head list;
- struct {
- struct __gf_rdma_ioq *next;
- struct __gf_rdma_ioq *prev;
- };
- };
-
- char is_request;
- struct iovec rpchdr[MAX_IOVEC];
- int rpchdr_count;
- struct iovec proghdr[MAX_IOVEC];
- int proghdr_count;
- struct iovec prog_payload[MAX_IOVEC];
- int prog_payload_count;
-
- struct iobref *iobref;
-
- union {
- struct __gf_rdma_ioq_request {
- /* used to build reply_chunk for GF_RDMA_NOMSG type msgs */
- struct iovec rsphdr_vec[MAX_IOVEC];
- int rsphdr_count;
-
- /*
- * used to build write_array during operations like
- * read.
- */
- struct iovec rsp_payload[MAX_IOVEC];
- int rsp_payload_count;
-
- struct rpc_req *rpc_req; /* FIXME: hack! hack! should be
- * cleaned up later
- */
- struct iobref *rsp_iobref;
- } request;
-
- gf_rdma_reply_info_t *reply_info;
- } msg;
-
- struct mem_pool *pool;
-};
-typedef struct __gf_rdma_ioq gf_rdma_ioq_t;
-
-typedef enum __gf_rdma_send_post_type {
- GF_RDMA_SEND_POST_NO_CHUNKLIST, /* post which is sent using rdma-send
- * and the msg carries no
- * chunklists.
- */
- GF_RDMA_SEND_POST_READ_CHUNKLIST, /* post which is sent using rdma-send
- * and the msg carries only read
- * chunklist.
- */
- GF_RDMA_SEND_POST_WRITE_CHUNKLIST, /* post which is sent using
- * rdma-send and the msg carries
- * only write chunklist.
- */
- GF_RDMA_SEND_POST_READ_WRITE_CHUNKLIST, /* post which is sent using
- * rdma-send and the msg
- * carries both read and
- * write chunklists.
- */
- GF_RDMA_SEND_POST_GF_RDMA_READ, /* RDMA read */
- GF_RDMA_SEND_POST_GF_RDMA_WRITE, /* RDMA write */
-} gf_rdma_send_post_type_t;
-
-/* represents one communication peer, two per transport_t */
-struct __gf_rdma_peer {
- rpc_transport_t *trans;
- struct rdma_cm_id *cm_id;
- struct ibv_qp *qp;
- pthread_t rdma_event_thread;
- char quota_set;
-
- int32_t recv_count;
- int32_t send_count;
- int32_t recv_size;
- int32_t send_size;
-
- int32_t quota;
- union {
- struct list_head ioq;
- struct {
- gf_rdma_ioq_t *ioq_next;
- gf_rdma_ioq_t *ioq_prev;
- };
- };
-
- /* QP attributes, needed to connect with remote QP */
- int32_t local_lid;
- int32_t local_psn;
- int32_t local_qpn;
- int32_t remote_lid;
- int32_t remote_psn;
- int32_t remote_qpn;
-};
-typedef struct __gf_rdma_peer gf_rdma_peer_t;
-
-struct __gf_rdma_post_context {
- struct ibv_mr *mr[GF_RDMA_MAX_SEGMENTS];
- int mr_count;
- struct iovec vector[MAX_IOVEC];
- int count;
- struct iobref *iobref;
- struct iobuf *hdr_iobuf;
- char is_request;
- int gf_rdma_reads;
- gf_rdma_reply_info_t *reply_info;
-};
-typedef struct __gf_rdma_post_context gf_rdma_post_context_t;
-
-typedef enum { GF_RDMA_SEND_POST, GF_RDMA_RECV_POST } gf_rdma_post_type_t;
-
-struct __gf_rdma_post {
- struct __gf_rdma_post *next, *prev;
- struct ibv_mr *mr;
- char *buf;
- int32_t buf_size;
- char aux;
- int32_t reused;
- struct __gf_rdma_device *device;
- gf_rdma_post_type_t type;
- gf_rdma_post_context_t ctx;
- int refcount;
- pthread_mutex_t lock;
-};
-typedef struct __gf_rdma_post gf_rdma_post_t;
-
-struct __gf_rdma_queue {
- gf_rdma_post_t active_posts, passive_posts;
- int32_t active_count, passive_count;
- pthread_mutex_t lock;
-};
-typedef struct __gf_rdma_queue gf_rdma_queue_t;
-
-struct __gf_rdma_qpreg {
- pthread_mutex_t lock;
- int32_t count;
- struct _qpent {
- struct _qpent *next, *prev;
- int32_t qp_num;
- gf_rdma_peer_t *peer;
- } ents[42];
-};
-typedef struct __gf_rdma_qpreg gf_rdma_qpreg_t;
-
-/* context per device, stored in global glusterfs_ctx_t->ib */
-struct __gf_rdma_device {
- struct __gf_rdma_device *next;
- const char *device_name;
- struct ibv_context *context;
- int32_t port;
- struct ibv_pd *pd;
- struct ibv_srq *srq;
- gf_rdma_qpreg_t qpreg;
- struct ibv_comp_channel *send_chan, *recv_chan;
- struct ibv_cq *send_cq, *recv_cq;
- gf_rdma_queue_t sendq, recvq;
- pthread_t send_thread, recv_thread, async_event_thread;
- struct mem_pool *request_ctx_pool;
- struct mem_pool *ioq_pool;
- struct mem_pool *reply_info_pool;
- struct list_head all_mr;
- pthread_mutex_t all_mr_lock;
-};
-typedef struct __gf_rdma_device gf_rdma_device_t;
-
-struct __gf_rdma_arena_mr {
- struct list_head list;
- struct iobuf_arena *iobuf_arena;
- struct ibv_mr *mr;
-};
-
-typedef struct __gf_rdma_arena_mr gf_rdma_arena_mr;
-struct __gf_rdma_ctx {
- gf_rdma_device_t *device;
- struct rdma_event_channel *rdma_cm_event_channel;
- pthread_t rdma_cm_thread;
- pthread_mutex_t lock;
- int32_t dlcount;
-};
-typedef struct __gf_rdma_ctx gf_rdma_ctx_t;
-
-struct __gf_rdma_request_context {
- struct ibv_mr *mr[GF_RDMA_MAX_SEGMENTS];
- int mr_count;
- struct mem_pool *pool;
- gf_rdma_peer_t *peer;
- struct iobref *iobref;
- struct iobref *rsp_iobref;
-};
-typedef struct __gf_rdma_request_context gf_rdma_request_context_t;
-
-typedef enum {
- GF_RDMA_SERVER_LISTENER,
- GF_RDMA_SERVER,
- GF_RDMA_CLIENT,
-} gf_rdma_transport_entity_t;
-
-struct __gf_rdma_private {
- int32_t idx;
- unsigned char connected;
- in_addr_t addr;
- unsigned short port;
-
- /* IB Verbs Driver specific variables, pointers */
- gf_rdma_peer_t peer;
- struct __gf_rdma_device *device;
- gf_rdma_options_t options;
-
- /* Used by trans->op->receive */
- char *data_ptr;
- int32_t data_offset;
- int32_t data_len;
-
- /* Mutex */
- pthread_mutex_t write_mutex;
- rpc_transport_t *listener;
- pthread_mutex_t recv_mutex;
- pthread_cond_t recv_cond;
- gf_rdma_transport_entity_t entity;
- uint32_t backlog;
-};
-typedef struct __gf_rdma_private gf_rdma_private_t;
-
-#endif /* _XPORT_GF_RDMA_H */
diff --git a/rpc/rpc-transport/rdma/src/rpc-trans-rdma-messages.h b/rpc/rpc-transport/rdma/src/rpc-trans-rdma-messages.h
deleted file mode 100644
index 6e9ea9a2925..00000000000
--- a/rpc/rpc-transport/rdma/src/rpc-trans-rdma-messages.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _RPC_TRANS_RDMA_MESSAGES_H_
-#define _RPC_TRANS_RDMA_MESSAGES_H_
-
-#include "glfs-message-id.h"
-
-/* To add new message IDs, append new identifiers at the end of the list.
- *
- * Never remove a message ID. If it's not used anymore, you can rename it or
- * leave it as it is, but not delete it. This is to prevent reutilization of
- * IDs by other messages.
- *
- * The component name must match one of the entries defined in
- * glfs-message-id.h.
- */
-
-GLFS_MSGID(
- RPC_TRANS_RDMA, RDMA_MSG_PORT_BIND_FAILED, RDMA_MSG_POST_CREATION_FAILED,
- RDMA_MSG_DEREGISTER_ARENA_FAILED, RDMA_MSG_MR_ALOC_FAILED,
- RDMA_MSG_PREREG_BUFFER_FAILED, RDMA_MSG_CQ_CREATION_FAILED,
- RDMA_MSG_REQ_NOTIFY_CQ_REVQ_FAILED, RDMA_MSG_QUERY_DEVICE_FAILED,
- RDMA_MSG_REQ_NOTIFY_CQ_SENDQ_FAILED, RDMA_MSG_SEND_COMP_CHAN_FAILED,
- RDMA_MSG_RECV_COMP_CHAN_FAILED, RDMA_MSG_ALOC_PROT_DOM_FAILED,
- RDMA_MSG_CRE_SRQ_FAILED, RDMA_MSG_ALOC_POST_FAILED,
- RDMA_MSG_SEND_COMP_THREAD_FAILED, RDMA_MSG_RECV_COMP_THREAD_FAILED,
- RDMA_MSG_ASYNC_EVENT_THEAD_FAILED, RDMA_MSG_GET_DEVICE_NAME_FAILED,
- RDMA_MSG_GET_IB_DEVICE_FAILED, RDMA_MSG_CREAT_INC_TRANS_FAILED,
- RDMA_MSG_CREAT_QP_FAILED, RDMA_MSG_ACCEPT_FAILED, RDMA_MSG_CONNECT_FAILED,
- RDMA_MSG_ROUTE_RESOLVE_FAILED, RDMA_MSG_GET_DEVICE_FAILED,
- RDMA_MSG_PEER_DISCONNECTED, RDMA_MSG_ENCODE_ERROR,
- RDMA_MSG_POST_SEND_FAILED, RDMA_MSG_READ_CHUNK_VECTOR_FAILED,
- RDMA_MSG_WRITE_CHUNK_VECTOR_FAILED, RDMA_MSG_WRITE_REPLY_CHUNCK_CONFLICT,
- RDMA_MSG_CHUNK_COUNT_GREAT_MAX_SEGMENTS, RDMA_MSG_CREATE_READ_CHUNK_FAILED,
- RDMA_MSG_CREATE_WRITE_REPLAY_FAILED,
- RDMA_MSG_SEND_SIZE_GREAT_INLINE_THRESHOLD,
- RDMA_MSG_REG_ACCESS_LOCAL_WRITE_FAILED, RDMA_MSG_WRITE_PEER_FAILED,
- RDMA_MSG_SEND_REPLY_FAILED, RDMA_MSG_INVALID_CHUNK_TYPE,
- RDMA_MSG_PROC_IOQ_ENTRY_FAILED, RDMA_MSG_NEW_IOQ_ENTRY_FAILED,
- RDMA_MSG_RPC_REPLY_CREATE_FAILED, RDMA_MSG_GET_READ_CHUNK_FAILED,
- RDMA_MSG_GET_WRITE_CHUNK_FAILED, RDMA_MSG_REPLY_INFO_ALLOC_FAILED,
- RDMA_MSG_RDMA_ERROR_RECEIVED, RDMA_MSG_GET_REQ_INFO_RPC_FAILED,
- RDMA_MSG_POLL_IN_NOTIFY_FAILED, RDMA_MSG_HEADER_DECODE_FAILED,
- RDMA_MSG_EVENT_SRQ_LIMIT_REACHED, RDMA_MSG_UNRECG_MQ_VALUE,
- RDMA_MSG_BUFFER_ERROR, RDMA_MSG_OPTION_SET_FAILED, RDMA_MSG_LISTEN_FAILED,
- RDMA_MSG_INIT_IB_DEVICE_FAILED, RDMA_MSG_WRITE_CLIENT_ERROR,
- RDMA_MSG_CHUNKLIST_ERROR, RDMA_MSG_INVALID_ENTRY,
- RDMA_MSG_READ_CLIENT_ERROR, RDMA_MSG_RPC_ST_ERROR,
- RDMA_MSG_PEER_READ_FAILED, RDMA_MSG_POST_MISSING, RDMA_MSG_PEER_REQ_FAILED,
- RDMA_MSG_PEER_REP_FAILED, RDMA_MSG_EVENT_ERROR, RDMA_MSG_IBV_GET_CQ_FAILED,
- RDMA_MSG_IBV_REQ_NOTIFY_CQ_FAILED, RDMA_MSG_RECV_ERROR,
- RDMA_MSG_IBV_POLL_CQ_ERROR, RDMA_MSG_RDMA_HANDLE_FAILED,
- RDMA_MSG_CM_EVENT_FAILED, RDMA_MSG_CLIENT_BIND_FAILED,
- RDMA_MSG_RDMA_RESOLVE_ADDR_FAILED, RDMA_MSG_NW_ADDR_UNKNOWN,
- RDMA_MSG_RDMA_BIND_ADDR_FAILED, RDMA_MSG_SEND_CLIENT_ERROR,
- RDMA_MSG_UNRECG_MTU_VALUE);
-
-#endif /* !_RPC_TRANS_RDMA_MESSAGES_H_ */
diff --git a/rpc/rpc-transport/socket/src/name.c b/rpc/rpc-transport/socket/src/name.c
index 3d3d108f135..9286bbb236d 100644
--- a/rpc/rpc-transport/socket/src/name.c
+++ b/rpc/rpc-transport/socket/src/name.c
@@ -21,7 +21,7 @@
#include "rpc-transport.h"
#include "socket.h"
-#include "common-utils.h"
+#include <glusterfs/common-utils.h>
static void
_assign_port(struct sockaddr *sockaddr, uint16_t port)
@@ -105,10 +105,10 @@ af_unix_client_bind(rpc_transport_t *this, struct sockaddr *sockaddr,
struct sockaddr_un *addr = NULL;
int32_t ret = 0;
- path_data = dict_get(this->options, "transport.socket.bind-path");
+ path_data = dict_get_sizen(this->options, "transport.socket.bind-path");
if (path_data) {
char *path = data_to_str(path_data);
- if (!path || strlen(path) > UNIX_PATH_MAX) {
+ if (!path || path_data->len > 108) { /* 108 = addr->sun_path length */
gf_log(this->name, GF_LOG_TRACE,
"bind-path not specified for unix socket, "
"letting connect to assign default value");
@@ -134,7 +134,7 @@ err:
return ret;
}
-int32_t
+static int32_t
client_fill_address_family(rpc_transport_t *this, sa_family_t *sa_family)
{
data_t *address_family_data = NULL;
@@ -145,12 +145,13 @@ client_fill_address_family(rpc_transport_t *this, sa_family_t *sa_family)
goto out;
}
- address_family_data = dict_get(this->options, "transport.address-family");
+ address_family_data = dict_get_sizen(this->options,
+ "transport.address-family");
if (!address_family_data) {
data_t *remote_host_data = NULL, *connect_path_data = NULL;
- remote_host_data = dict_get(this->options, "remote-host");
- connect_path_data = dict_get(this->options,
- "transport.socket.connect-path");
+ remote_host_data = dict_get_sizen(this->options, "remote-host");
+ connect_path_data = dict_get_sizen(this->options,
+ "transport.socket.connect-path");
if (!(remote_host_data || connect_path_data) ||
(remote_host_data && connect_path_data)) {
@@ -179,7 +180,7 @@ client_fill_address_family(rpc_transport_t *this, sa_family_t *sa_family)
}
} else {
- char *address_family = data_to_str(address_family_data);
+ const char *address_family = data_to_str(address_family_data);
if (!strcasecmp(address_family, "unix")) {
*sa_family = AF_UNIX;
} else if (!strcasecmp(address_family, "inet")) {
@@ -211,11 +212,12 @@ af_inet_client_get_remote_sockaddr(rpc_transport_t *this,
data_t *remote_host_data = NULL;
data_t *remote_port_data = NULL;
char *remote_host = NULL;
- uint16_t remote_port = 0;
+ uint16_t remote_port = GF_DEFAULT_SOCKET_LISTEN_PORT;
struct addrinfo *addr_info = NULL;
int32_t ret = 0;
+ struct in6_addr serveraddr;
- remote_host_data = dict_get(options, "remote-host");
+ remote_host_data = dict_get_sizen(options, "remote-host");
if (remote_host_data == NULL) {
gf_log(this->name, GF_LOG_ERROR,
"option remote-host missing in volume %s", this->name);
@@ -231,22 +233,27 @@ af_inet_client_get_remote_sockaddr(rpc_transport_t *this,
goto err;
}
- remote_port_data = dict_get(options, "remote-port");
+ remote_port_data = dict_get_sizen(options, "remote-port");
if (remote_port_data == NULL) {
gf_log(this->name, GF_LOG_TRACE,
"option remote-port missing in volume %s. Defaulting to %d",
this->name, GF_DEFAULT_SOCKET_LISTEN_PORT);
-
- remote_port = GF_DEFAULT_SOCKET_LISTEN_PORT;
} else {
remote_port = data_to_uint16(remote_port_data);
+ if (remote_port == (uint16_t)-1) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "option remote-port has invalid port in volume %s",
+ this->name);
+ ret = -1;
+ goto err;
+ }
}
- if (remote_port == (uint16_t)-1) {
- gf_log(this->name, GF_LOG_ERROR,
- "option remote-port has invalid port in volume %s", this->name);
- ret = -1;
- goto err;
+ /* Need to update transport-address family if address-family is not provided
+ to command-line arguments
+ */
+ if (inet_pton(AF_INET6, remote_host, &serveraddr)) {
+ sockaddr->sa_family = AF_INET6;
}
/* TODO: gf_resolve is a blocking call. kick in some
@@ -274,31 +281,29 @@ af_unix_client_get_remote_sockaddr(rpc_transport_t *this,
struct sockaddr_un *sockaddr_un = NULL;
char *connect_path = NULL;
data_t *connect_path_data = NULL;
- int32_t ret = 0;
+ int32_t ret = -1;
- connect_path_data = dict_get(this->options,
- "transport.socket.connect-path");
+ connect_path_data = dict_get_sizen(this->options,
+ "transport.socket.connect-path");
if (!connect_path_data) {
gf_log(this->name, GF_LOG_ERROR,
"option transport.unix.connect-path not specified for "
"address-family unix");
- ret = -1;
goto err;
}
- connect_path = data_to_str(connect_path_data);
- if (!connect_path) {
+ /* 108 = sockaddr_un->sun_path length */
+ if ((connect_path_data->len + 1) > 108) {
gf_log(this->name, GF_LOG_ERROR,
- "transport.unix.connect-path is null-string");
- ret = -1;
+ "connect-path value length %d > %d octets",
+ connect_path_data->len + 1, UNIX_PATH_MAX);
goto err;
}
- if ((strlen(connect_path) + 1) > UNIX_PATH_MAX) {
+ connect_path = data_to_str(connect_path_data);
+ if (!connect_path) {
gf_log(this->name, GF_LOG_ERROR,
- "connect-path value length %" GF_PRI_SIZET " > %d octets",
- strlen(connect_path), UNIX_PATH_MAX);
- ret = -1;
+ "transport.unix.connect-path is null-string");
goto err;
}
@@ -307,6 +312,7 @@ af_unix_client_get_remote_sockaddr(rpc_transport_t *this,
strcpy(sockaddr_un->sun_path, connect_path);
*sockaddr_len = sizeof(struct sockaddr_un);
+ ret = 0;
err:
return ret;
}
@@ -320,7 +326,8 @@ af_unix_server_get_local_sockaddr(rpc_transport_t *this, struct sockaddr *addr,
int32_t ret = 0;
struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
- listen_path_data = dict_get(this->options, "transport.socket.listen-path");
+ listen_path_data = dict_get_sizen(this->options,
+ "transport.socket.listen-path");
if (!listen_path_data) {
gf_log(this->name, GF_LOG_ERROR,
"missing option transport.socket.listen-path");
@@ -334,7 +341,7 @@ af_unix_server_get_local_sockaddr(rpc_transport_t *this, struct sockaddr *addr,
#define UNIX_PATH_MAX 108
#endif
- if ((strlen(listen_path) + 1) > UNIX_PATH_MAX) {
+ if ((listen_path_data->len + 1) > UNIX_PATH_MAX) {
gf_log(this->name, GF_LOG_ERROR,
"option transport.unix.listen-path has value length "
"%" GF_PRI_SIZET " > %d",
@@ -362,14 +369,19 @@ af_inet_server_get_local_sockaddr(rpc_transport_t *this, struct sockaddr *addr,
dict_t *options = NULL;
int32_t ret = 0;
+ /* initializes addr_len */
+ *addr_len = 0;
+
options = this->options;
- listen_port_data = dict_get(options, "transport.socket.listen-port");
+ listen_port_data = dict_get_sizen(options, "transport.socket.listen-port");
if (listen_port_data) {
listen_port = data_to_uint16(listen_port_data);
+ } else {
+ listen_port = GF_DEFAULT_SOCKET_LISTEN_PORT;
}
- listen_host_data = dict_get(options, "transport.socket.bind-address");
+ listen_host_data = dict_get_sizen(options, "transport.socket.bind-address");
if (listen_host_data) {
listen_host = data_to_str(listen_host_data);
} else {
@@ -413,9 +425,13 @@ af_inet_server_get_local_sockaddr(rpc_transport_t *this, struct sockaddr *addr,
}
}
- if (!(*addr_len) && res) {
- memcpy(addr, res->ai_addr, res->ai_addrlen);
- *addr_len = res->ai_addrlen;
+ if (!(*addr_len)) {
+ if (res && res->ai_addr) {
+ memcpy(addr, res->ai_addr, res->ai_addrlen);
+ *addr_len = res->ai_addrlen;
+ } else {
+ ret = -1;
+ }
}
freeaddrinfo(res);
@@ -518,7 +534,10 @@ socket_client_get_remote_sockaddr(rpc_transport_t *this,
ret = -1;
}
- if (*sa_family == AF_UNSPEC) {
+ /* Address-family is updated based on remote_host in
+ af_inet_client_get_remote_sockaddr
+ */
+ if (*sa_family != sockaddr->sa_family) {
*sa_family = sockaddr->sa_family;
}
@@ -526,23 +545,24 @@ err:
return ret;
}
-int32_t
+static int32_t
server_fill_address_family(rpc_transport_t *this, sa_family_t *sa_family)
{
data_t *address_family_data = NULL;
int32_t ret = -1;
#ifdef IPV6_DEFAULT
- char *addr_family = "inet6";
+ const char *addr_family = "inet6";
sa_family_t default_family = AF_INET6;
#else
- char *addr_family = "inet";
+ const char *addr_family = "inet";
sa_family_t default_family = AF_INET;
#endif
GF_VALIDATE_OR_GOTO("socket", sa_family, out);
- address_family_data = dict_get(this->options, "transport.address-family");
+ address_family_data = dict_get_sizen(this->options,
+ "transport.address-family");
if (address_family_data) {
char *address_family = NULL;
address_family = data_to_str(address_family_data);
@@ -594,7 +614,7 @@ socket_server_get_local_sockaddr(rpc_transport_t *this, struct sockaddr *addr,
switch (addr->sa_family) {
case AF_INET_SDP:
addr->sa_family = AF_INET;
- /* Fall through */
+ /* Fall through */
case AF_INET:
case AF_INET6:
case AF_UNSPEC:
@@ -614,7 +634,7 @@ err:
return ret;
}
-int32_t
+static int32_t
fill_inet6_inet_identifiers(rpc_transport_t *this,
struct sockaddr_storage *addr, int32_t addr_len,
char *identifier)
diff --git a/rpc/rpc-transport/socket/src/name.h b/rpc/rpc-transport/socket/src/name.h
index 85f8bb7040b..080c7588f5a 100644
--- a/rpc/rpc-transport/socket/src/name.h
+++ b/rpc/rpc-transport/socket/src/name.h
@@ -11,7 +11,7 @@
#ifndef _SOCKET_NAME_H
#define _SOCKET_NAME_H
-#include "compat.h"
+#include <glusterfs/compat.h>
int32_t
client_bind(rpc_transport_t *this, struct sockaddr *sockaddr,
diff --git a/rpc/rpc-transport/socket/src/socket-mem-types.h b/rpc/rpc-transport/socket/src/socket-mem-types.h
index edf3ed0c6f1..241ce67f670 100644
--- a/rpc/rpc-transport/socket/src/socket-mem-types.h
+++ b/rpc/rpc-transport/socket/src/socket-mem-types.h
@@ -11,7 +11,7 @@
#ifndef __SOCKET_MEM_TYPES_H__
#define __SOCKET_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
typedef enum gf_sock_mem_types_ {
gf_sock_connect_error_state_t = gf_common_mt_end + 1,
diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c
index 776e647d4f6..ed8b473be23 100644
--- a/rpc/rpc-transport/socket/src/socket.c
+++ b/rpc/rpc-transport/socket/src/socket.c
@@ -10,22 +10,16 @@
#include "socket.h"
#include "name.h"
-#include "dict.h"
-#include "rpc-transport.h"
-#include "logging.h"
-#include "xlator.h"
-#include "syscall.h"
-#include "byte-order.h"
-#include "common-utils.h"
-#include "compat-errno.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/compat-errno.h>
#include "socket-mem-types.h"
-#include "timer.h"
/* ugly #includes below */
#include "protocol-common.h"
#include "glusterfs3-xdr.h"
#include "glusterfs4-xdr.h"
-#include "xdr-nfs3.h"
#include "rpcsvc.h"
/* for TCP_USER_TIMEOUT */
@@ -35,7 +29,6 @@
#include <netinet/tcp.h>
#endif
-#include <fcntl.h>
#include <errno.h>
#include <rpc/xdr.h>
#include <sys/ioctl.h>
@@ -139,7 +132,7 @@ ssl_setup_connection_params(rpc_transport_t *this);
\
gf_log(this->name, GF_LOG_TRACE, \
"partial read on non-blocking socket"); \
- \
+ ret = 0; \
break; \
} \
}
@@ -173,7 +166,7 @@ ssl_setup_connection_params(rpc_transport_t *this);
\
ret = __socket_readv(this, in->pending_vector, 1, &in->pending_vector, \
&in->pending_count, &bytes_read); \
- if (ret == -1) \
+ if (ret < 0) \
break; \
__socket_proto_update_priv_after_read(priv, ret, bytes_read); \
}
@@ -198,7 +191,7 @@ socket_dump_info(struct sockaddr *sa, int is_server, int is_ssl, int sock,
0,
};
char *addr = NULL;
- char *peer_type = NULL;
+ const char *peer_type = NULL;
int af = sa->sa_family;
int so_error = -1;
socklen_t slen = sizeof(so_error);
@@ -253,7 +246,6 @@ ssl_do(rpc_transport_t *this, void *buf, size_t len, SSL_trinary_func *func)
int r = (-1);
socket_private_t *priv = NULL;
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
priv = this->private;
if (buf) {
@@ -269,11 +261,9 @@ ssl_do(rpc_transport_t *this, void *buf, size_t len, SSL_trinary_func *func)
}
r = func(priv->ssl_ssl, buf, len);
} else {
- /*
- * We actually need these functions to get to
- * priv->connected == 1.
- */
- r = ((SSL_unary_func *)func)(priv->ssl_ssl);
+ /* This should be treated as error */
+ gf_log(this->name, GF_LOG_ERROR, "buffer is empty %s", __func__);
+ goto out;
}
switch (SSL_get_error(priv->ssl_ssl, r)) {
case SSL_ERROR_NONE:
@@ -308,14 +298,69 @@ out:
#define ssl_write_one(t, b, l) \
ssl_do((t), (b), (l), (SSL_trinary_func *)SSL_write)
-int
-ssl_setup_connection_prefix(rpc_transport_t *this)
+/* set crl verify flags only for server */
+/* see man X509_VERIFY_PARAM_SET_FLAGS(3)
+ * X509_V_FLAG_CRL_CHECK enables CRL checking for the certificate chain
+ * leaf certificate. An error occurs if a suitable CRL cannot be found.
+ * Since we're never going to revoke a gluster node cert, we better disable
+ * CRL check for server certs to avoid getting error and failed connection
+ * attempts.
+ */
+static void
+ssl_clear_crl_verify_flags(SSL_CTX *ssl_ctx)
+{
+#ifdef X509_V_FLAG_CRL_CHECK_ALL
+#ifdef HAVE_SSL_CTX_GET0_PARAM
+ X509_VERIFY_PARAM *vpm;
+
+ vpm = SSL_CTX_get0_param(ssl_ctx);
+ if (vpm) {
+ X509_VERIFY_PARAM_clear_flags(
+ vpm, (X509_V_FLAG_CRL_CHECK | X509_V_FLAG_CRL_CHECK_ALL));
+ }
+#else
+ /* CRL verify flag need not be cleared for rhel6 kind of clients */
+#endif
+#else
+ gf_log(this->name, GF_LOG_ERROR, "OpenSSL version does not support CRL");
+#endif
+ return;
+}
+
+/* set crl verify flags only for server */
+static void
+ssl_set_crl_verify_flags(SSL_CTX *ssl_ctx)
+{
+#ifdef X509_V_FLAG_CRL_CHECK_ALL
+#ifdef HAVE_SSL_CTX_GET0_PARAM
+ X509_VERIFY_PARAM *vpm;
+
+ vpm = SSL_CTX_get0_param(ssl_ctx);
+ if (vpm) {
+ unsigned long flags;
+
+ flags = X509_VERIFY_PARAM_get_flags(vpm);
+ flags |= (X509_V_FLAG_CRL_CHECK | X509_V_FLAG_CRL_CHECK_ALL);
+ X509_VERIFY_PARAM_set_flags(vpm, flags);
+ }
+#else
+ X509_STORE *x509store;
+
+ x509store = SSL_CTX_get_cert_store(ssl_ctx);
+ X509_STORE_set_flags(x509store,
+ X509_V_FLAG_CRL_CHECK | X509_V_FLAG_CRL_CHECK_ALL);
+#endif
+#else
+ gf_log(this->name, GF_LOG_ERROR, "OpenSSL version does not support CRL");
+#endif
+}
+
+static int
+ssl_setup_connection_prefix(rpc_transport_t *this, gf_boolean_t server)
{
int ret = -1;
socket_private_t *priv = NULL;
- GF_VALIDATE_OR_GOTO(this->name, this->private, done);
-
priv = this->private;
if (ssl_setup_connection_params(this) < 0) {
@@ -332,6 +377,9 @@ ssl_setup_connection_prefix(rpc_transport_t *this)
priv->ssl_accepted = _gf_false;
priv->ssl_context_created = _gf_false;
+ if (!server && priv->crl_path)
+ ssl_clear_crl_verify_flags(priv->ssl_ctx);
+
priv->ssl_ssl = SSL_new(priv->ssl_ctx);
if (!priv->ssl_ssl) {
gf_log(this->name, GF_LOG_ERROR, "SSL_new failed");
@@ -364,7 +412,6 @@ ssl_setup_connection_postfix(rpc_transport_t *this)
char peer_CN[256] = "";
socket_private_t *priv = NULL;
- GF_VALIDATE_OR_GOTO(this->name, this->private, done);
priv = this->private;
/* Make sure _SSL verification_ succeeded, yielding an identity. */
@@ -386,6 +433,7 @@ ssl_setup_connection_postfix(rpc_transport_t *this)
gf_log(this->name, GF_LOG_DEBUG,
"SSL verification succeeded (client: %s) (server: %s)",
this->peerinfo.identifier, this->myinfo.identifier);
+ X509_free(peer);
return gf_strdup(peer_CN);
/* Error paths. */
@@ -397,11 +445,10 @@ ssl_error:
SSL_free(priv->ssl_ssl);
priv->ssl_ssl = NULL;
-done:
return NULL;
}
-int
+static int
ssl_complete_connection(rpc_transport_t *this)
{
int ret = -1; /* 1 : implies go back to epoll_wait()
@@ -640,7 +687,6 @@ __socket_rwv(rpc_transport_t *this, struct iovec *vector, int count,
int opcount = 0;
int moved = 0;
- GF_VALIDATE_OR_GOTO("socket", this, out);
GF_VALIDATE_OR_GOTO("socket", this->private, out);
priv = this->private;
@@ -679,11 +725,11 @@ __socket_rwv(rpc_transport_t *this, struct iovec *vector, int count,
ret = sys_writev(sock, opvector, IOV_MIN(opcount));
}
- if (ret == 0 || (ret == -1 && errno == EAGAIN)) {
+ if ((ret == 0) || ((ret < 0) && (errno == EAGAIN))) {
/* done for now */
break;
- }
- this->total_bytes_write += ret;
+ } else if (ret > 0)
+ this->total_bytes_write += ret;
} else {
ret = __socket_cached_read(this, opvector, opcount);
if (ret == 0) {
@@ -694,11 +740,11 @@ __socket_rwv(rpc_transport_t *this, struct iovec *vector, int count,
errno = ENODATA;
ret = -1;
}
- if (ret == -1 && errno == EAGAIN) {
+ if ((ret < 0) && (errno == EAGAIN)) {
/* done for now */
break;
- }
- this->total_bytes_read += ret;
+ } else if (ret > 0)
+ this->total_bytes_read += ret;
}
if (ret == 0) {
@@ -710,7 +756,7 @@ __socket_rwv(rpc_transport_t *this, struct iovec *vector, int count,
errno = ENOTCONN;
break;
}
- if (ret == -1) {
+ if (ret < 0) {
if (errno == EINTR)
continue;
@@ -773,24 +819,16 @@ static int
__socket_readv(rpc_transport_t *this, struct iovec *vector, int count,
struct iovec **pending_vector, int *pending_count, size_t *bytes)
{
- int ret = -1;
-
- ret = __socket_rwv(this, vector, count, pending_vector, pending_count,
- bytes, 0);
-
- return ret;
+ return __socket_rwv(this, vector, count, pending_vector, pending_count,
+ bytes, 0);
}
static int
__socket_writev(rpc_transport_t *this, struct iovec *vector, int count,
struct iovec **pending_vector, int *pending_count)
{
- int ret = -1;
-
- ret = __socket_rwv(this, vector, count, pending_vector, pending_count, NULL,
- 1);
-
- return ret;
+ return __socket_rwv(this, vector, count, pending_vector, pending_count,
+ NULL, 1);
}
static int
@@ -807,8 +845,8 @@ __socket_shutdown(rpc_transport_t *this)
gf_log(this->name, GF_LOG_DEBUG, "shutdown() returned %d. %s", ret,
strerror(errno));
} else {
- gf_log(this->name, GF_LOG_INFO, "intentional socket shutdown(%d)",
- priv->sock);
+ GF_LOG_OCCASIONALLY(priv->shutdown_log_ctr, this->name, GF_LOG_INFO,
+ "intentional socket shutdown(%d)", priv->sock);
}
return ret;
@@ -817,20 +855,14 @@ __socket_shutdown(rpc_transport_t *this)
static int
__socket_teardown_connection(rpc_transport_t *this)
{
- int ret = -1;
socket_private_t *priv = NULL;
- GF_VALIDATE_OR_GOTO("socket", this, out);
- GF_VALIDATE_OR_GOTO("socket", this->private, out);
-
priv = this->private;
if (priv->use_ssl)
ssl_teardown_connection(priv);
- ret = __socket_shutdown(this);
-out:
- return ret;
+ return __socket_shutdown(this);
}
static int
@@ -839,15 +871,12 @@ __socket_disconnect(rpc_transport_t *this)
int ret = -1;
socket_private_t *priv = NULL;
- GF_VALIDATE_OR_GOTO("socket", this, out);
- GF_VALIDATE_OR_GOTO("socket", this->private, out);
-
priv = this->private;
gf_log(this->name, GF_LOG_TRACE, "disconnecting %p, sock=%d", this,
priv->sock);
- if (priv->sock != -1) {
+ if (priv->sock >= 0) {
gf_log_callingfn(this->name, GF_LOG_TRACE,
"tearing down socket connection");
ret = __socket_teardown_connection(this);
@@ -858,7 +887,6 @@ __socket_disconnect(rpc_transport_t *this)
}
}
-out:
return ret;
}
@@ -872,9 +900,8 @@ __socket_server_bind(rpc_transport_t *this)
int ret = -1;
int opt = 1;
int reuse_check_sock = -1;
-
- GF_VALIDATE_OR_GOTO("socket", this, out);
- GF_VALIDATE_OR_GOTO("socket", this->private, out);
+ uint16_t sin_port = 0;
+ int retries = 0;
priv = this->private;
ctx = this->ctx;
@@ -882,7 +909,7 @@ __socket_server_bind(rpc_transport_t *this)
ret = setsockopt(priv->sock, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));
- if (ret == -1) {
+ if (ret != 0) {
gf_log(this->name, GF_LOG_ERROR,
"setsockopt() for SO_REUSEADDR failed (%s)", strerror(errno));
}
@@ -895,7 +922,7 @@ __socket_server_bind(rpc_transport_t *this)
if (reuse_check_sock >= 0) {
ret = connect(reuse_check_sock, SA(&unix_addr),
this->myinfo.sockaddr_len);
- if ((ret == -1) && (ECONNREFUSED == errno)) {
+ if ((ret != 0) && (ECONNREFUSED == errno)) {
sys_unlink(((struct sockaddr_un *)&unix_addr)->sun_path);
}
gf_log(this->name, GF_LOG_INFO,
@@ -904,19 +931,47 @@ __socket_server_bind(rpc_transport_t *this)
}
}
- ret = bind(priv->sock, (struct sockaddr *)&this->myinfo.sockaddr,
- this->myinfo.sockaddr_len);
+ if (AF_UNIX != SA(&this->myinfo.sockaddr)->sa_family) {
+ sin_port = (int)ntohs(
+ ((struct sockaddr_in *)&this->myinfo.sockaddr)->sin_port);
+ if (!sin_port) {
+ sin_port = GF_DEFAULT_SOCKET_LISTEN_PORT;
+ ((struct sockaddr_in *)&this->myinfo.sockaddr)->sin_port = htons(
+ sin_port);
+ }
+ retries = 10;
+ while (retries) {
+ ret = bind(priv->sock, (struct sockaddr *)&this->myinfo.sockaddr,
+ this->myinfo.sockaddr_len);
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR, "binding to %s failed: %s",
+ this->myinfo.identifier, strerror(errno));
+ if (errno == EADDRINUSE) {
+ gf_log(this->name, GF_LOG_ERROR, "Port is already in use");
+ sleep(1);
+ retries--;
+ } else {
+ break;
+ }
+ } else {
+ break;
+ }
+ }
+ } else {
+ ret = bind(priv->sock, (struct sockaddr *)&this->myinfo.sockaddr,
+ this->myinfo.sockaddr_len);
- if (ret == -1) {
- gf_log(this->name, GF_LOG_ERROR, "binding to %s failed: %s",
- this->myinfo.identifier, strerror(errno));
- if (errno == EADDRINUSE) {
- gf_log(this->name, GF_LOG_ERROR, "Port is already in use");
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR, "binding to %s failed: %s",
+ this->myinfo.identifier, strerror(errno));
+ if (errno == EADDRINUSE) {
+ gf_log(this->name, GF_LOG_ERROR, "Port is already in use");
+ }
}
}
if (AF_UNIX != SA(&this->myinfo.sockaddr)->sa_family) {
if (getsockname(priv->sock, SA(&this->myinfo.sockaddr),
- &this->myinfo.sockaddr_len) == -1) {
+ &this->myinfo.sockaddr_len) != 0) {
gf_log(this->name, GF_LOG_WARNING,
"getsockname on (%d) failed (%s)", priv->sock,
strerror(errno));
@@ -944,7 +999,7 @@ __socket_nonblock(int fd)
flags = fcntl(fd, F_GETFL);
- if (flags != -1)
+ if (flags >= 0)
ret = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
return ret;
@@ -974,7 +1029,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle,
#endif
ret = setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
- if (ret == -1) {
+ if (ret != 0) {
gf_log("socket", GF_LOG_WARNING,
"failed to set keep alive option on socket %d", fd);
goto err;
@@ -991,7 +1046,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle,
ret = setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE, &keepaliveintvl,
sizeof(keepaliveintvl));
#endif
- if (ret == -1) {
+ if (ret != 0) {
gf_log("socket", GF_LOG_WARNING,
"failed to set keep alive interval on socket %d", fd);
goto err;
@@ -1002,7 +1057,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle,
ret = setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &keepaliveidle,
sizeof(keepaliveidle));
- if (ret == -1) {
+ if (ret != 0) {
gf_log("socket", GF_LOG_WARNING,
"failed to set keep idle %d on socket %d, %s", keepaliveidle, fd,
strerror(errno));
@@ -1010,7 +1065,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle,
}
ret = setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &keepaliveintvl,
sizeof(keepaliveintvl));
- if (ret == -1) {
+ if (ret != 0) {
gf_log("socket", GF_LOG_WARNING,
"failed to set keep interval %d on socket %d, %s",
keepaliveintvl, fd, strerror(errno));
@@ -1022,7 +1077,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle,
goto done;
ret = setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &timeout_ms,
sizeof(timeout_ms));
- if (ret == -1) {
+ if (ret != 0) {
gf_log("socket", GF_LOG_WARNING,
"failed to set "
"TCP_USER_TIMEOUT %d on socket %d, %s",
@@ -1033,7 +1088,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle,
#if defined(TCP_KEEPCNT)
ret = setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &keepalivecnt,
sizeof(keepalivecnt));
- if (ret == -1) {
+ if (ret != 0) {
gf_log("socket", GF_LOG_WARNING,
"failed to set "
"TCP_KEEPCNT %d on socket %d, %s",
@@ -1075,9 +1130,6 @@ __socket_reset(rpc_transport_t *this)
{
socket_private_t *priv = NULL;
- GF_VALIDATE_OR_GOTO("socket", this, out);
- GF_VALIDATE_OR_GOTO("socket", this->private, out);
-
priv = this->private;
/* TODO: use mem-pool on incoming data */
@@ -1096,8 +1148,16 @@ __socket_reset(rpc_transport_t *this)
memset(&priv->incoming, 0, sizeof(priv->incoming));
- event_unregister_close(this->ctx->event_pool, priv->sock, priv->idx);
-
+ gf_event_unregister_close(this->ctx->event_pool, priv->sock, priv->idx);
+ if (priv->use_ssl && priv->ssl_ssl) {
+ SSL_clear(priv->ssl_ssl);
+ SSL_free(priv->ssl_ssl);
+ priv->ssl_ssl = NULL;
+ }
+ if (priv->ssl_ctx) {
+ SSL_CTX_free(priv->ssl_ctx);
+ priv->ssl_ctx = NULL;
+ }
priv->sock = -1;
priv->idx = -1;
priv->connected = -1;
@@ -1117,8 +1177,6 @@ __socket_reset(rpc_transport_t *this)
GF_FREE(priv->ssl_ca_list);
priv->ssl_ca_list = NULL;
}
-out:
- return;
}
static void
@@ -1148,8 +1206,6 @@ __socket_ioq_new(rpc_transport_t *this, rpc_transport_msg_t *msg)
int count = 0;
uint32_t size = 0;
- GF_VALIDATE_OR_GOTO("socket", this, out);
-
/* TODO: use mem-pool */
entry = GF_CALLOC(1, sizeof(*entry), gf_common_mt_ioq);
if (!entry)
@@ -1204,7 +1260,6 @@ __socket_ioq_new(rpc_transport_t *this, rpc_transport_msg_t *msg)
INIT_LIST_HEAD(&entry->list);
-out:
return entry;
}
@@ -1225,27 +1280,18 @@ out:
}
static void
-__socket_ioq_flush(rpc_transport_t *this)
+__socket_ioq_flush(socket_private_t *priv)
{
- socket_private_t *priv = NULL;
struct ioq *entry = NULL;
- GF_VALIDATE_OR_GOTO("socket", this, out);
- GF_VALIDATE_OR_GOTO("socket", this->private, out);
-
- priv = this->private;
-
while (!list_empty(&priv->ioq)) {
entry = priv->ioq_next;
__socket_ioq_entry_free(entry);
}
-
-out:
- return;
}
static int
-__socket_ioq_churn_entry(rpc_transport_t *this, struct ioq *entry, int direct)
+__socket_ioq_churn_entry(rpc_transport_t *this, struct ioq *entry)
{
int ret = -1;
@@ -1268,16 +1314,13 @@ __socket_ioq_churn(rpc_transport_t *this)
int ret = 0;
struct ioq *entry = NULL;
- GF_VALIDATE_OR_GOTO("socket", this, out);
- GF_VALIDATE_OR_GOTO("socket", this->private, out);
-
priv = this->private;
while (!list_empty(&priv->ioq)) {
/* pick next entry */
entry = priv->ioq_next;
- ret = __socket_ioq_churn_entry(this, entry, 0);
+ ret = __socket_ioq_churn_entry(this, entry);
if (ret != 0)
break;
@@ -1285,11 +1328,10 @@ __socket_ioq_churn(rpc_transport_t *this)
if (list_empty(&priv->ioq)) {
/* all pending writes done, not interested in POLLOUT */
- priv->idx = event_select_on(this->ctx->event_pool, priv->sock,
- priv->idx, -1, 0);
+ priv->idx = gf_event_select_on(this->ctx->event_pool, priv->sock,
+ priv->idx, -1, 0);
}
-out:
return ret;
}
@@ -1299,22 +1341,17 @@ socket_event_poll_err(rpc_transport_t *this, int gen, int idx)
socket_private_t *priv = NULL;
gf_boolean_t socket_closed = _gf_false;
- GF_VALIDATE_OR_GOTO("socket", this, out);
- GF_VALIDATE_OR_GOTO("socket", this->private, out);
-
priv = this->private;
- pthread_mutex_lock(&priv->in_lock);
pthread_mutex_lock(&priv->out_lock);
{
- if ((priv->gen == gen) && (priv->idx == idx) && (priv->sock != -1)) {
- __socket_ioq_flush(this);
+ if ((priv->gen == gen) && (priv->idx == idx) && (priv->sock >= 0)) {
+ __socket_ioq_flush(priv);
__socket_reset(this);
socket_closed = _gf_true;
}
}
pthread_mutex_unlock(&priv->out_lock);
- pthread_mutex_unlock(&priv->in_lock);
if (socket_closed) {
pthread_mutex_lock(&priv->notify.lock);
@@ -1327,7 +1364,6 @@ socket_event_poll_err(rpc_transport_t *this, int gen, int idx)
rpc_transport_notify(this, RPC_TRANSPORT_DISCONNECT, this);
}
-out:
return socket_closed;
}
@@ -1337,9 +1373,6 @@ socket_event_poll_out(rpc_transport_t *this)
socket_private_t *priv = NULL;
int ret = -1;
- GF_VALIDATE_OR_GOTO("socket", this, out);
- GF_VALIDATE_OR_GOTO("socket", this->private, out);
-
priv = this->private;
pthread_mutex_lock(&priv->out_lock);
@@ -1347,7 +1380,7 @@ socket_event_poll_out(rpc_transport_t *this)
if (priv->connected == 1) {
ret = __socket_ioq_churn(this);
- if (ret == -1) {
+ if (ret < 0) {
gf_log(this->name, GF_LOG_TRACE,
"__socket_ioq_churn returned -1; "
"disconnecting socket");
@@ -1358,9 +1391,11 @@ socket_event_poll_out(rpc_transport_t *this)
pthread_mutex_unlock(&priv->out_lock);
if (ret == 0)
- ret = rpc_transport_notify(this, RPC_TRANSPORT_MSG_SENT, NULL);
+ rpc_transport_notify(this, RPC_TRANSPORT_MSG_SENT, NULL);
+
+ if (ret > 0)
+ ret = 0;
-out:
return ret;
}
@@ -1403,7 +1438,7 @@ __socket_read_simple_msg(rpc_transport_t *this)
&bytes_read);
}
- if (ret == -1) {
+ if (ret < 0) {
gf_log(this->name, GF_LOG_WARNING,
"reading from socket failed. Error (%s), "
"peer (%s)",
@@ -1417,6 +1452,7 @@ __socket_read_simple_msg(rpc_transport_t *this)
if (ret > 0) {
gf_log(this->name, GF_LOG_TRACE,
"partial read on non-blocking socket.");
+ ret = 0;
break;
}
@@ -1429,12 +1465,6 @@ out:
return ret;
}
-static int
-__socket_read_simple_request(rpc_transport_t *this)
-{
- return __socket_read_simple_msg(this);
-}
-
#define rpc_cred_addr(buf) (buf + RPC_MSGTYPE_SIZE + RPC_CALL_BODY_SIZE - 4)
#define rpc_verf_addr(fragcurrent) (fragcurrent - 4)
@@ -1461,9 +1491,6 @@ __socket_read_vectored_request(rpc_transport_t *this,
struct gf_sock_incoming_frag *frag = NULL;
sp_rpcfrag_request_state_t *request = NULL;
- GF_VALIDATE_OR_GOTO("socket", this, out);
- GF_VALIDATE_OR_GOTO("socket", this->private, out);
-
priv = this->private;
/* used to reduce the indirection */
@@ -1600,8 +1627,8 @@ __socket_read_vectored_request(rpc_transport_t *this,
remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
- if ((ret == -1) || ((ret == 0) && (remaining_size == 0) &&
- RPC_LASTFRAG(in->fraghdr))) {
+ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) &&
+ RPC_LASTFRAG(in->fraghdr))) {
request->vector_state = SP_STATE_VECTORED_REQUEST_INIT;
in->payload_vector.iov_len = ((unsigned long)frag->fragcurrent -
(unsigned long)
@@ -1610,7 +1637,6 @@ __socket_read_vectored_request(rpc_transport_t *this,
break;
}
-out:
return ret;
}
@@ -1627,9 +1653,6 @@ __socket_read_request(rpc_transport_t *this)
struct gf_sock_incoming_frag *frag = NULL;
sp_rpcfrag_request_state_t *request = NULL;
- GF_VALIDATE_OR_GOTO("socket", this, out);
- GF_VALIDATE_OR_GOTO("socket", this->private, out);
-
priv = this->private;
/* used to reduce the indirection */
@@ -1673,20 +1696,19 @@ __socket_read_request(rpc_transport_t *this)
if (vector_sizer) {
ret = __socket_read_vectored_request(this, vector_sizer);
} else {
- ret = __socket_read_simple_request(this);
+ ret = __socket_read_simple_msg(this);
}
remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
- if ((ret == -1) || ((ret == 0) && (remaining_size == 0) &&
- (RPC_LASTFRAG(in->fraghdr)))) {
+ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) &&
+ (RPC_LASTFRAG(in->fraghdr)))) {
request->header_state = SP_STATE_REQUEST_HEADER_INIT;
}
break;
}
-out:
return ret;
}
@@ -1704,9 +1726,7 @@ __socket_read_accepted_successful_reply(rpc_transport_t *this)
XDR xdr;
struct gf_sock_incoming *in = NULL;
struct gf_sock_incoming_frag *frag = NULL;
-
- GF_VALIDATE_OR_GOTO("socket", this, out);
- GF_VALIDATE_OR_GOTO("socket", this->private, out);
+ uint32_t remaining_size = 0;
priv = this->private;
@@ -1807,7 +1827,9 @@ __socket_read_accepted_successful_reply(rpc_transport_t *this)
case SP_STATE_READ_PROC_HEADER:
/* now read the entire remaining msg into new iobuf */
ret = __socket_read_simple_msg(this);
- if ((ret == -1) || ((ret == 0) && RPC_LASTFRAG(in->fraghdr))) {
+ remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
+ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) &&
+ RPC_LASTFRAG(in->fraghdr))) {
frag->call_body.reply.accepted_success_state =
SP_STATE_ACCEPTED_SUCCESS_REPLY_INIT;
}
@@ -1833,9 +1855,7 @@ __socket_read_accepted_successful_reply_v2(rpc_transport_t *this)
XDR xdr;
struct gf_sock_incoming *in = NULL;
struct gf_sock_incoming_frag *frag = NULL;
-
- GF_VALIDATE_OR_GOTO("socket", this, out);
- GF_VALIDATE_OR_GOTO("socket", this->private, out);
+ uint32_t remaining_size = 0;
priv = this->private;
@@ -1937,7 +1957,9 @@ __socket_read_accepted_successful_reply_v2(rpc_transport_t *this)
case SP_STATE_READ_PROC_HEADER:
/* now read the entire remaining msg into new iobuf */
ret = __socket_read_simple_msg(this);
- if ((ret == -1) || ((ret == 0) && RPC_LASTFRAG(in->fraghdr))) {
+ remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
+ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) &&
+ RPC_LASTFRAG(in->fraghdr))) {
frag->call_body.reply.accepted_success_state =
SP_STATE_ACCEPTED_SUCCESS_REPLY_INIT;
}
@@ -1963,9 +1985,6 @@ __socket_read_accepted_reply(rpc_transport_t *this)
struct gf_sock_incoming *in = NULL;
struct gf_sock_incoming_frag *frag = NULL;
- GF_VALIDATE_OR_GOTO("socket", this, out);
- GF_VALIDATE_OR_GOTO("socket", this->private, out);
-
priv = this->private;
/* used to reduce the indirection */
in = &priv->incoming;
@@ -2036,8 +2055,8 @@ __socket_read_accepted_reply(rpc_transport_t *this)
remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
- if ((ret == -1) || ((ret == 0) && (remaining_size == 0) &&
- (RPC_LASTFRAG(in->fraghdr)))) {
+ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) &&
+ (RPC_LASTFRAG(in->fraghdr)))) {
frag->call_body.reply
.accepted_state = SP_STATE_ACCEPTED_REPLY_INIT;
}
@@ -2045,7 +2064,6 @@ __socket_read_accepted_reply(rpc_transport_t *this)
break;
}
-out:
return ret;
}
@@ -2067,9 +2085,6 @@ __socket_read_vectored_reply(rpc_transport_t *this)
struct gf_sock_incoming *in = NULL;
struct gf_sock_incoming_frag *frag = NULL;
- GF_VALIDATE_OR_GOTO("socket", this, out);
- GF_VALIDATE_OR_GOTO("socket", this->private, out);
-
priv = this->private;
in = &priv->incoming;
frag = &in->frag;
@@ -2102,8 +2117,8 @@ __socket_read_vectored_reply(rpc_transport_t *this)
remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
- if ((ret == -1) || ((ret == 0) && (remaining_size == 0) &&
- (RPC_LASTFRAG(in->fraghdr)))) {
+ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) &&
+ (RPC_LASTFRAG(in->fraghdr)))) {
frag->call_body.reply
.status_state = SP_STATE_VECTORED_REPLY_STATUS_INIT;
in->payload_vector.iov_len = (unsigned long)frag->fragcurrent -
@@ -2113,7 +2128,6 @@ __socket_read_vectored_reply(rpc_transport_t *this)
break;
}
-out:
return ret;
}
@@ -2136,9 +2150,6 @@ __socket_read_reply(rpc_transport_t *this)
struct gf_sock_incoming *in = NULL;
struct gf_sock_incoming_frag *frag = NULL;
- GF_VALIDATE_OR_GOTO("socket", this, out);
- GF_VALIDATE_OR_GOTO("socket", this->private, out);
-
priv = this->private;
in = &priv->incoming;
frag = &in->frag;
@@ -2164,17 +2175,13 @@ __socket_read_reply(rpc_transport_t *this)
* and priv->lock, since we are doing an upcall here.
*/
frag->state = SP_STATE_NOTIFYING_XID;
- pthread_mutex_unlock(&priv->in_lock);
- {
- ret = rpc_transport_notify(this, RPC_TRANSPORT_MAP_XID_REQUEST,
- in->request_info);
- }
- pthread_mutex_lock(&priv->in_lock);
+ ret = rpc_transport_notify(this, RPC_TRANSPORT_MAP_XID_REQUEST,
+ in->request_info);
/* Transition back to externally visible state. */
frag->state = SP_STATE_READ_MSGTYPE;
- if (ret == -1) {
+ if (ret < 0) {
gf_log(this->name, GF_LOG_WARNING,
"notify for event MAP_XID failed for %s",
this->peerinfo.identifier);
@@ -2208,9 +2215,6 @@ __socket_read_frag(rpc_transport_t *this)
struct gf_sock_incoming *in = NULL;
struct gf_sock_incoming_frag *frag = NULL;
- GF_VALIDATE_OR_GOTO("socket", this, out);
- GF_VALIDATE_OR_GOTO("socket", this->private, out);
-
priv = this->private;
/* used to reduce the indirection */
in = &priv->incoming;
@@ -2252,8 +2256,8 @@ __socket_read_frag(rpc_transport_t *this)
remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
- if ((ret == -1) || ((ret == 0) && (remaining_size == 0) &&
- (RPC_LASTFRAG(in->fraghdr)))) {
+ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) &&
+ (RPC_LASTFRAG(in->fraghdr)))) {
/* frag->state = SP_STATE_NADA; */
frag->state = SP_STATE_RPCFRAG_INIT;
}
@@ -2268,7 +2272,6 @@ __socket_read_frag(rpc_transport_t *this)
break;
}
-out:
return ret;
}
@@ -2337,13 +2340,14 @@ __socket_proto_state_machine(rpc_transport_t *this,
ret = __socket_readv(this, in->pending_vector, 1,
&in->pending_vector, &in->pending_count,
NULL);
- if (ret == -1)
+ if (ret < 0)
goto out;
if (ret > 0) {
gf_log(this->name, GF_LOG_TRACE,
"partial "
"fragment header read");
+ ret = 0;
goto out;
}
@@ -2358,7 +2362,7 @@ __socket_proto_state_machine(rpc_transport_t *this,
in->total_bytes_read += RPC_FRAGSIZE(in->fraghdr);
if (in->total_bytes_read >= GF_UNIT_GB) {
- ret = -ENOMEM;
+ ret = -1;
goto out;
}
@@ -2366,7 +2370,7 @@ __socket_proto_state_machine(rpc_transport_t *this,
this->ctx->iobuf_pool,
(in->total_bytes_read + sizeof(in->fraghdr)));
if (!iobuf) {
- ret = -ENOMEM;
+ ret = -1;
goto out;
}
@@ -2393,7 +2397,7 @@ __socket_proto_state_machine(rpc_transport_t *this,
case SP_STATE_READING_FRAG:
ret = __socket_read_frag(this);
- if ((ret == -1) ||
+ if ((ret < 0) ||
(frag->bytes_read != RPC_FRAGSIZE(in->fraghdr))) {
goto out;
}
@@ -2474,10 +2478,6 @@ __socket_proto_state_machine(rpc_transport_t *this,
}
out:
- if ((ret == -1) && (errno == EAGAIN)) {
- ret = 0;
- }
-
return ret;
}
@@ -2485,22 +2485,34 @@ static int
socket_proto_state_machine(rpc_transport_t *this,
rpc_transport_pollin_t **pollin)
{
- socket_private_t *priv = NULL;
- int ret = 0;
+ return __socket_proto_state_machine(this, pollin);
+}
- GF_VALIDATE_OR_GOTO("socket", this, out);
- GF_VALIDATE_OR_GOTO("socket", this->private, out);
+static void
+socket_event_poll_in_async(xlator_t *xl, gf_async_t *async)
+{
+ rpc_transport_pollin_t *pollin;
+ rpc_transport_t *this;
+ socket_private_t *priv;
+ pollin = caa_container_of(async, rpc_transport_pollin_t, async);
+ this = pollin->trans;
priv = this->private;
- pthread_mutex_lock(&priv->in_lock);
+ rpc_transport_notify(this, RPC_TRANSPORT_MSG_RECEIVED, pollin);
+
+ rpc_transport_unref(this);
+
+ rpc_transport_pollin_destroy(pollin);
+
+ pthread_mutex_lock(&priv->notify.lock);
{
- ret = __socket_proto_state_machine(this, pollin);
- }
- pthread_mutex_unlock(&priv->in_lock);
+ --priv->notify.in_progress;
-out:
- return ret;
+ if (!priv->notify.in_progress)
+ pthread_cond_signal(&priv->notify.cond);
+ }
+ pthread_mutex_unlock(&priv->notify.lock);
}
static int
@@ -2523,22 +2535,12 @@ socket_event_poll_in(rpc_transport_t *this, gf_boolean_t notify_handled)
pthread_mutex_unlock(&priv->notify.lock);
}
- if (notify_handled && (ret != -1))
- event_handled(ctx->event_pool, priv->sock, priv->idx, priv->gen);
+ if (notify_handled && (ret >= 0))
+ gf_event_handled(ctx->event_pool, priv->sock, priv->idx, priv->gen);
if (pollin) {
- ret = rpc_transport_notify(this, RPC_TRANSPORT_MSG_RECEIVED, pollin);
-
- rpc_transport_pollin_destroy(pollin);
-
- pthread_mutex_lock(&priv->notify.lock);
- {
- --priv->notify.in_progress;
-
- if (!priv->notify.in_progress)
- pthread_cond_signal(&priv->notify.cond);
- }
- pthread_mutex_unlock(&priv->notify.lock);
+ rpc_transport_ref(this);
+ gf_async(&pollin->async, THIS, socket_event_poll_in_async);
}
return ret;
@@ -2552,12 +2554,8 @@ socket_connect_finish(rpc_transport_t *this)
rpc_transport_event_t event = 0;
char notify_rpc = 0;
- GF_VALIDATE_OR_GOTO("socket", this, out);
- GF_VALIDATE_OR_GOTO("socket", this->private, out);
-
priv = this->private;
- pthread_mutex_lock(&priv->in_lock);
pthread_mutex_lock(&priv->out_lock);
{
if (priv->connected != 0)
@@ -2567,10 +2565,10 @@ socket_connect_finish(rpc_transport_t *this)
ret = __socket_connect_finish(priv->sock);
- if (ret == -1 && errno == EINPROGRESS)
+ if ((ret < 0) && (errno == EINPROGRESS))
ret = 1;
- if (ret == -1 && errno != EINPROGRESS) {
+ if ((ret < 0) && (errno != EINPROGRESS)) {
if (!priv->connect_finish_log) {
gf_log(this->name, GF_LOG_ERROR,
"connection to %s failed (%s); "
@@ -2589,7 +2587,7 @@ socket_connect_finish(rpc_transport_t *this)
ret = getsockname(priv->sock, SA(&this->myinfo.sockaddr),
&this->myinfo.sockaddr_len);
- if (ret == -1) {
+ if (ret != 0) {
gf_log(this->name, GF_LOG_WARNING,
"getsockname on (%d) failed (%s) - "
"disconnecting socket",
@@ -2606,12 +2604,11 @@ socket_connect_finish(rpc_transport_t *this)
}
unlock:
pthread_mutex_unlock(&priv->out_lock);
- pthread_mutex_unlock(&priv->in_lock);
if (notify_rpc) {
rpc_transport_notify(this, event, this);
}
-out:
+
return ret;
}
@@ -2620,12 +2617,8 @@ socket_disconnect(rpc_transport_t *this, gf_boolean_t wait);
/* socket_is_connected() is for use only in socket_event_handler() */
static inline gf_boolean_t
-socket_is_connected(rpc_transport_t *this)
+socket_is_connected(socket_private_t *priv)
{
- socket_private_t *priv = NULL;
-
- priv = this->private;
-
if (priv->use_ssl) {
return priv->is_server ? priv->ssl_accepted : priv->ssl_connected;
} else {
@@ -2650,10 +2643,10 @@ ssl_rearm_event_fd(rpc_transport_t *this)
fd = priv->sock;
if (priv->ssl_error_required == SSL_ERROR_WANT_READ)
- event_select_on(ctx->event_pool, fd, idx, 1, -1);
+ gf_event_select_on(ctx->event_pool, fd, idx, 1, -1);
if (priv->ssl_error_required == SSL_ERROR_WANT_WRITE)
- event_select_on(ctx->event_pool, fd, idx, -1, 1);
- event_handled(ctx->event_pool, fd, idx, gen);
+ gf_event_select_on(ctx->event_pool, fd, idx, -1, 1);
+ gf_event_handled(ctx->event_pool, fd, idx, gen);
}
static int
@@ -2674,7 +2667,7 @@ ssl_handle_server_connection_attempt(rpc_transport_t *this)
fd = priv->sock;
if (!priv->ssl_context_created) {
- ret = ssl_setup_connection_prefix(this);
+ ret = ssl_setup_connection_prefix(this, _gf_true);
if (ret < 0) {
gf_log(this->name, GF_LOG_TRACE,
"> ssl_setup_connection_prefix() failed!");
@@ -2687,8 +2680,8 @@ ssl_handle_server_connection_attempt(rpc_transport_t *this)
ret = ssl_complete_connection(this);
if (ret == 0) {
/* nothing to do */
- event_select_on(ctx->event_pool, fd, idx, 1, 0);
- event_handled(ctx->event_pool, fd, idx, gen);
+ gf_event_select_on(ctx->event_pool, fd, idx, 1, 0);
+ gf_event_handled(ctx->event_pool, fd, idx, gen);
ret = 1;
} else {
if (errno == EAGAIN) {
@@ -2722,13 +2715,13 @@ ssl_handle_client_connection_attempt(rpc_transport_t *this)
/* SSL client */
if (priv->connect_failed) {
gf_log(this->name, GF_LOG_TRACE, ">>> disconnecting SSL socket");
- ret = socket_disconnect(this, _gf_false);
+ (void)socket_disconnect(this, _gf_false);
/* Force ret to be -1, as we are officially done with
this socket */
ret = -1;
} else {
if (!priv->ssl_context_created) {
- ret = ssl_setup_connection_prefix(this);
+ ret = ssl_setup_connection_prefix(this, _gf_false);
if (ret < 0) {
gf_log(this->name, GF_LOG_TRACE,
"> ssl_setup_connection_prefix() "
@@ -2742,7 +2735,7 @@ ssl_handle_client_connection_attempt(rpc_transport_t *this)
ret = ssl_complete_connection(this);
if (ret == 0) {
ret = socket_connect_finish(this);
- event_select_on(ctx->event_pool, fd, idx, 1, 0);
+ gf_event_select_on(ctx->event_pool, fd, idx, 1, 0);
gf_log(this->name, GF_LOG_TRACE, ">>> completed client connect");
} else {
if (errno == EAGAIN) {
@@ -2752,7 +2745,7 @@ ssl_handle_client_connection_attempt(rpc_transport_t *this)
ret = 1;
} else {
/* this is a connection failure */
- ret = socket_connect_finish(this);
+ (void)socket_connect_finish(this);
gf_log(this->name, GF_LOG_TRACE,
"ssl_complete_connection "
"returned error");
@@ -2811,7 +2804,7 @@ socket_handle_client_connection_attempt(rpc_transport_t *this)
* return 1
*/
ret = 1;
- event_handled(ctx->event_pool, fd, idx, gen);
+ gf_event_handled(ctx->event_pool, fd, idx, gen);
}
}
return ret;
@@ -2847,7 +2840,7 @@ socket_complete_connection(rpc_transport_t *this)
* socket_server_event_handler()
*/
priv->accepted = _gf_true;
- event_handled(ctx->event_pool, fd, idx, gen);
+ gf_event_handled(ctx->event_pool, fd, idx, gen);
ret = 1;
} else {
ret = socket_handle_client_connection_attempt(this);
@@ -2857,7 +2850,7 @@ socket_complete_connection(rpc_transport_t *this)
}
/* reads rpc_requests during pollin */
-static int
+static void
socket_event_handler(int fd, int idx, int gen, void *data, int poll_in,
int poll_out, int poll_err, char event_thread_died)
{
@@ -2871,9 +2864,16 @@ socket_event_handler(int fd, int idx, int gen, void *data, int poll_in,
if (event_thread_died) {
/* to avoid duplicate notifications, notify only for listener sockets */
- return 0;
+ return;
}
+ /* At this point we are sure no other thread is using the transport because
+ * we cannot receive more events until we call gf_event_handled(). However
+ * this function may call gf_event_handled() in some cases. When this is
+ * done, the transport may be destroyed at any moment if another thread
+ * handled an error event. To prevent that we take a reference here. */
+ rpc_transport_ref(this);
+
GF_VALIDATE_OR_GOTO("socket", this, out);
GF_VALIDATE_OR_GOTO("socket", this->private, out);
GF_VALIDATE_OR_GOTO("socket", this->xl, out);
@@ -2882,21 +2882,19 @@ socket_event_handler(int fd, int idx, int gen, void *data, int poll_in,
priv = this->private;
ctx = this->ctx;
- pthread_mutex_lock(&priv->in_lock);
pthread_mutex_lock(&priv->out_lock);
{
priv->idx = idx;
priv->gen = gen;
}
pthread_mutex_unlock(&priv->out_lock);
- pthread_mutex_unlock(&priv->in_lock);
gf_log(this->name, GF_LOG_TRACE, "%s (sock:%d) in:%d, out:%d, err:%d",
(priv->is_server ? "server" : "client"), priv->sock, poll_in,
poll_out, poll_err);
if (!poll_err) {
- if (!socket_is_connected(this)) {
+ if (!socket_is_connected(priv)) {
gf_log(this->name, GF_LOG_TRACE,
"%s (sock:%d) socket is not connected, "
"completing connection",
@@ -2912,7 +2910,7 @@ socket_event_handler(int fd, int idx, int gen, void *data, int poll_in,
if (ret > 0) {
gf_log(this->name, GF_LOG_TRACE,
"(sock:%d) returning to wait on socket", priv->sock);
- return 0;
+ goto out;
}
} else {
char *sock_type = (priv->is_server ? "Server" : "Client");
@@ -2952,6 +2950,13 @@ socket_event_handler(int fd, int idx, int gen, void *data, int poll_in,
socket_dump_info(sa, priv->is_server, priv->use_ssl, priv->sock,
this->name, "disconnecting from");
+ /* Dump the SSL error stack to clear any errors that may otherwise
+ * resurface in the future.
+ */
+ if (priv->use_ssl && priv->ssl_ssl) {
+ ssl_dump_error_stack(this->name);
+ }
+
/* Logging has happened already in earlier cases */
gf_log("transport", ((ret >= 0) ? GF_LOG_INFO : GF_LOG_DEBUG),
"EPOLLERR - disconnecting (sock:%d) (%s)", priv->sock,
@@ -2963,14 +2968,14 @@ socket_event_handler(int fd, int idx, int gen, void *data, int poll_in,
rpc_transport_unref(this);
} else if (!notify_handled) {
- event_handled(ctx->event_pool, fd, idx, gen);
+ gf_event_handled(ctx->event_pool, fd, idx, gen);
}
out:
- return ret;
+ rpc_transport_unref(this);
}
-static int
+static void
socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in,
int poll_out, int poll_err, char event_thread_died)
{
@@ -2999,7 +3004,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in,
if (event_thread_died) {
rpc_transport_notify(this, RPC_TRANSPORT_EVENT_THREAD_DIED,
(void *)(unsigned long)gen);
- return 0;
+ return;
}
/* NOTE:
@@ -3009,6 +3014,12 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in,
* thread context while we are using it here.
*/
priv->idx = idx;
+ priv->gen = gen;
+
+ if (poll_err) {
+ socket_event_poll_err(this, gen, idx);
+ goto out;
+ }
if (poll_in) {
int aflags = 0;
@@ -3018,31 +3029,33 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in,
new_sock = sys_accept(priv->sock, SA(&new_sockaddr), &addrlen, aflags);
- event_handled(ctx->event_pool, fd, idx, gen);
+ gf_event_handled(ctx->event_pool, fd, idx, gen);
- if (new_sock == -1) {
+ if (new_sock < 0) {
gf_log(this->name, GF_LOG_WARNING, "accept on %d failed (%s)",
priv->sock, strerror(errno));
goto out;
}
- if (priv->nodelay && (new_sockaddr.ss_family != AF_UNIX)) {
- ret = __socket_nodelay(new_sock);
- if (ret == -1) {
- gf_log(this->name, GF_LOG_WARNING,
- "setsockopt() failed for "
- "NODELAY (%s)",
- strerror(errno));
+ if (new_sockaddr.ss_family != AF_UNIX) {
+ if (priv->nodelay) {
+ ret = __socket_nodelay(new_sock);
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "setsockopt() failed for "
+ "NODELAY (%s)",
+ strerror(errno));
+ }
}
- }
- if (priv->keepalive && new_sockaddr.ss_family != AF_UNIX) {
- ret = __socket_keepalive(new_sock, new_sockaddr.ss_family,
- priv->keepaliveintvl, priv->keepaliveidle,
- priv->keepalivecnt, priv->timeout);
- if (ret == -1)
- gf_log(this->name, GF_LOG_WARNING,
- "Failed to set keep-alive: %s", strerror(errno));
+ if (priv->keepalive) {
+ ret = __socket_keepalive(
+ new_sock, new_sockaddr.ss_family, priv->keepaliveintvl,
+ priv->keepaliveidle, priv->keepalivecnt, priv->timeout);
+ if (ret != 0)
+ gf_log(this->name, GF_LOG_WARNING,
+ "Failed to set keep-alive: %s", strerror(errno));
+ }
}
new_trans = GF_CALLOC(1, sizeof(*new_trans), gf_common_mt_rpc_trans_t);
@@ -3056,7 +3069,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in,
}
ret = pthread_mutex_init(&new_trans->lock, NULL);
- if (ret == -1) {
+ if (ret != 0) {
gf_log(this->name, GF_LOG_WARNING,
"pthread_mutex_init() failed: %s; closing newly accepted "
"socket %d",
@@ -3076,7 +3089,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in,
ret = getsockname(new_sock, SA(&new_trans->myinfo.sockaddr),
&new_trans->myinfo.sockaddr_len);
- if (ret == -1) {
+ if (ret != 0) {
gf_log(this->name, GF_LOG_WARNING,
"getsockname on socket %d "
"failed (errno:%s); closing newly accepted socket",
@@ -3091,7 +3104,30 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in,
gf_log(this->name, GF_LOG_TRACE, "XXX server:%s, client:%s",
new_trans->myinfo.identifier, new_trans->peerinfo.identifier);
+ /* Make options available to local socket_init() to create new
+ * SSL_CTX per transport. A separate SSL_CTX per transport is
+ * required to avoid setting crl checking options for client
+ * connections. The verification options eventually get copied
+ * to the SSL object. Unfortunately, there's no way to identify
+ * whether socket_init() is being called after a client-side
+ * connect() or a server-side accept(). Although, we could pass
+ * a flag from the transport init() to the socket_init() and
+ * from this place, this doesn't identify the case where the
+ * server-side transport loading is done for the first time.
+ * Also, SSL doesn't apply for UNIX sockets.
+ */
+ if (new_sockaddr.ss_family != AF_UNIX)
+ new_trans->options = dict_ref(this->options);
+ new_trans->ctx = this->ctx;
+
ret = socket_init(new_trans);
+
+ /* reset options to NULL to avoid double free */
+ if (new_sockaddr.ss_family != AF_UNIX) {
+ dict_unref(new_trans->options);
+ new_trans->options = NULL;
+ }
+
if (ret != 0) {
gf_log(this->name, GF_LOG_WARNING,
"initialization of new_trans "
@@ -3133,7 +3169,6 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in,
new_priv->sock = new_sock;
new_priv->ssl_enabled = priv->ssl_enabled;
- new_priv->ssl_ctx = priv->ssl_ctx;
new_priv->connected = 1;
new_priv->is_server = _gf_true;
@@ -3160,8 +3195,8 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in,
*/
ret = rpc_transport_notify(this, RPC_TRANSPORT_ACCEPT, new_trans);
- if (ret != -1) {
- new_priv->idx = event_register(
+ if (ret >= 0) {
+ new_priv->idx = gf_event_register(
ctx->event_pool, new_sock, socket_event_handler, new_trans,
1, 0, new_trans->notify_poller_death);
if (new_priv->idx == -1) {
@@ -3198,7 +3233,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in,
rpc_transport_unref(new_trans);
}
- if (ret == -1) {
+ if (ret < 0) {
gf_log(this->name, GF_LOG_WARNING, "closing newly accepted socket");
sys_close(new_sock);
/* this unref is to actually cause the destruction of
@@ -3208,7 +3243,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in,
}
}
out:
- return ret;
+ return;
}
static int
@@ -3217,20 +3252,14 @@ socket_disconnect(rpc_transport_t *this, gf_boolean_t wait)
socket_private_t *priv = NULL;
int ret = -1;
- GF_VALIDATE_OR_GOTO("socket", this, out);
- GF_VALIDATE_OR_GOTO("socket", this->private, out);
-
priv = this->private;
- pthread_mutex_lock(&priv->in_lock);
pthread_mutex_lock(&priv->out_lock);
{
ret = __socket_disconnect(this);
}
pthread_mutex_unlock(&priv->out_lock);
- pthread_mutex_unlock(&priv->in_lock);
-out:
return ret;
}
@@ -3329,10 +3358,9 @@ socket_connect(rpc_transport_t *this, int port)
goto err;
}
- pthread_mutex_lock(&priv->in_lock);
pthread_mutex_lock(&priv->out_lock);
{
- if (priv->sock != -1) {
+ if (priv->sock >= 0) {
gf_log_callingfn(this->name, GF_LOG_TRACE,
"connect () called on transport "
"already connected");
@@ -3346,7 +3374,7 @@ socket_connect(rpc_transport_t *this, int port)
ret = socket_client_get_remote_sockaddr(this, &sock_union.sa,
&sockaddr_len, &sa_family);
- if (ret == -1) {
+ if (ret < 0) {
/* logged inside client_get_remote_sockaddr */
goto unlock;
}
@@ -3365,7 +3393,7 @@ socket_connect(rpc_transport_t *this, int port)
this->peerinfo.sockaddr_len = sockaddr_len;
priv->sock = sys_socket(sa_family, SOCK_STREAM, 0);
- if (priv->sock == -1) {
+ if (priv->sock < 0) {
gf_log(this->name, GF_LOG_ERROR, "socket creation failed (%s)",
strerror(errno));
ret = -1;
@@ -3377,7 +3405,7 @@ socket_connect(rpc_transport_t *this, int port)
*/
if (priv->windowsize != 0) {
if (setsockopt(priv->sock, SOL_SOCKET, SO_RCVBUF, &priv->windowsize,
- sizeof(priv->windowsize)) < 0) {
+ sizeof(priv->windowsize)) != 0) {
gf_log(this->name, GF_LOG_ERROR,
"setting receive window "
"size failed: %d: %d: %s",
@@ -3385,7 +3413,7 @@ socket_connect(rpc_transport_t *this, int port)
}
if (setsockopt(priv->sock, SOL_SOCKET, SO_SNDBUF, &priv->windowsize,
- sizeof(priv->windowsize)) < 0) {
+ sizeof(priv->windowsize)) != 0) {
gf_log(this->name, GF_LOG_ERROR,
"setting send window size "
"failed: %d: %d: %s",
@@ -3407,30 +3435,32 @@ socket_connect(rpc_transport_t *this, int port)
}
#endif
- if (priv->nodelay && (sa_family != AF_UNIX)) {
- ret = __socket_nodelay(priv->sock);
-
- if (ret == -1) {
- gf_log(this->name, GF_LOG_ERROR, "NODELAY on %d failed (%s)",
- priv->sock, strerror(errno));
+ if (sa_family != AF_UNIX) {
+ if (priv->nodelay) {
+ ret = __socket_nodelay(priv->sock);
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "NODELAY on %d failed (%s)", priv->sock,
+ strerror(errno));
+ }
}
- }
- if (priv->keepalive && sa_family != AF_UNIX) {
- ret = __socket_keepalive(priv->sock, sa_family,
- priv->keepaliveintvl, priv->keepaliveidle,
- priv->keepalivecnt, priv->timeout);
- if (ret == -1)
- gf_log(this->name, GF_LOG_ERROR, "Failed to set keep-alive: %s",
- strerror(errno));
+ if (priv->keepalive) {
+ ret = __socket_keepalive(
+ priv->sock, sa_family, priv->keepaliveintvl,
+ priv->keepaliveidle, priv->keepalivecnt, priv->timeout);
+ if (ret != 0)
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to set keep-alive: %s", strerror(errno));
+ }
}
SA(&this->myinfo.sockaddr)->sa_family = SA(&this->peerinfo.sockaddr)
->sa_family;
/* If a source addr is explicitly specified, use it */
- ret = dict_get_str(this->options, "transport.socket.source-addr",
- &local_addr);
+ ret = dict_get_str_sizen(this->options, "transport.socket.source-addr",
+ &local_addr);
if (!ret && SA(&this->myinfo.sockaddr)->sa_family == AF_INET) {
addr = (struct sockaddr_in *)(&this->myinfo.sockaddr);
ret = inet_pton(AF_INET, local_addr, &(addr->sin_addr.s_addr));
@@ -3442,7 +3472,7 @@ socket_connect(rpc_transport_t *this, int port)
ret = client_bind(this, SA(&this->myinfo.sockaddr),
&this->myinfo.sockaddr_len, priv->sock);
- if (ret == -1) {
+ if (ret < 0) {
gf_log(this->name, GF_LOG_WARNING, "client bind failed: %s",
strerror(errno));
goto handler;
@@ -3451,7 +3481,7 @@ socket_connect(rpc_transport_t *this, int port)
/* make socket non-blocking for all types of sockets */
if (!priv->bio) {
ret = __socket_nonblock(priv->sock);
- if (ret == -1) {
+ if (ret != 0) {
gf_log(this->name, GF_LOG_ERROR, "NBIO on %d failed (%s)",
priv->sock, strerror(errno));
goto handler;
@@ -3478,7 +3508,7 @@ socket_connect(rpc_transport_t *this, int port)
connect_attempted = _gf_true;
- if (ret == -1 && errno == ENOENT && ign_enoent) {
+ if ((ret != 0) && (errno == ENOENT) && ign_enoent) {
gf_log(this->name, GF_LOG_WARNING,
"Ignore failed connection attempt on %s, (%s) ",
this->peerinfo.identifier, strerror(errno));
@@ -3496,7 +3526,7 @@ socket_connect(rpc_transport_t *this, int port)
goto handler;
}
- if (ret == -1 && ((errno != EINPROGRESS) && (errno != ENOENT))) {
+ if ((ret != 0) && (errno != EINPROGRESS) && (errno != ENOENT)) {
/* For unix path based sockets, the socket path is
* cryptic (md5sum of path) and may not be useful for
* the user in debugging so log it in DEBUG
@@ -3541,9 +3571,9 @@ socket_connect(rpc_transport_t *this, int port)
refd = _gf_true;
this->listener = this;
- priv->idx = event_register(ctx->event_pool, priv->sock,
- socket_event_handler, this, 1, 1,
- this->notify_poller_death);
+ priv->idx = gf_event_register(ctx->event_pool, priv->sock,
+ socket_event_handler, this, 1, 1,
+ this->notify_poller_death);
if (priv->idx == -1) {
gf_log("", GF_LOG_WARNING,
"failed to register the event; "
@@ -3558,11 +3588,10 @@ socket_connect(rpc_transport_t *this, int port)
sock = priv->sock;
}
pthread_mutex_unlock(&priv->out_lock);
- pthread_mutex_unlock(&priv->in_lock);
err:
- /* if sock != -1, then cleanup is done from the event handler */
- if (ret == -1 && sock == -1) {
+ /* if sock >= 0, then cleanup is done from the event handler */
+ if ((ret < 0) && (sock < 0)) {
/* Cleaup requires to send notification to upper layer which
intern holds the big_lock. There can be dead-lock situation
if big_lock is already held by the current thread.
@@ -3610,29 +3639,26 @@ socket_listen(rpc_transport_t *this)
myinfo = &this->myinfo;
ctx = this->ctx;
- pthread_mutex_lock(&priv->in_lock);
pthread_mutex_lock(&priv->out_lock);
{
sock = priv->sock;
}
pthread_mutex_unlock(&priv->out_lock);
- pthread_mutex_unlock(&priv->in_lock);
- if (sock != -1) {
+ if (sock >= 0) {
gf_log_callingfn(this->name, GF_LOG_DEBUG, "already listening");
return ret;
}
ret = socket_server_get_local_sockaddr(this, SA(&sockaddr), &sockaddr_len,
&sa_family);
- if (ret == -1) {
+ if (ret < 0) {
return ret;
}
- pthread_mutex_lock(&priv->in_lock);
pthread_mutex_lock(&priv->out_lock);
{
- if (priv->sock != -1) {
+ if (priv->sock >= 0) {
gf_log(this->name, GF_LOG_DEBUG, "already listening");
goto unlock;
}
@@ -3642,7 +3668,7 @@ socket_listen(rpc_transport_t *this)
priv->sock = sys_socket(sa_family, SOCK_STREAM, 0);
- if (priv->sock == -1) {
+ if (priv->sock < 0) {
gf_log(this->name, GF_LOG_ERROR, "socket creation failed (%s)",
strerror(errno));
goto unlock;
@@ -3653,7 +3679,7 @@ socket_listen(rpc_transport_t *this)
*/
if (priv->windowsize != 0) {
if (setsockopt(priv->sock, SOL_SOCKET, SO_RCVBUF, &priv->windowsize,
- sizeof(priv->windowsize)) < 0) {
+ sizeof(priv->windowsize)) != 0) {
gf_log(this->name, GF_LOG_ERROR,
"setting receive window size "
"failed: %d: %d: %s",
@@ -3661,7 +3687,7 @@ socket_listen(rpc_transport_t *this)
}
if (setsockopt(priv->sock, SOL_SOCKET, SO_SNDBUF, &priv->windowsize,
- sizeof(priv->windowsize)) < 0) {
+ sizeof(priv->windowsize)) != 0) {
gf_log(this->name, GF_LOG_ERROR,
"setting send window size failed:"
" %d: %d: %s",
@@ -3671,7 +3697,7 @@ socket_listen(rpc_transport_t *this)
if (priv->nodelay && (sa_family != AF_UNIX)) {
ret = __socket_nodelay(priv->sock);
- if (ret == -1) {
+ if (ret != 0) {
gf_log(this->name, GF_LOG_ERROR,
"setsockopt() failed for NODELAY (%s)", strerror(errno));
}
@@ -3680,7 +3706,7 @@ socket_listen(rpc_transport_t *this)
if (!priv->bio) {
ret = __socket_nonblock(priv->sock);
- if (ret == -1) {
+ if (ret != 0) {
gf_log(this->name, GF_LOG_ERROR,
"NBIO on socket %d failed "
"(errno:%s); closing socket",
@@ -3691,9 +3717,10 @@ socket_listen(rpc_transport_t *this)
}
}
+ /* coverity[SLEEP] */
ret = __socket_server_bind(this);
- if ((ret == -EADDRINUSE) || (ret == -1)) {
+ if (ret < 0) {
/* logged inside __socket_server_bind() */
gf_log(this->name, GF_LOG_ERROR,
"__socket_server_bind failed;"
@@ -3709,7 +3736,7 @@ socket_listen(rpc_transport_t *this)
ret = listen(priv->sock, priv->backlog);
- if (ret == -1) {
+ if (ret != 0) {
gf_log(this->name, GF_LOG_ERROR,
"could not set socket %d to listen mode (errno:%s); "
"closing socket",
@@ -3721,9 +3748,9 @@ socket_listen(rpc_transport_t *this)
rpc_transport_ref(this);
- priv->idx = event_register(ctx->event_pool, priv->sock,
- socket_server_event_handler, this, 1, 0,
- this->notify_poller_death);
+ priv->idx = gf_event_register(ctx->event_pool, priv->sock,
+ socket_server_event_handler, this, 1, 0,
+ this->notify_poller_death);
if (priv->idx == -1) {
gf_log(this->name, GF_LOG_WARNING,
@@ -3738,7 +3765,6 @@ socket_listen(rpc_transport_t *this)
}
unlock:
pthread_mutex_unlock(&priv->out_lock);
- pthread_mutex_unlock(&priv->in_lock);
out:
return ret;
@@ -3777,7 +3803,7 @@ socket_submit_outgoing_msg(rpc_transport_t *this, rpc_transport_msg_t *msg)
goto unlock;
if (list_empty(&priv->ioq)) {
- ret = __socket_ioq_churn_entry(this, entry, 1);
+ ret = __socket_ioq_churn_entry(this, entry);
if (ret == 0) {
need_append = 0;
@@ -3793,8 +3819,8 @@ socket_submit_outgoing_msg(rpc_transport_t *this, rpc_transport_msg_t *msg)
}
if (need_poll_out) {
/* first entry to wait. continue writing on POLLOUT */
- priv->idx = event_select_on(ctx->event_pool, priv->sock, priv->idx,
- -1, 1);
+ priv->idx = gf_event_select_on(ctx->event_pool, priv->sock,
+ priv->idx, -1, 1);
}
}
unlock:
@@ -3903,7 +3929,6 @@ socket_throttle(rpc_transport_t *this, gf_boolean_t onoff)
will never read() any more data until throttling
is turned off.
*/
- pthread_mutex_lock(&priv->in_lock);
pthread_mutex_lock(&priv->out_lock);
{
/* Throttling is useless on a disconnected transport. In fact,
@@ -3912,11 +3937,10 @@ socket_throttle(rpc_transport_t *this, gf_boolean_t onoff)
* registered fd mapping. */
if (priv->connected == 1)
- priv->idx = event_select_on(this->ctx->event_pool, priv->sock,
- priv->idx, (int)!onoff, -1);
+ priv->idx = gf_event_select_on(this->ctx->event_pool, priv->sock,
+ priv->idx, (int)!onoff, -1);
}
pthread_mutex_unlock(&priv->out_lock);
- pthread_mutex_unlock(&priv->in_lock);
return 0;
}
@@ -3939,31 +3963,23 @@ reconfigure(rpc_transport_t *this, dict_t *options)
socket_private_t *priv = NULL;
gf_boolean_t tmp_bool = _gf_false;
char *optstr = NULL;
- int ret = 0;
+ int ret = -1;
uint32_t backlog = 0;
uint64_t windowsize = 0;
- uint32_t timeout = GF_NETWORK_TIMEOUT;
- int keepaliveidle = GF_KEEPALIVE_TIME;
- int keepaliveintvl = GF_KEEPALIVE_INTERVAL;
- int keepalivecnt = GF_KEEPALIVE_COUNT;
+ data_t *data;
GF_VALIDATE_OR_GOTO("socket", this, out);
GF_VALIDATE_OR_GOTO("socket", this->private, out);
- if (!this || !this->private) {
- ret = -1;
- goto out;
- }
-
priv = this->private;
- if (dict_get_str(options, "transport.socket.keepalive", &optstr) == 0) {
- if (gf_string2boolean(optstr, &tmp_bool) == -1) {
+ if (dict_get_str_sizen(options, "transport.socket.keepalive", &optstr) ==
+ 0) {
+ if (gf_string2boolean(optstr, &tmp_bool) != 0) {
gf_log(this->name, GF_LOG_ERROR,
"'transport.socket.keepalive' takes only "
"boolean options, not taking any action");
priv->keepalive = 1;
- ret = -1;
goto out;
}
gf_log(this->name, GF_LOG_DEBUG,
@@ -3973,48 +3989,43 @@ reconfigure(rpc_transport_t *this, dict_t *options)
} else
priv->keepalive = 1;
- if (dict_get_int32(options, "transport.tcp-user-timeout",
- &(priv->timeout)) != 0)
- priv->timeout = timeout;
+ if (dict_get_int32_sizen(options, "transport.tcp-user-timeout",
+ &(priv->timeout)) != 0)
+ priv->timeout = GF_NETWORK_TIMEOUT;
gf_log(this->name, GF_LOG_DEBUG,
- "Reconfigued "
- "transport.tcp-user-timeout=%d",
- priv->timeout);
+ "Reconfigured transport.tcp-user-timeout=%d", priv->timeout);
if (dict_get_uint32(options, "transport.listen-backlog", &backlog) == 0) {
priv->backlog = backlog;
gf_log(this->name, GF_LOG_DEBUG,
- "Reconfigued "
- "transport.listen-backlog=%d",
- priv->backlog);
+ "Reconfigured transport.listen-backlog=%d", priv->backlog);
}
- if (dict_get_int32(options, "transport.socket.keepalive-time",
- &(priv->keepaliveidle)) != 0)
- priv->keepaliveidle = keepaliveidle;
- gf_log(this->name, GF_LOG_DEBUG,
- "Reconfigued "
- "transport.socket.keepalive-time=%d",
- priv->keepaliveidle);
+ if (priv->keepalive) {
+ if (dict_get_int32_sizen(options, "transport.socket.keepalive-time",
+ &(priv->keepaliveidle)) != 0)
+ priv->keepaliveidle = GF_KEEPALIVE_TIME;
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Reconfigured transport.socket.keepalive-time=%d",
+ priv->keepaliveidle);
- if (dict_get_int32(options, "transport.socket.keepalive-interval",
- &(priv->keepaliveintvl)) != 0)
- priv->keepaliveintvl = keepaliveintvl;
- gf_log(this->name, GF_LOG_DEBUG,
- "Reconfigued "
- "transport.socket.keepalive-interval=%d",
- priv->keepaliveintvl);
+ if (dict_get_int32_sizen(options, "transport.socket.keepalive-interval",
+ &(priv->keepaliveintvl)) != 0)
+ priv->keepaliveintvl = GF_KEEPALIVE_INTERVAL;
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Reconfigured transport.socket.keepalive-interval=%d",
+ priv->keepaliveintvl);
- if (dict_get_int32(options, "transport.socket.keepalive-count",
- &(priv->keepalivecnt)) != 0)
- priv->keepalivecnt = keepalivecnt;
- gf_log(this->name, GF_LOG_DEBUG,
- "Reconfigued "
- "transport.socket.keepalive-count=%d",
- priv->keepalivecnt);
+ if (dict_get_int32_sizen(options, "transport.socket.keepalive-count",
+ &(priv->keepalivecnt)) != 0)
+ priv->keepalivecnt = GF_KEEPALIVE_COUNT;
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Reconfigured transport.socket.keepalive-count=%d",
+ priv->keepalivecnt);
+ }
optstr = NULL;
- if (dict_get_str(options, "tcp-window-size", &optstr) == 0) {
+ if (dict_get_str_sizen(options, "tcp-window-size", &optstr) == 0) {
if (gf_string2uint64(optstr, &windowsize) != 0) {
gf_log(this->name, GF_LOG_ERROR, "invalid number format: %s",
optstr);
@@ -4024,10 +4035,11 @@ reconfigure(rpc_transport_t *this, dict_t *options)
priv->windowsize = (int)windowsize;
- if (dict_get(options, "non-blocking-io")) {
- optstr = data_to_str(dict_get(options, "non-blocking-io"));
+ data = dict_get_sizen(options, "non-blocking-io");
+ if (data) {
+ optstr = data_to_str(data);
- if (gf_string2boolean(optstr, &tmp_bool) == -1) {
+ if (gf_string2boolean(optstr, &tmp_bool) != 0) {
gf_log(this->name, GF_LOG_ERROR,
"'non-blocking-io' takes only boolean options,"
" not taking any action");
@@ -4042,7 +4054,7 @@ reconfigure(rpc_transport_t *this, dict_t *options)
if (!priv->bio) {
ret = __socket_nonblock(priv->sock);
- if (ret == -1) {
+ if (ret != 0) {
gf_log(this->name, GF_LOG_WARNING, "NBIO on %d failed (%s)",
priv->sock, strerror(errno));
goto out;
@@ -4156,6 +4168,34 @@ static void __attribute__((destructor)) fini_openssl_mt(void)
ERR_free_strings();
}
+/* The function returns 0 if AES bit is enabled on the CPU */
+static int
+ssl_check_aes_bit(void)
+{
+ FILE *fp = fopen("/proc/cpuinfo", "r");
+ int ret = 1;
+ size_t len = 0;
+ char *line = NULL;
+ char *match = NULL;
+
+ GF_ASSERT(fp != NULL);
+
+ while (getline(&line, &len, fp) > 0) {
+ if (!strncmp(line, "flags", 5)) {
+ match = strstr(line, " aes");
+ if ((match != NULL) && ((match[4] == ' ') || (match[4] == 0))) {
+ ret = 0;
+ break;
+ }
+ }
+ }
+
+ free(line);
+ fclose(fp);
+
+ return ret;
+}
+
static int
ssl_setup_connection_params(rpc_transport_t *this)
{
@@ -4166,7 +4206,7 @@ ssl_setup_connection_params(rpc_transport_t *this)
char *cipher_list = DEFAULT_CIPHER_LIST;
char *dh_param = DEFAULT_DH_PARAM;
char *ec_curve = DEFAULT_EC_CURVE;
- char *crl_path = NULL;
+ gf_boolean_t dh_flag = _gf_false;
priv = this->private;
@@ -4175,8 +4215,16 @@ ssl_setup_connection_params(rpc_transport_t *this)
return 0;
}
+ if (!priv->ssl_enabled && !priv->mgmt_ssl) {
+ return 0;
+ }
+
+ if (!ssl_check_aes_bit()) {
+ cipher_list = "AES128:" DEFAULT_CIPHER_LIST;
+ }
+
priv->ssl_own_cert = DEFAULT_CERT_PATH;
- if (dict_get_str(this->options, SSL_OWN_CERT_OPT, &optstr) == 0) {
+ if (dict_get_str_sizen(this->options, SSL_OWN_CERT_OPT, &optstr) == 0) {
if (!priv->ssl_enabled) {
gf_log(this->name, GF_LOG_WARNING,
"%s specified without %s (ignored)", SSL_OWN_CERT_OPT,
@@ -4187,7 +4235,7 @@ ssl_setup_connection_params(rpc_transport_t *this)
priv->ssl_own_cert = gf_strdup(priv->ssl_own_cert);
priv->ssl_private_key = DEFAULT_KEY_PATH;
- if (dict_get_str(this->options, SSL_PRIVATE_KEY_OPT, &optstr) == 0) {
+ if (dict_get_str_sizen(this->options, SSL_PRIVATE_KEY_OPT, &optstr) == 0) {
if (!priv->ssl_enabled) {
gf_log(this->name, GF_LOG_WARNING,
"%s specified without %s (ignored)", SSL_PRIVATE_KEY_OPT,
@@ -4198,7 +4246,7 @@ ssl_setup_connection_params(rpc_transport_t *this)
priv->ssl_private_key = gf_strdup(priv->ssl_private_key);
priv->ssl_ca_list = DEFAULT_CA_PATH;
- if (dict_get_str(this->options, SSL_CA_LIST_OPT, &optstr) == 0) {
+ if (dict_get_str_sizen(this->options, SSL_CA_LIST_OPT, &optstr) == 0) {
if (!priv->ssl_enabled) {
gf_log(this->name, GF_LOG_WARNING,
"%s specified without %s (ignored)", SSL_CA_LIST_OPT,
@@ -4208,42 +4256,41 @@ ssl_setup_connection_params(rpc_transport_t *this)
}
priv->ssl_ca_list = gf_strdup(priv->ssl_ca_list);
- if (dict_get_str(this->options, SSL_CRL_PATH_OPT, &optstr) == 0) {
+ optstr = NULL;
+ if (dict_get_str_sizen(this->options, SSL_CRL_PATH_OPT, &optstr) == 0) {
if (!priv->ssl_enabled) {
gf_log(this->name, GF_LOG_WARNING,
"%s specified without %s (ignored)", SSL_CRL_PATH_OPT,
SSL_ENABLED_OPT);
}
if (strcasecmp(optstr, "NULL") == 0)
- crl_path = NULL;
+ priv->crl_path = NULL;
else
- crl_path = optstr;
+ priv->crl_path = gf_strdup(optstr);
}
- gf_log(this->name, priv->ssl_enabled ? GF_LOG_INFO : GF_LOG_DEBUG,
- "SSL support on the I/O path is %s",
- priv->ssl_enabled ? "ENABLED" : "NOT enabled");
- gf_log(this->name, priv->mgmt_ssl ? GF_LOG_INFO : GF_LOG_DEBUG,
- "SSL support for glusterd is %s",
- priv->mgmt_ssl ? "ENABLED" : "NOT enabled");
-
if (!priv->mgmt_ssl) {
- if (!dict_get_int32(this->options, SSL_CERT_DEPTH_OPT, &cert_depth)) {
- gf_log(this->name, GF_LOG_INFO, "using certificate depth %d",
- cert_depth);
+ if (!dict_get_int32_sizen(this->options, SSL_CERT_DEPTH_OPT,
+ &cert_depth)) {
}
} else {
cert_depth = this->ctx->ssl_cert_depth;
- gf_log(this->name, GF_LOG_INFO, "using certificate depth %d",
- cert_depth);
}
- if (!dict_get_str(this->options, SSL_CIPHER_LIST_OPT, &cipher_list)) {
+ gf_log(this->name, priv->ssl_enabled ? GF_LOG_INFO : GF_LOG_DEBUG,
+ "SSL support for MGMT is %s IO path is %s certificate depth is %d "
+ "for peer %s",
+ (priv->mgmt_ssl ? "ENABLED" : "NOT enabled"),
+ (priv->ssl_enabled ? "ENABLED" : "NOT enabled"), cert_depth,
+ this->peerinfo.identifier);
+
+ if (!dict_get_str_sizen(this->options, SSL_CIPHER_LIST_OPT, &cipher_list)) {
gf_log(this->name, GF_LOG_INFO, "using cipher list %s", cipher_list);
}
- if (!dict_get_str(this->options, SSL_DH_PARAM_OPT, &dh_param)) {
+ if (!dict_get_str_sizen(this->options, SSL_DH_PARAM_OPT, &dh_param)) {
+ dh_flag = _gf_true;
gf_log(this->name, GF_LOG_INFO, "using DH parameters %s", dh_param);
}
- if (!dict_get_str(this->options, SSL_EC_CURVE_OPT, &ec_curve)) {
+ if (!dict_get_str_sizen(this->options, SSL_EC_CURVE_OPT, &ec_curve)) {
gf_log(this->name, GF_LOG_INFO, "using EC curve %s", ec_curve);
}
@@ -4275,12 +4322,15 @@ ssl_setup_connection_params(rpc_transport_t *this)
#ifdef SSL_OP_NO_COMPRESSION
SSL_CTX_set_options(priv->ssl_ctx, SSL_OP_NO_COMPRESSION);
#endif
-
- if ((bio = BIO_new_file(dh_param, "r")) == NULL) {
- gf_log(this->name, GF_LOG_INFO,
- "failed to open %s, "
- "DH ciphers are disabled",
- dh_param);
+ /* Upload file to bio wrapper only if dh param is configured
+ */
+ if (dh_flag) {
+ if ((bio = BIO_new_file(dh_param, "r")) == NULL) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to open %s, "
+ "DH ciphers are disabled",
+ dh_param);
+ }
}
if (bio != NULL) {
@@ -4359,25 +4409,15 @@ ssl_setup_connection_params(rpc_transport_t *this)
}
if (!SSL_CTX_load_verify_locations(priv->ssl_ctx, priv->ssl_ca_list,
- crl_path)) {
+ priv->crl_path)) {
gf_log(this->name, GF_LOG_ERROR, "could not load CA list");
goto err;
}
SSL_CTX_set_verify_depth(priv->ssl_ctx, cert_depth);
- if (crl_path) {
-#ifdef X509_V_FLAG_CRL_CHECK_ALL
- X509_STORE *x509store;
-
- x509store = SSL_CTX_get_cert_store(priv->ssl_ctx);
- X509_STORE_set_flags(
- x509store, X509_V_FLAG_CRL_CHECK | X509_V_FLAG_CRL_CHECK_ALL);
-#else
- gf_log(this->name, GF_LOG_ERROR,
- "OpenSSL version does not support CRL");
-#endif
- }
+ if (priv->crl_path)
+ ssl_set_crl_verify_flags(priv->ssl_ctx);
priv->ssl_session_id = session_id++;
SSL_CTX_set_session_id_context(priv->ssl_ctx,
@@ -4407,25 +4447,19 @@ socket_init(rpc_transport_t *this)
gf_boolean_t tmp_bool = 0;
uint64_t windowsize = GF_DEFAULT_SOCKET_WINDOW_SIZE;
char *optstr = NULL;
- uint32_t timeout = GF_NETWORK_TIMEOUT;
- int keepaliveidle = GF_KEEPALIVE_TIME;
- int keepaliveintvl = GF_KEEPALIVE_INTERVAL;
- int keepalivecnt = GF_KEEPALIVE_COUNT;
- uint32_t backlog = 0;
+ data_t *data;
if (this->private) {
gf_log_callingfn(this->name, GF_LOG_ERROR, "double init attempted");
return -1;
}
- priv = GF_MALLOC(sizeof(*priv), gf_common_mt_socket_private_t);
+ priv = GF_CALLOC(1, sizeof(*priv), gf_common_mt_socket_private_t);
if (!priv) {
return -1;
}
- memset(priv, 0, sizeof(*priv));
this->private = priv;
- pthread_mutex_init(&priv->in_lock, NULL);
pthread_mutex_init(&priv->out_lock, NULL);
pthread_mutex_init(&priv->cond_lock, NULL);
pthread_cond_init(&priv->cond, NULL);
@@ -4448,10 +4482,11 @@ socket_init(rpc_transport_t *this)
if (!this->options)
goto out;
- if (dict_get(this->options, "non-blocking-io")) {
- optstr = data_to_str(dict_get(this->options, "non-blocking-io"));
+ data = dict_get_sizen(this->options, "non-blocking-io");
+ if (data) {
+ optstr = data_to_str(data);
- if (gf_string2boolean(optstr, &tmp_bool) == -1) {
+ if (gf_string2boolean(optstr, &tmp_bool) != 0) {
gf_log(this->name, GF_LOG_ERROR,
"'non-blocking-io' takes only boolean options,"
" not taking any action");
@@ -4467,11 +4502,11 @@ socket_init(rpc_transport_t *this)
optstr = NULL;
/* By default, we enable NODELAY */
- if (dict_get(this->options, "transport.socket.nodelay")) {
- optstr = data_to_str(
- dict_get(this->options, "transport.socket.nodelay"));
+ data = dict_get_sizen(this->options, "transport.socket.nodelay");
+ if (data) {
+ optstr = data_to_str(data);
- if (gf_string2boolean(optstr, &tmp_bool) == -1) {
+ if (gf_string2boolean(optstr, &tmp_bool) != 0) {
gf_log(this->name, GF_LOG_ERROR,
"'transport.socket.nodelay' takes only "
"boolean options, not taking any action");
@@ -4484,7 +4519,7 @@ socket_init(rpc_transport_t *this)
}
optstr = NULL;
- if (dict_get_str(this->options, "tcp-window-size", &optstr) == 0) {
+ if (dict_get_str_sizen(this->options, "tcp-window-size", &optstr) == 0) {
if (gf_string2uint64(optstr, &windowsize) != 0) {
gf_log(this->name, GF_LOG_ERROR, "invalid number format: %s",
optstr);
@@ -4500,9 +4535,9 @@ socket_init(rpc_transport_t *this)
priv->keepaliveintvl = GF_KEEPALIVE_INTERVAL;
priv->keepaliveidle = GF_KEEPALIVE_TIME;
priv->keepalivecnt = GF_KEEPALIVE_COUNT;
- if (dict_get_str(this->options, "transport.socket.keepalive", &optstr) ==
- 0) {
- if (gf_string2boolean(optstr, &tmp_bool) == -1) {
+ if (dict_get_str_sizen(this->options, "transport.socket.keepalive",
+ &optstr) == 0) {
+ if (gf_string2boolean(optstr, &tmp_bool) != 0) {
gf_log(this->name, GF_LOG_ERROR,
"'transport.socket.keepalive' takes only "
"boolean options, not taking any action");
@@ -4513,46 +4548,46 @@ socket_init(rpc_transport_t *this)
priv->keepalive = 0;
}
- if (dict_get_int32(this->options, "transport.tcp-user-timeout",
- &(priv->timeout)) != 0)
- priv->timeout = timeout;
- gf_log(this->name, GF_LOG_DEBUG,
- "Configued "
- "transport.tcp-user-timeout=%d",
+ if (dict_get_int32_sizen(this->options, "transport.tcp-user-timeout",
+ &(priv->timeout)) != 0)
+ priv->timeout = GF_NETWORK_TIMEOUT;
+ gf_log(this->name, GF_LOG_DEBUG, "Configured transport.tcp-user-timeout=%d",
priv->timeout);
- if (dict_get_int32(this->options, "transport.socket.keepalive-time",
- &(priv->keepaliveidle)) != 0) {
- priv->keepaliveidle = keepaliveidle;
- }
+ if (priv->keepalive) {
+ if (dict_get_int32_sizen(this->options,
+ "transport.socket.keepalive-time",
+ &(priv->keepaliveidle)) != 0) {
+ priv->keepaliveidle = GF_KEEPALIVE_TIME;
+ }
- if (dict_get_int32(this->options, "transport.socket.keepalive-interval",
- &(priv->keepaliveintvl)) != 0) {
- priv->keepaliveintvl = keepaliveintvl;
- }
+ if (dict_get_int32_sizen(this->options,
+ "transport.socket.keepalive-interval",
+ &(priv->keepaliveintvl)) != 0) {
+ priv->keepaliveintvl = GF_KEEPALIVE_INTERVAL;
+ }
- if (dict_get_int32(this->options, "transport.socket.keepalive-count",
- &(priv->keepalivecnt)) != 0)
- priv->keepalivecnt = keepalivecnt;
- gf_log(this->name, GF_LOG_DEBUG,
- "Reconfigued "
- "transport.keepalivecnt=%d",
- keepalivecnt);
+ if (dict_get_int32_sizen(this->options,
+ "transport.socket.keepalive-count",
+ &(priv->keepalivecnt)) != 0)
+ priv->keepalivecnt = GF_KEEPALIVE_COUNT;
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Reconfigured transport.keepalivecnt=%d", priv->keepalivecnt);
+ }
- if (dict_get_uint32(this->options, "transport.listen-backlog", &backlog) !=
- 0) {
- backlog = GLUSTERFS_SOCKET_LISTEN_BACKLOG;
+ if (dict_get_uint32(this->options, "transport.listen-backlog",
+ &(priv->backlog)) != 0) {
+ priv->backlog = GLUSTERFS_SOCKET_LISTEN_BACKLOG;
}
- priv->backlog = backlog;
optstr = NULL;
/* Check if socket read failures are to be logged */
priv->read_fail_log = 1;
- if (dict_get(this->options, "transport.socket.read-fail-log")) {
- optstr = data_to_str(
- dict_get(this->options, "transport.socket.read-fail-log"));
- if (gf_string2boolean(optstr, &tmp_bool) == -1) {
+ data = dict_get_sizen(this->options, "transport.socket.read-fail-log");
+ if (data) {
+ optstr = data_to_str(data);
+ if (gf_string2boolean(optstr, &tmp_bool) != 0) {
gf_log(this->name, GF_LOG_WARNING,
"'transport.socket.read-fail-log' takes only "
"boolean options; logging socket read fails");
@@ -4564,7 +4599,7 @@ socket_init(rpc_transport_t *this)
priv->windowsize = (int)windowsize;
priv->ssl_enabled = _gf_false;
- if (dict_get_str(this->options, SSL_ENABLED_OPT, &optstr) == 0) {
+ if (dict_get_str_sizen(this->options, SSL_ENABLED_OPT, &optstr) == 0) {
if (gf_string2boolean(optstr, &priv->ssl_enabled) != 0) {
gf_log(this->name, GF_LOG_ERROR,
"invalid value given for ssl-enabled boolean");
@@ -4589,22 +4624,34 @@ fini(rpc_transport_t *this)
priv = this->private;
if (priv) {
- if (priv->sock != -1) {
- pthread_mutex_lock(&priv->in_lock);
+ if (priv->sock >= 0) {
pthread_mutex_lock(&priv->out_lock);
{
- __socket_ioq_flush(this);
+ __socket_ioq_flush(priv);
__socket_reset(this);
}
pthread_mutex_unlock(&priv->out_lock);
- pthread_mutex_unlock(&priv->in_lock);
}
gf_log(this->name, GF_LOG_TRACE, "transport %p destroyed", this);
- pthread_mutex_destroy(&priv->in_lock);
pthread_mutex_destroy(&priv->out_lock);
pthread_mutex_destroy(&priv->cond_lock);
pthread_cond_destroy(&priv->cond);
+
+ GF_ASSERT(priv->notify.in_progress == 0);
+ pthread_mutex_destroy(&priv->notify.lock);
+ pthread_cond_destroy(&priv->notify.cond);
+
+ if (priv->use_ssl && priv->ssl_ssl) {
+ SSL_clear(priv->ssl_ssl);
+ SSL_free(priv->ssl_ssl);
+ priv->ssl_ssl = NULL;
+ }
+ if (priv->ssl_ctx) {
+ SSL_CTX_free(priv->ssl_ctx);
+ priv->ssl_ctx = NULL;
+ }
+
if (priv->ssl_private_key) {
GF_FREE(priv->ssl_private_key);
}
@@ -4629,7 +4676,7 @@ init(rpc_transport_t *this)
ret = socket_init(this);
- if (ret == -1) {
+ if (ret < 0) {
gf_log(this->name, GF_LOG_DEBUG, "socket_init() failed");
}
@@ -4679,7 +4726,6 @@ struct volume_options options[] = {
{.key = {"transport.socket.nodelay"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "1"},
- {.key = {"transport.socket.lowlat"}, .type = GF_OPTION_TYPE_BOOL},
{.key = {"transport.socket.keepalive"},
.type = GF_OPTION_TYPE_BOOL,
.op_version = {1},
diff --git a/rpc/rpc-transport/socket/src/socket.h b/rpc/rpc-transport/socket/src/socket.h
index f1bfba45076..8a2eda70605 100644
--- a/rpc/rpc-transport/socket/src/socket.h
+++ b/rpc/rpc-transport/socket/src/socket.h
@@ -14,6 +14,7 @@
#include <openssl/ssl.h>
#include <openssl/err.h>
#include <openssl/x509v3.h>
+#include <openssl/x509_vfy.h>
#ifdef HAVE_OPENSSL_DH_H
#include <openssl/dh.h>
#endif
@@ -21,13 +22,7 @@
#include <openssl/ecdh.h>
#endif
-#include "gf-event.h"
#include "rpc-transport.h"
-#include "logging.h"
-#include "dict.h"
-#include "mem-pool.h"
-#include "globals.h"
-#include "refcount.h"
#ifndef MAX_IOVEC
#define MAX_IOVEC 16
@@ -109,11 +104,12 @@ struct ioq {
};
struct iovec vector[MAX_IOVEC];
- int count;
struct iovec *pending_vector;
+ int count;
int pending_count;
struct iobref *iobref;
uint32_t fraghdr;
+ char _pad[4];
};
typedef struct {
@@ -172,13 +168,13 @@ struct gf_sock_incoming {
char *proghdr_base_addr;
struct iobuf *iobuf;
size_t iobuf_size;
- int count;
struct gf_sock_incoming_frag frag;
struct iovec vector[2];
struct iovec payload_vector;
struct iobref *iobref;
rpc_request_info_t *request_info;
struct iovec *pending_vector;
+ int count;
int pending_count;
size_t total_bytes_read;
@@ -187,19 +183,11 @@ struct gf_sock_incoming {
size_t ra_served;
char *ra_buf;
uint32_t fraghdr;
- char complete_record;
msg_type_t msg_type;
sp_rpcrecord_state_t record_state;
+ char _pad[4];
};
-typedef enum {
- OT_IDLE, /* Uninitialized or termination complete. */
- OT_SPAWNING, /* Past pthread_create but not in thread yet. */
- OT_RUNNING, /* Poller thread running normally. */
- OT_CALLBACK, /* Poller thread in the middle of a callback. */
- OT_PLEASE_DIE, /* Poller termination requested. */
-} ot_state_t;
-
typedef struct {
union {
struct list_head ioq;
@@ -208,11 +196,9 @@ typedef struct {
struct ioq *ioq_prev;
};
};
- pthread_mutex_t in_lock;
pthread_mutex_t out_lock;
pthread_mutex_t cond_lock;
pthread_cond_t cond;
- pthread_t thread;
int windowsize;
int keepalive;
int keepaliveidle;
@@ -220,12 +206,14 @@ typedef struct {
int keepalivecnt;
int timeout;
int log_ctr;
+ int shutdown_log_ctr;
/* ssl_error_required is used only during the SSL connection setup
* phase.
* It holds the error code returned by SSL_get_error() and is used to
* arm the epoll event set for the required event for the specific fd.
*/
int ssl_error_required;
+ int ssl_session_id;
GF_REF_DECL; /* refcount to keep track of socket_poller
threads */
@@ -240,14 +228,14 @@ typedef struct {
uint32_t backlog;
SSL_METHOD *ssl_meth;
SSL_CTX *ssl_ctx;
- int ssl_session_id;
BIO *ssl_sbio;
SSL *ssl_ssl;
char *ssl_own_cert;
char *ssl_private_key;
char *ssl_ca_list;
- int pipe[2];
+ char *crl_path;
struct gf_sock_incoming incoming;
+ mgmt_ssl_t srvr_ssl;
/* -1 = not connected. 0 = in progress. 1 = connected */
char connected;
/* 1 = connect failed for reasons other than EINPROGRESS/ENOENT
@@ -256,9 +244,7 @@ typedef struct {
char bio;
char connect_finish_log;
char submit_log;
- char lowlat;
char nodelay;
- mgmt_ssl_t srvr_ssl;
gf_boolean_t read_fail_log;
gf_boolean_t ssl_enabled; /* outbound I/O */
gf_boolean_t mgmt_ssl; /* outbound mgmt */
@@ -284,7 +270,7 @@ typedef struct {
* socket_event_handler() for
* newly accepted socket
*/
-
+ char _pad[4];
} socket_private_t;
#endif
diff --git a/rpc/xdr/gen/Makefile.am b/rpc/xdr/gen/Makefile.am
deleted file mode 100644
index df379b80130..00000000000
--- a/rpc/xdr/gen/Makefile.am
+++ /dev/null
@@ -1,49 +0,0 @@
-XDRGENFILES = glusterfs3-xdr.x glusterfs4-xdr.x cli1-xdr.x nlm4-xdr.x nsm-xdr.x \
- rpc-common-xdr.x glusterd1-xdr.x acl3-xdr.x portmap-xdr.x \
- mount3udp.x changelog-xdr.x glusterfs-fops.x
-XDRHEADERS = $(XDRGENFILES:.x=.h)
-XDRSOURCES = $(XDRGENFILES:.x=.c)
-
-CLEANFILES = $(XDRSOURCES) $(XDRHEADERS) $(XDRGENFILES)
-
-# trick automake into doing BUILT_SOURCES magic
-BUILT_SOURCES = $(XDRHEADERS) $(XDRSOURCES)
-
-xdrsrc=$(top_srcdir)/rpc/xdr/src
-xdrdst=$(top_builddir)/rpc/xdr/src
-
-# make's dependency resolution may mean that it decides to run
-# rpcgen again (unnecessarily), but as the .c file already exists,
-# rpcgen will exit with an error, resulting in a build error. We
-# could use a '-' (i.e. -@rpcgen ...) and suffer with noisy warnings
-# in the build. Or we do this crufty thing instead.
-$(XDRSOURCES): $(XDRGENFILES)
- @if [ ! -e $(xdrdst)/$@ -o $(@:.c=.x) -nt $(xdrdst)/$@ ]; then \
- rpcgen -c -o $(xdrdst)/$@ $(@:.c=.x) ;\
- fi
-
-# d*mn sed in netbsd6 doesn't do -i (inline)
-# (why are we still running smoke on netbsd6 and not netbsd7?)
-$(XDRHEADERS): $(XDRGENFILES)
- @if [ ! -e $(xdrdst)/$@ -o $(@:.h=.x) -nt $(xdrdst)/$@ ]; then \
- rpcgen -h -o $(@:.h=.tmp) $(@:.h=.x) && \
- sed -e '/#ifndef/ s/-/_/g' -e '/#define/ s/-/_/g' \
- -e '/#endif/ s/-/_/' -e 's/TMP_/H_/g' \
- $(@:.h=.tmp) > $(xdrdst)/$@ && \
- rm -f $(@:.h=.tmp) ; \
- fi
-
-
-# link .x files when doing out-of-tree builds
-# have to use .PHONY here to force it; all versions of make
-# will think the file already exists "here" by virtue of the
-# VPATH. And we have to have the .x file in $cwd in order to
-# have rpcgen generate "nice" #include directives
-# i.e. (nice):
-# #include "acl3-xdr.h"
-# versus (not nice):
-# #include "../../../../foo/src/rpc/xdr/src/acl3-xdr.h"
-.PHONY : $(XDRGENFILES)
-$(XDRGENFILES):
- @if [ ! -e $@ ]; then ln -s $(xdrsrc)/$@ . ; fi;
-
diff --git a/rpc/xdr/src/.gitignore b/rpc/xdr/src/.gitignore
index 6728940f546..a0c8b7ca2b6 100644
--- a/rpc/xdr/src/.gitignore
+++ b/rpc/xdr/src/.gitignore
@@ -10,8 +10,6 @@ glusterfs3-xdr.c
glusterfs3-xdr.h
glusterfs4-xdr.c
glusterfs4-xdr.h
-glusterfs-fops.h
-glusterfs-fops.c
mount3udp.c
mount3udp.h
nlm4-xdr.c
diff --git a/rpc/xdr/src/Makefile.am b/rpc/xdr/src/Makefile.am
index 495b9999236..0e9c377ec93 100644
--- a/rpc/xdr/src/Makefile.am
+++ b/rpc/xdr/src/Makefile.am
@@ -1,10 +1,19 @@
-XDRGENFILES = glusterfs3-xdr.x glusterfs4-xdr.x cli1-xdr.x nlm4-xdr.x nsm-xdr.x \
- rpc-common-xdr.x glusterd1-xdr.x acl3-xdr.x portmap-xdr.x \
- mount3udp.x changelog-xdr.x glusterfs-fops.x
+if BUILD_GNFS
+ NFS_XDRS = nlm4-xdr.x nsm-xdr.x acl3-xdr.x mount3udp.x
+ NFS_SRCS = xdr-nfs3.c msg-nfs3.c
+ NFS_HDRS = xdr-nfs3.h msg-nfs3.h
+else
+ NFS_EXTRA_XDRS = nlm4-xdr.x nsm-xdr.x acl3-xdr.x mount3udp.x
+endif
+
+XDRGENFILES = glusterfs3-xdr.x glusterfs4-xdr.x cli1-xdr.x \
+ rpc-common-xdr.x glusterd1-xdr.x changelog-xdr.x \
+ portmap-xdr.x ${NFS_XDRS}
+
XDRHEADERS = $(XDRGENFILES:.x=.h)
XDRSOURCES = $(XDRGENFILES:.x=.c)
-EXTRA_DIST = $(XDRGENFILES) libgfxdr.sym
+EXTRA_DIST = $(XDRGENFILES) libgfxdr.sym ${NFS_EXTRA_XDRS}
lib_LTLIBRARIES = libgfxdr.la
@@ -19,17 +28,58 @@ libgfxdr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
libgfxdr_la_LDFLAGS = -version-info $(LIBGFXDR_LT_VERSION) $(GF_LDFLAGS) \
-export-symbols $(top_srcdir)/rpc/xdr/src/libgfxdr.sym
-libgfxdr_la_SOURCES = xdr-generic.c xdr-nfs3.c msg-nfs3.c
+libgfxdr_la_SOURCES = xdr-generic.c ${NFS_SRCS}
nodist_libgfxdr_la_SOURCES = $(XDRSOURCES)
-libgfxdr_la_HEADERS = xdr-generic.h xdr-nfs3.h msg-nfs3.h glusterfs3.h \
- rpc-pragmas.h
+libgfxdr_la_HEADERS = xdr-generic.h glusterfs3.h rpc-pragmas.h ${NFS_HDRS}
nodist_libgfxdr_la_HEADERS = $(XDRHEADERS)
libgfxdr_ladir = $(includedir)/glusterfs/rpc
CLEANFILES = $(XDRSOURCES) $(XDRHEADERS)
-# Generate the .c and .h symlinks from the ../gen/*.x files
-$(XDRSOURCES) $(XDRHEADERS):
- $(MAKE) -C ../gen $^
+# trick automake into doing BUILT_SOURCES magic
+BUILT_SOURCES = $(XDRHEADERS) $(XDRSOURCES)
+
+xdrsrc=$(top_srcdir)/rpc/xdr/src
+xdrdst=$(top_builddir)/rpc/xdr/src
+
+# make's dependency resolution may mean that it decides to run
+# rpcgen again (unnecessarily), but as the .c file already exists,
+# rpcgen will exit with an error, resulting in a build error. We
+# could use a '-' (i.e. -@rpcgen ...) and suffer with noisy warnings
+# in the build. Or we do this crufty thing instead.
+$(XDRSOURCES): $(XDRGENFILES)
+ @if [ ! -e $(xdrdst)/$@ -o $(@:.c=.x) -nt $(xdrdst)/$@ ]; then \
+ rpcgen -c -o $(xdrdst)/$@ $(@:.c=.x) ;\
+ fi
+
+# d*mn sed in netbsd6 doesn't do -i (inline)
+# (why are we still running smoke on netbsd6 and not netbsd7?)
+$(XDRHEADERS): $(XDRGENFILES)
+ @if [ ! -e $(xdrdst)/$@ -o $(@:.h=.x) -nt $(xdrdst)/$@ ]; then \
+ rpcgen -h -o $(@:.h=.tmp) $(@:.h=.x) && \
+ sed -e '/#ifndef/ s/-/_/g' -e '/#define/ s/-/_/g' \
+ -e '/#endif/ s/-/_/' -e 's/TMP_/H_/g' \
+ $(@:.h=.tmp) > $(xdrdst)/$@ && \
+ rm -f $(@:.h=.tmp) ; \
+ fi
+
+
+# link .x files when doing out-of-tree builds
+# have to use .PHONY here to force it; all versions of make
+# will think the file already exists "here" by virtue of the
+# VPATH. And we have to have the .x file in $cwd in order to
+# have rpcgen generate "nice" #include directives
+# i.e. (nice):
+# #include "acl3-xdr.h"
+# versus (not nice):
+# #include "../../../../foo/src/rpc/xdr/src/acl3-xdr.h"
+.PHONY : $(XDRGENFILES)
+$(XDRGENFILES):
+ @if [ ! -e $@ ]; then ln -s $(xdrsrc)/$@ . ; fi;
+
+clean-local:
+ @if [ $(top_builddir) != $(top_srcdir) ]; then \
+ rm -f $(xdrdst)/*.x; \
+ fi
diff --git a/rpc/xdr/src/acl3-xdr.x b/rpc/xdr/src/acl3-xdr.x
index 44f1ed4fac9..7f7364971e6 100644
--- a/rpc/xdr/src/acl3-xdr.x
+++ b/rpc/xdr/src/acl3-xdr.x
@@ -11,7 +11,7 @@
#ifdef RPC_XDR
%#include "rpc-pragmas.h"
#endif
-%#include "compat.h"
+%#include <glusterfs/compat.h>
%#include "xdr-nfs3.h"
struct aclentry {
diff --git a/rpc/xdr/src/changelog-xdr.x b/rpc/xdr/src/changelog-xdr.x
index 97cd4558ccb..5956245d5ce 100644
--- a/rpc/xdr/src/changelog-xdr.x
+++ b/rpc/xdr/src/changelog-xdr.x
@@ -11,7 +11,7 @@
#ifdef RPC_XDR
%#include "rpc-pragmas.h"
#endif
-%#include "compat.h"
+%#include <glusterfs/compat.h>
/* XDR: libgfchangelog -> changelog */
diff --git a/rpc/xdr/src/cli1-xdr.x b/rpc/xdr/src/cli1-xdr.x
index 6f5fd256954..777cb0046a2 100644
--- a/rpc/xdr/src/cli1-xdr.x
+++ b/rpc/xdr/src/cli1-xdr.x
@@ -11,7 +11,7 @@
#ifdef RPC_XDR
%#include "rpc-pragmas.h"
#endif
-%#include "compat.h"
+%#include <glusterfs/compat.h>
enum gf_cli_defrag_type {
GF_DEFRAG_CMD_NONE = 0,
@@ -68,6 +68,7 @@ enum gf_bitrot_type {
GF_BITROT_OPTION_TYPE_EXPIRY_TIME,
GF_BITROT_CMD_SCRUB_STATUS,
GF_BITROT_CMD_SCRUB_ONDEMAND,
+ GF_BITROT_OPTION_TYPE_SIGNER_THREADS,
GF_BITROT_OPTION_TYPE_MAX
};
diff --git a/rpc/xdr/src/glusterd1-xdr.x b/rpc/xdr/src/glusterd1-xdr.x
index 1ce57392b5b..b631dea3502 100644
--- a/rpc/xdr/src/glusterd1-xdr.x
+++ b/rpc/xdr/src/glusterd1-xdr.x
@@ -11,7 +11,7 @@
#ifdef RPC_XDR
%#include "rpc-pragmas.h"
#endif
-%#include "compat.h"
+%#include <glusterfs/compat.h>
enum glusterd_volume_status {
GLUSTERD_STATUS_NONE = 0,
@@ -132,6 +132,7 @@ struct gd1_mgmt_brick_op_req {
string name<>;
int op;
opaque input<>;
+ opaque dict<>;
} ;
struct gd1_mgmt_brick_op_rsp {
@@ -201,6 +202,21 @@ struct gd1_mgmt_v3_commit_rsp {
string op_errstr<>;
} ;
+struct gd1_mgmt_v3_post_commit_req {
+ unsigned char uuid[16];
+ int op;
+ opaque dict<>;
+} ;
+
+struct gd1_mgmt_v3_post_commit_rsp {
+ unsigned char uuid[16];
+ int op;
+ int op_ret;
+ int op_errno;
+ opaque dict<>;
+ string op_errstr<>;
+} ;
+
struct gd1_mgmt_v3_post_val_req {
unsigned char uuid[16];
int op;
diff --git a/rpc/xdr/src/glusterfs-fops.x b/rpc/xdr/src/glusterfs-fops.x
deleted file mode 100644
index 8ccc4658f6c..00000000000
--- a/rpc/xdr/src/glusterfs-fops.x
+++ /dev/null
@@ -1,248 +0,0 @@
-/*
- * Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
- * This file is part of GlusterFS.
- *
- * This file is licensed to you under your choice of the GNU Lesser
- * General Public License, version 3 or any later version (LGPLv3 or
- * later), or the GNU General Public License, version 2 (GPLv2), in all
- * cases as published by the Free Software Foundation.
- */
-
-#ifdef RPC_XDR
-%#include "rpc-pragmas.h"
-#endif
-%#include "compat.h"
-
-/* NOTE: add members ONLY at the end (just before _MAXVALUE) */
-/*
- * OTHER NOTE: fop_enum_to_str and fop_enum_to_pri_str (in common-utils.h) also
- * contain lists of fops, so if you update this list UPDATE THOSE TOO.
- */
-enum glusterfs_fop_t {
- GF_FOP_NULL = 0,
- GF_FOP_STAT,
- GF_FOP_READLINK,
- GF_FOP_MKNOD,
- GF_FOP_MKDIR,
- GF_FOP_UNLINK,
- GF_FOP_RMDIR,
- GF_FOP_SYMLINK,
- GF_FOP_RENAME,
- GF_FOP_LINK,
- GF_FOP_TRUNCATE,
- GF_FOP_OPEN,
- GF_FOP_READ,
- GF_FOP_WRITE,
- GF_FOP_STATFS,
- GF_FOP_FLUSH,
- GF_FOP_FSYNC, /* 16 */
- GF_FOP_SETXATTR,
- GF_FOP_GETXATTR,
- GF_FOP_REMOVEXATTR,
- GF_FOP_OPENDIR,
- GF_FOP_FSYNCDIR,
- GF_FOP_ACCESS,
- GF_FOP_CREATE,
- GF_FOP_FTRUNCATE,
- GF_FOP_FSTAT, /* 25 */
- GF_FOP_LK,
- GF_FOP_LOOKUP,
- GF_FOP_READDIR,
- GF_FOP_INODELK,
- GF_FOP_FINODELK,
- GF_FOP_ENTRYLK,
- GF_FOP_FENTRYLK,
- GF_FOP_XATTROP,
- GF_FOP_FXATTROP,
- GF_FOP_FGETXATTR,
- GF_FOP_FSETXATTR,
- GF_FOP_RCHECKSUM,
- GF_FOP_SETATTR,
- GF_FOP_FSETATTR,
- GF_FOP_READDIRP,
- GF_FOP_FORGET,
- GF_FOP_RELEASE,
- GF_FOP_RELEASEDIR,
- GF_FOP_GETSPEC,
- GF_FOP_FREMOVEXATTR,
- GF_FOP_FALLOCATE,
- GF_FOP_DISCARD,
- GF_FOP_ZEROFILL,
- GF_FOP_IPC,
- GF_FOP_SEEK,
- GF_FOP_LEASE,
- GF_FOP_COMPOUND,
- GF_FOP_GETACTIVELK,
- GF_FOP_SETACTIVELK,
- GF_FOP_PUT,
- GF_FOP_ICREATE,
- GF_FOP_NAMELINK,
- GF_FOP_MAXVALUE
-};
-
-/* Note: Removed event GF_EVENT_CHILD_MODIFIED=8, hence
- *to preserve backward compatibiliy, GF_EVENT_CLEANUP = 9
- */
-enum glusterfs_event_t {
- GF_EVENT_PARENT_UP = 1,
- GF_EVENT_POLLIN,
- GF_EVENT_POLLOUT,
- GF_EVENT_POLLERR,
- GF_EVENT_CHILD_UP,
- GF_EVENT_CHILD_DOWN,
- GF_EVENT_CHILD_CONNECTING,
- GF_EVENT_CLEANUP = 9,
- GF_EVENT_TRANSPORT_CONNECTED,
- GF_EVENT_VOLFILE_MODIFIED,
- GF_EVENT_GRAPH_NEW,
- GF_EVENT_TRANSLATOR_INFO,
- GF_EVENT_TRANSLATOR_OP,
- GF_EVENT_AUTH_FAILED,
- GF_EVENT_VOLUME_DEFRAG,
- GF_EVENT_PARENT_DOWN,
- GF_EVENT_VOLUME_BARRIER_OP,
- GF_EVENT_UPCALL,
- GF_EVENT_SCRUB_STATUS,
- GF_EVENT_SOME_DESCENDENT_DOWN,
- GF_EVENT_SCRUB_ONDEMAND,
- GF_EVENT_SOME_DESCENDENT_UP,
- GF_EVENT_CHILD_PING,
- GF_EVENT_MAXVAL
-};
-
-/* List of compound fops. Add fops at the end. */
-enum glusterfs_compound_fop_t {
- GF_CFOP_NON_PREDEFINED = 0, /* needs single FOP inspection */
- GF_CFOP_XATTROP_WRITEV,
- GF_CFOP_XATTROP_UNLOCK,
- GF_CFOP_PUT, /* create+write+setxattr+fsync+close+rename */
- GF_CFOP_MAXVALUE
-};
-
-enum glusterfs_mgmt_t {
- GF_MGMT_NULL = 0,
- GF_MGMT_MAXVALUE
-};
-
-enum gf_op_type_t {
- GF_OP_TYPE_NULL = 0,
- GF_OP_TYPE_FOP,
- GF_OP_TYPE_MGMT,
- GF_OP_TYPE_MAX
-};
-
-/* NOTE: all the miscellaneous flags used by GlusterFS should be listed here */
-enum glusterfs_lk_cmds_t {
- GF_LK_GETLK = 0,
- GF_LK_SETLK,
- GF_LK_SETLKW,
- GF_LK_RESLK_LCK,
- GF_LK_RESLK_LCKW,
- GF_LK_RESLK_UNLCK,
- GF_LK_GETLK_FD
-};
-
-enum glusterfs_lk_types_t {
- GF_LK_F_RDLCK = 0,
- GF_LK_F_WRLCK,
- GF_LK_F_UNLCK,
- GF_LK_EOL
-};
-
-/* Lease Types */
-enum gf_lease_types_t {
- NONE = 0,
- GF_RD_LEASE = 1,
- GF_RW_LEASE = 2,
- GF_LEASE_MAX_TYPE
-};
-
-/* Lease cmds */
-enum gf_lease_cmds_t {
- GF_GET_LEASE = 1,
- GF_SET_LEASE = 2,
- GF_UNLK_LEASE = 3
-};
-
-%#define LEASE_ID_SIZE 16 /* 128bits */
-struct gf_lease {
- gf_lease_cmds_t cmd;
- gf_lease_types_t lease_type;
- char lease_id[LEASE_ID_SIZE];
- unsigned int lease_flags;
-};
-
-enum glusterfs_lk_recovery_cmds_t {
- F_RESLK_LCK = 200,
- F_RESLK_LCKW,
- F_RESLK_UNLCK,
- F_GETLK_FD
-};
-
-enum gf_lk_domain_t {
- GF_LOCK_POSIX,
- GF_LOCK_INTERNAL
-};
-
-enum entrylk_cmd {
- ENTRYLK_LOCK,
- ENTRYLK_UNLOCK,
- ENTRYLK_LOCK_NB
-};
-
-enum entrylk_type {
- ENTRYLK_RDLCK,
- ENTRYLK_WRLCK
-};
-
-%#define GF_MAX_LOCK_OWNER_LEN 1024 /* 1kB as per NLM */
-
-/* 16strings-16strings-... */
-%#define GF_LKOWNER_BUF_SIZE ((GF_MAX_LOCK_OWNER_LEN * 2) + (GF_MAX_LOCK_OWNER_LEN / 8))
-
-struct gf_lkowner_t {
- int len;
- char data[GF_MAX_LOCK_OWNER_LEN];
-};
-
-enum gf_xattrop_flags_t {
- GF_XATTROP_ADD_ARRAY,
- GF_XATTROP_ADD_ARRAY64,
- GF_XATTROP_OR_ARRAY,
- GF_XATTROP_AND_ARRAY,
- GF_XATTROP_GET_AND_SET,
- GF_XATTROP_ADD_ARRAY_WITH_DEFAULT,
- GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT
-};
-
-enum gf_seek_what_t {
- GF_SEEK_DATA,
- GF_SEEK_HOLE
-};
-
-enum gf_upcall_flags_t {
- GF_UPCALL_NULL,
- GF_UPCALL,
- GF_UPCALL_CI_STAT,
- GF_UPCALL_CI_XATTR,
- GF_UPCALL_CI_RENAME,
- GF_UPCALL_CI_NLINK,
- GF_UPCALL_CI_FORGET,
- GF_UPCALL_LEASE_RECALL,
- GF_UPCALL_FLAGS_MAXVALUE
-};
-
-enum gf_dict_data_type_t {
- GF_DATA_TYPE_UNKNOWN,
- GF_DATA_TYPE_STR_OLD, /* Will be set by volgen and dict-serialize
- and unserialize. Used to reduce warnings
- if one is using old protocol */
- GF_DATA_TYPE_INT,
- GF_DATA_TYPE_UINT,
- GF_DATA_TYPE_DOUBLE,
- GF_DATA_TYPE_STR,
- GF_DATA_TYPE_PTR,
- GF_DATA_TYPE_GFUUID,
- GF_DATA_TYPE_IATT,
- GF_DATA_TYPE_MAX
-};
diff --git a/rpc/xdr/src/glusterfs3-xdr.x b/rpc/xdr/src/glusterfs3-xdr.x
index c253db5de5b..1c99099a721 100644
--- a/rpc/xdr/src/glusterfs3-xdr.x
+++ b/rpc/xdr/src/glusterfs3-xdr.x
@@ -11,9 +11,8 @@
#ifdef RPC_XDR
%#include "rpc-pragmas.h"
#endif
-%#include "compat.h"
+%#include <glusterfs/glusterfs-fops.h>
%#include "rpc-common-xdr.h"
-%#include "glusterfs-fops.h"
#define GF_REQUEST_MAXGROUPS 16
struct gf_statfs {
@@ -859,126 +858,6 @@ struct gf_getsnap_name_uuid_rsp {
opaque dict<>;
};
-union compound_req switch (glusterfs_fop_t fop_enum) {
- case GF_FOP_STAT: gfs3_stat_req compound_stat_req;
- case GF_FOP_READLINK: gfs3_readlink_req compound_readlink_req;
- case GF_FOP_MKNOD: gfs3_mknod_req compound_mknod_req;
- case GF_FOP_MKDIR: gfs3_mkdir_req compound_mkdir_req;
- case GF_FOP_UNLINK: gfs3_unlink_req compound_unlink_req;
- case GF_FOP_RMDIR: gfs3_rmdir_req compound_rmdir_req;
- case GF_FOP_SYMLINK: gfs3_symlink_req compound_symlink_req;
- case GF_FOP_RENAME: gfs3_rename_req compound_rename_req;
- case GF_FOP_LINK: gfs3_link_req compound_link_req;
- case GF_FOP_TRUNCATE: gfs3_truncate_req compound_truncate_req;
- case GF_FOP_OPEN: gfs3_open_req compound_open_req;
- case GF_FOP_READ: gfs3_read_req compound_read_req;
- case GF_FOP_WRITE: gfs3_write_req compound_write_req;
- case GF_FOP_STATFS: gfs3_statfs_req compound_statfs_req;
- case GF_FOP_FLUSH: gfs3_flush_req compound_flush_req;
- case GF_FOP_FSYNC: gfs3_fsync_req compound_fsync_req;
- case GF_FOP_GETXATTR: gfs3_getxattr_req compound_getxattr_req;
- case GF_FOP_SETXATTR: gfs3_setxattr_req compound_setxattr_req;
- case GF_FOP_REMOVEXATTR: gfs3_removexattr_req compound_removexattr_req;
- case GF_FOP_OPENDIR: gfs3_opendir_req compound_opendir_req;
- case GF_FOP_FSYNCDIR: gfs3_fsyncdir_req compound_fsyncdir_req;
- case GF_FOP_ACCESS: gfs3_access_req compound_access_req;
- case GF_FOP_CREATE: gfs3_create_req compound_create_req;
- case GF_FOP_FTRUNCATE: gfs3_ftruncate_req compound_ftruncate_req;
- case GF_FOP_FSTAT: gfs3_fstat_req compound_fstat_req;
- case GF_FOP_LK: gfs3_lk_req compound_lk_req;
- case GF_FOP_LOOKUP: gfs3_lookup_req compound_lookup_req;
- case GF_FOP_READDIR: gfs3_readdir_req compound_readdir_req;
- case GF_FOP_INODELK: gfs3_inodelk_req compound_inodelk_req;
- case GF_FOP_FINODELK: gfs3_finodelk_req compound_finodelk_req;
- case GF_FOP_ENTRYLK: gfs3_entrylk_req compound_entrylk_req;
- case GF_FOP_FENTRYLK: gfs3_fentrylk_req compound_fentrylk_req;
- case GF_FOP_XATTROP: gfs3_xattrop_req compound_xattrop_req;
- case GF_FOP_FXATTROP: gfs3_fxattrop_req compound_fxattrop_req;
- case GF_FOP_FGETXATTR: gfs3_fgetxattr_req compound_fgetxattr_req;
- case GF_FOP_FSETXATTR: gfs3_fsetxattr_req compound_fsetxattr_req;
- case GF_FOP_RCHECKSUM: gfs3_rchecksum_req compound_rchecksum_req;
- case GF_FOP_SETATTR: gfs3_setattr_req compound_setattr_req;
- case GF_FOP_FSETATTR: gfs3_fsetattr_req compound_fsetattr_req;
- case GF_FOP_READDIRP: gfs3_readdirp_req compound_readdirp_req;
- case GF_FOP_RELEASE: gfs3_release_req compound_release_req;
- case GF_FOP_RELEASEDIR: gfs3_releasedir_req compound_releasedir_req;
- case GF_FOP_FREMOVEXATTR: gfs3_fremovexattr_req compound_fremovexattr_req;
- case GF_FOP_FALLOCATE: gfs3_fallocate_req compound_fallocate_req;
- case GF_FOP_DISCARD: gfs3_discard_req compound_discard_req;
- case GF_FOP_ZEROFILL: gfs3_zerofill_req compound_zerofill_req;
- case GF_FOP_IPC: gfs3_ipc_req compound_ipc_req;
- case GF_FOP_SEEK: gfs3_seek_req compound_seek_req;
- case GF_FOP_LEASE: gfs3_lease_req compound_lease_req;
- default: void;
-};
-
-struct gfs3_compound_req {
- int compound_version;
- glusterfs_compound_fop_t compound_fop_enum;
- compound_req compound_req_array<>;
- opaque xdata<>;
-};
-
-union compound_rsp switch (glusterfs_fop_t fop_enum) {
- case GF_FOP_STAT: gfs3_stat_rsp compound_stat_rsp;
- case GF_FOP_READLINK: gfs3_readlink_rsp compound_readlink_rsp;
- case GF_FOP_MKNOD: gfs3_mknod_rsp compound_mknod_rsp;
- case GF_FOP_MKDIR: gfs3_mkdir_rsp compound_mkdir_rsp;
- case GF_FOP_UNLINK: gfs3_unlink_rsp compound_unlink_rsp;
- case GF_FOP_RMDIR: gfs3_rmdir_rsp compound_rmdir_rsp;
- case GF_FOP_SYMLINK: gfs3_symlink_rsp compound_symlink_rsp;
- case GF_FOP_RENAME: gfs3_rename_rsp compound_rename_rsp;
- case GF_FOP_LINK: gfs3_link_rsp compound_link_rsp;
- case GF_FOP_TRUNCATE: gfs3_truncate_rsp compound_truncate_rsp;
- case GF_FOP_OPEN: gfs3_open_rsp compound_open_rsp;
- case GF_FOP_READ: gfs3_read_rsp compound_read_rsp;
- case GF_FOP_WRITE: gfs3_write_rsp compound_write_rsp;
- case GF_FOP_STATFS: gfs3_statfs_rsp compound_statfs_rsp;
- case GF_FOP_FLUSH: gf_common_rsp compound_flush_rsp;
- case GF_FOP_FSYNC: gfs3_fsync_rsp compound_fsync_rsp;
- case GF_FOP_GETXATTR: gfs3_getxattr_rsp compound_getxattr_rsp;
- case GF_FOP_SETXATTR: gf_common_rsp compound_setxattr_rsp;
- case GF_FOP_REMOVEXATTR: gf_common_rsp compound_removexattr_rsp;
- case GF_FOP_OPENDIR: gfs3_opendir_rsp compound_opendir_rsp;
- case GF_FOP_FSYNCDIR: gf_common_rsp compound_fsyncdir_rsp;
- case GF_FOP_ACCESS: gf_common_rsp compound_access_rsp;
- case GF_FOP_CREATE: gfs3_create_rsp compound_create_rsp;
- case GF_FOP_FTRUNCATE: gfs3_ftruncate_rsp compound_ftruncate_rsp;
- case GF_FOP_FSTAT: gfs3_fstat_rsp compound_fstat_rsp;
- case GF_FOP_LK: gfs3_lk_rsp compound_lk_rsp;
- case GF_FOP_LOOKUP: gfs3_lookup_rsp compound_lookup_rsp;
- case GF_FOP_READDIR: gfs3_readdir_rsp compound_readdir_rsp;
- case GF_FOP_INODELK: gf_common_rsp compound_inodelk_rsp;
- case GF_FOP_FINODELK: gf_common_rsp compound_finodelk_rsp;
- case GF_FOP_ENTRYLK: gf_common_rsp compound_entrylk_rsp;
- case GF_FOP_FENTRYLK: gf_common_rsp compound_fentrylk_rsp;
- case GF_FOP_XATTROP: gfs3_xattrop_rsp compound_xattrop_rsp;
- case GF_FOP_FXATTROP: gfs3_fxattrop_rsp compound_fxattrop_rsp;
- case GF_FOP_FGETXATTR: gfs3_fgetxattr_rsp compound_fgetxattr_rsp;
- case GF_FOP_FSETXATTR: gf_common_rsp compound_fsetxattr_rsp;
- case GF_FOP_RCHECKSUM: gfs3_rchecksum_rsp compound_rchecksum_rsp;
- case GF_FOP_SETATTR: gfs3_setattr_rsp compound_setattr_rsp;
- case GF_FOP_FSETATTR: gfs3_fsetattr_rsp compound_fsetattr_rsp;
- case GF_FOP_READDIRP: gfs3_readdirp_rsp compound_readdirp_rsp;
- case GF_FOP_RELEASE: gf_common_rsp compound_release_rsp;
- case GF_FOP_RELEASEDIR: gf_common_rsp compound_releasedir_rsp;
- case GF_FOP_FREMOVEXATTR: gf_common_rsp compound_fremovexattr_rsp;
- case GF_FOP_FALLOCATE: gfs3_fallocate_rsp compound_fallocate_rsp;
- case GF_FOP_DISCARD: gfs3_discard_rsp compound_discard_rsp;
- case GF_FOP_ZEROFILL: gfs3_zerofill_rsp compound_zerofill_rsp;
- case GF_FOP_IPC: gfs3_ipc_rsp compound_ipc_rsp;
- case GF_FOP_SEEK: gfs3_seek_rsp compound_seek_rsp;
- case GF_FOP_LEASE: gfs3_lease_rsp compound_lease_rsp;
- default: void;
-};
-
-struct gfs3_compound_rsp {
- int op_ret;
- int op_errno;
- compound_rsp compound_rsp_array<>;
- opaque xdata<>;
-};
-
struct gfs3_locklist {
gf_proto_flock flock;
string client_uid<>;
diff --git a/rpc/xdr/src/glusterfs3.h b/rpc/xdr/src/glusterfs3.h
index b8ef394abaf..86b3a4c0e5d 100644
--- a/rpc/xdr/src/glusterfs3.h
+++ b/rpc/xdr/src/glusterfs3.h
@@ -16,9 +16,9 @@
#include "xdr-generic.h"
#include "glusterfs3-xdr.h"
#include "glusterfs4-xdr.h"
-#include "iatt.h"
+#include <glusterfs/iatt.h>
#include "protocol-common.h"
-#include "upcall-utils.h"
+#include <glusterfs/upcall-utils.h>
#define xdr_decoded_remaining_addr(xdr) ((&xdr)->x_private)
#define xdr_decoded_remaining_len(xdr) ((&xdr)->x_handy)
@@ -585,6 +585,34 @@ out:
}
static inline void
+gfx_mdata_iatt_to_mdata_iatt(struct gfx_mdata_iatt *gf_mdata_iatt,
+ struct mdata_iatt *mdata_iatt)
+{
+ if (!mdata_iatt || !gf_mdata_iatt)
+ return;
+ mdata_iatt->ia_atime = gf_mdata_iatt->ia_atime;
+ mdata_iatt->ia_atime_nsec = gf_mdata_iatt->ia_atime_nsec;
+ mdata_iatt->ia_mtime = gf_mdata_iatt->ia_mtime;
+ mdata_iatt->ia_mtime_nsec = gf_mdata_iatt->ia_mtime_nsec;
+ mdata_iatt->ia_ctime = gf_mdata_iatt->ia_ctime;
+ mdata_iatt->ia_ctime_nsec = gf_mdata_iatt->ia_ctime_nsec;
+}
+
+static inline void
+gfx_mdata_iatt_from_mdata_iatt(struct gfx_mdata_iatt *gf_mdata_iatt,
+ struct mdata_iatt *mdata_iatt)
+{
+ if (!mdata_iatt || !gf_mdata_iatt)
+ return;
+ gf_mdata_iatt->ia_atime = mdata_iatt->ia_atime;
+ gf_mdata_iatt->ia_atime_nsec = mdata_iatt->ia_atime_nsec;
+ gf_mdata_iatt->ia_mtime = mdata_iatt->ia_mtime;
+ gf_mdata_iatt->ia_mtime_nsec = mdata_iatt->ia_mtime_nsec;
+ gf_mdata_iatt->ia_ctime = mdata_iatt->ia_ctime;
+ gf_mdata_iatt->ia_ctime_nsec = mdata_iatt->ia_ctime_nsec;
+}
+
+static inline void
gfx_stat_to_iattx(struct gfx_iattx *gf_stat, struct iatt *iatt)
{
if (!iatt || !gf_stat)
@@ -721,6 +749,12 @@ dict_to_xdr(dict_t *this, gfx_dict *dict)
gfx_stat_from_iattx(&xpair->value.gfx_value_u.iatt,
(struct iatt *)dpair->value->data);
break;
+ case GF_DATA_TYPE_MDATA:
+ index++;
+ gfx_mdata_iatt_from_mdata_iatt(
+ &xpair->value.gfx_value_u.mdata_iatt,
+ (struct mdata_iatt *)dpair->value->data);
+ break;
case GF_DATA_TYPE_GFUUID:
index++;
memcpy(&xpair->value.gfx_value_u.uuid, dpair->value->data,
@@ -728,6 +762,7 @@ dict_to_xdr(dict_t *this, gfx_dict *dict)
break;
case GF_DATA_TYPE_PTR:
+ case GF_DATA_TYPE_STR_OLD:
index++;
/* Ideally, each type of data stored in dictionary
should have type. A pointer type shouldn't be
@@ -741,7 +776,7 @@ dict_to_xdr(dict_t *this, gfx_dict *dict)
xpair->value.gfx_value_u.other.other_len = dpair->value->len;
/* Change this to INFO, after taking the above down */
- gf_msg("dict", GF_LOG_INFO, EINVAL, LG_MSG_DICT_SERIAL_FAILED,
+ gf_msg("dict", GF_LOG_DEBUG, EINVAL, LG_MSG_DICT_SERIAL_FAILED,
"key '%s' would not be sent on wire in the future",
dpair->key);
break;
@@ -786,6 +821,7 @@ xdr_to_dict(gfx_dict *dict, dict_t **to)
dict_t *this = NULL;
unsigned char *uuid = NULL;
struct iatt *iatt = NULL;
+ struct mdata_iatt *mdata_iatt = NULL;
if (!to || !dict)
goto out;
@@ -853,7 +889,32 @@ xdr_to_dict(gfx_dict *dict, dict_t **to)
gfx_stat_to_iattx(&xpair->value.gfx_value_u.iatt, iatt);
ret = dict_set_iatt(this, key, iatt, false);
break;
+ case GF_DATA_TYPE_MDATA:
+ mdata_iatt = GF_CALLOC(1, sizeof(struct mdata_iatt),
+ gf_common_mt_char);
+ if (!mdata_iatt) {
+ errno = ENOMEM;
+ gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY,
+ "failed to allocate memory. key: %s", key);
+ ret = -1;
+ goto out;
+ }
+ gfx_mdata_iatt_to_mdata_iatt(
+ &xpair->value.gfx_value_u.mdata_iatt, mdata_iatt);
+ ret = dict_set_mdata(this, key, mdata_iatt, false);
+ if (ret != 0) {
+ GF_FREE(mdata_iatt);
+ gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM,
+ LG_MSG_DICT_SET_FAILED,
+ "failed to set the key (%s)"
+ " into dict",
+ key);
+ ret = -1;
+ goto out;
+ }
+ break;
case GF_DATA_TYPE_PTR:
+ case GF_DATA_TYPE_STR_OLD:
value = GF_MALLOC(xpair->value.gfx_value_u.other.other_len + 1,
gf_common_mt_char);
if (!value) {
diff --git a/rpc/xdr/src/glusterfs4-xdr.x b/rpc/xdr/src/glusterfs4-xdr.x
index 5ee86c24a0b..d3b1d0dfaf0 100644
--- a/rpc/xdr/src/glusterfs4-xdr.x
+++ b/rpc/xdr/src/glusterfs4-xdr.x
@@ -11,11 +11,9 @@
#ifdef RPC_XDR
%#include "rpc-pragmas.h"
#endif
-%#include "compat.h"
-%#include "glusterfs-fops.h"
+%#include <glusterfs/glusterfs-fops.h>
%#include "glusterfs3-xdr.h"
-
/* Need to consume iattx and new dict in all the fops */
struct gfx_iattx {
opaque ia_gfid[16];
@@ -46,7 +44,17 @@ struct gfx_iattx {
unsigned int mode; /* type of file and rwx mode */
};
-union gfx_value switch (gf_dict_data_type_t type) {
+struct gfx_mdata_iatt {
+ hyper ia_atime; /* last access time */
+ hyper ia_mtime; /* last modification time */
+ hyper ia_ctime; /* last status change time */
+
+ unsigned int ia_atime_nsec;
+ unsigned int ia_mtime_nsec;
+ unsigned int ia_ctime_nsec;
+};
+
+union gfx_value switch (int type) {
case GF_DATA_TYPE_INT:
hyper value_int;
case GF_DATA_TYPE_UINT:
@@ -60,7 +68,10 @@ union gfx_value switch (gf_dict_data_type_t type) {
case GF_DATA_TYPE_GFUUID:
opaque uuid[16];
case GF_DATA_TYPE_PTR:
+ case GF_DATA_TYPE_STR_OLD:
opaque other<>;
+ case GF_DATA_TYPE_MDATA:
+ gfx_mdata_iatt mdata_iatt;
};
/* AUTH */
@@ -628,6 +639,19 @@ struct gfx_seek_rsp {
struct gfx_setvolume_req {
gfx_dict dict;
} ;
+
+ struct gfx_copy_file_range_req {
+ opaque gfid1[16];
+ opaque gfid2[16];
+ quad_t fd_in;
+ quad_t fd_out;
+ u_quad_t off_in;
+ u_quad_t off_out;
+ unsigned int size;
+ unsigned int flag;
+ gfx_dict xdata; /* Extra data */
+};
+
struct gfx_setvolume_rsp {
int op_ret;
int op_errno;
@@ -755,126 +779,6 @@ struct gfx_setactivelk_req {
gfx_dict xdata;
};
-union compound_req_v2 switch (glusterfs_fop_t fop_enum) {
- case GF_FOP_STAT: gfx_stat_req compound_stat_req;
- case GF_FOP_READLINK: gfx_readlink_req compound_readlink_req;
- case GF_FOP_MKNOD: gfx_mknod_req compound_mknod_req;
- case GF_FOP_MKDIR: gfx_mkdir_req compound_mkdir_req;
- case GF_FOP_UNLINK: gfx_unlink_req compound_unlink_req;
- case GF_FOP_RMDIR: gfx_rmdir_req compound_rmdir_req;
- case GF_FOP_SYMLINK: gfx_symlink_req compound_symlink_req;
- case GF_FOP_RENAME: gfx_rename_req compound_rename_req;
- case GF_FOP_LINK: gfx_link_req compound_link_req;
- case GF_FOP_TRUNCATE: gfx_truncate_req compound_truncate_req;
- case GF_FOP_OPEN: gfx_open_req compound_open_req;
- case GF_FOP_READ: gfx_read_req compound_read_req;
- case GF_FOP_WRITE: gfx_write_req compound_write_req;
- case GF_FOP_STATFS: gfx_statfs_req compound_statfs_req;
- case GF_FOP_FLUSH: gfx_flush_req compound_flush_req;
- case GF_FOP_FSYNC: gfx_fsync_req compound_fsync_req;
- case GF_FOP_GETXATTR: gfx_getxattr_req compound_getxattr_req;
- case GF_FOP_SETXATTR: gfx_setxattr_req compound_setxattr_req;
- case GF_FOP_REMOVEXATTR: gfx_removexattr_req compound_removexattr_req;
- case GF_FOP_OPENDIR: gfx_opendir_req compound_opendir_req;
- case GF_FOP_FSYNCDIR: gfx_fsyncdir_req compound_fsyncdir_req;
- case GF_FOP_ACCESS: gfx_access_req compound_access_req;
- case GF_FOP_CREATE: gfx_create_req compound_create_req;
- case GF_FOP_FTRUNCATE: gfx_ftruncate_req compound_ftruncate_req;
- case GF_FOP_FSTAT: gfx_fstat_req compound_fstat_req;
- case GF_FOP_LK: gfx_lk_req compound_lk_req;
- case GF_FOP_LOOKUP: gfx_lookup_req compound_lookup_req;
- case GF_FOP_READDIR: gfx_readdir_req compound_readdir_req;
- case GF_FOP_INODELK: gfx_inodelk_req compound_inodelk_req;
- case GF_FOP_FINODELK: gfx_finodelk_req compound_finodelk_req;
- case GF_FOP_ENTRYLK: gfx_entrylk_req compound_entrylk_req;
- case GF_FOP_FENTRYLK: gfx_fentrylk_req compound_fentrylk_req;
- case GF_FOP_XATTROP: gfx_xattrop_req compound_xattrop_req;
- case GF_FOP_FXATTROP: gfx_fxattrop_req compound_fxattrop_req;
- case GF_FOP_FGETXATTR: gfx_fgetxattr_req compound_fgetxattr_req;
- case GF_FOP_FSETXATTR: gfx_fsetxattr_req compound_fsetxattr_req;
- case GF_FOP_RCHECKSUM: gfx_rchecksum_req compound_rchecksum_req;
- case GF_FOP_SETATTR: gfx_setattr_req compound_setattr_req;
- case GF_FOP_FSETATTR: gfx_fsetattr_req compound_fsetattr_req;
- case GF_FOP_READDIRP: gfx_readdirp_req compound_readdirp_req;
- case GF_FOP_RELEASE: gfx_release_req compound_release_req;
- case GF_FOP_RELEASEDIR: gfx_releasedir_req compound_releasedir_req;
- case GF_FOP_FREMOVEXATTR: gfx_fremovexattr_req compound_fremovexattr_req;
- case GF_FOP_FALLOCATE: gfx_fallocate_req compound_fallocate_req;
- case GF_FOP_DISCARD: gfx_discard_req compound_discard_req;
- case GF_FOP_ZEROFILL: gfx_zerofill_req compound_zerofill_req;
- case GF_FOP_IPC: gfx_ipc_req compound_ipc_req;
- case GF_FOP_SEEK: gfx_seek_req compound_seek_req;
- case GF_FOP_LEASE: gfx_lease_req compound_lease_req;
- default: void;
-};
-
-struct gfx_compound_req {
- int compound_version;
- glusterfs_compound_fop_t compound_fop_enum;
- compound_req_v2 compound_req_array<>;
- gfx_dict xdata;
-};
-
-union compound_rsp_v2 switch (glusterfs_fop_t fop_enum) {
- case GF_FOP_STAT: gfx_common_iatt_rsp compound_stat_rsp;
- case GF_FOP_READLINK: gfx_readlink_rsp compound_readlink_rsp;
- case GF_FOP_MKNOD: gfx_common_3iatt_rsp compound_mknod_rsp;
- case GF_FOP_MKDIR: gfx_common_3iatt_rsp compound_mkdir_rsp;
- case GF_FOP_UNLINK: gfx_common_2iatt_rsp compound_unlink_rsp;
- case GF_FOP_RMDIR: gfx_common_2iatt_rsp compound_rmdir_rsp;
- case GF_FOP_SYMLINK: gfx_common_3iatt_rsp compound_symlink_rsp;
- case GF_FOP_RENAME: gfx_rename_rsp compound_rename_rsp;
- case GF_FOP_LINK: gfx_common_3iatt_rsp compound_link_rsp;
- case GF_FOP_TRUNCATE: gfx_common_2iatt_rsp compound_truncate_rsp;
- case GF_FOP_OPEN: gfx_open_rsp compound_open_rsp;
- case GF_FOP_READ: gfx_read_rsp compound_read_rsp;
- case GF_FOP_WRITE: gfx_common_2iatt_rsp compound_write_rsp;
- case GF_FOP_STATFS: gfx_statfs_rsp compound_statfs_rsp;
- case GF_FOP_FLUSH: gfx_common_rsp compound_flush_rsp;
- case GF_FOP_FSYNC: gfx_common_2iatt_rsp compound_fsync_rsp;
- case GF_FOP_GETXATTR: gfx_common_dict_rsp compound_getxattr_rsp;
- case GF_FOP_SETXATTR: gfx_common_rsp compound_setxattr_rsp;
- case GF_FOP_REMOVEXATTR: gfx_common_rsp compound_removexattr_rsp;
- case GF_FOP_OPENDIR: gfx_open_rsp compound_opendir_rsp;
- case GF_FOP_FSYNCDIR: gfx_common_rsp compound_fsyncdir_rsp;
- case GF_FOP_ACCESS: gfx_common_rsp compound_access_rsp;
- case GF_FOP_CREATE: gfx_create_rsp compound_create_rsp;
- case GF_FOP_FTRUNCATE: gfx_common_2iatt_rsp compound_ftruncate_rsp;
- case GF_FOP_FSTAT: gfx_common_iatt_rsp compound_fstat_rsp;
- case GF_FOP_LK: gfx_lk_rsp compound_lk_rsp;
- case GF_FOP_LOOKUP: gfx_common_2iatt_rsp compound_lookup_rsp;
- case GF_FOP_READDIR: gfx_readdir_rsp compound_readdir_rsp;
- case GF_FOP_INODELK: gfx_common_rsp compound_inodelk_rsp;
- case GF_FOP_FINODELK: gfx_common_rsp compound_finodelk_rsp;
- case GF_FOP_ENTRYLK: gfx_common_rsp compound_entrylk_rsp;
- case GF_FOP_FENTRYLK: gfx_common_rsp compound_fentrylk_rsp;
- case GF_FOP_XATTROP: gfx_common_dict_rsp compound_xattrop_rsp;
- case GF_FOP_FXATTROP: gfx_common_dict_rsp compound_fxattrop_rsp;
- case GF_FOP_FGETXATTR: gfx_common_dict_rsp compound_fgetxattr_rsp;
- case GF_FOP_FSETXATTR: gfx_common_rsp compound_fsetxattr_rsp;
- case GF_FOP_RCHECKSUM: gfx_rchecksum_rsp compound_rchecksum_rsp;
- case GF_FOP_SETATTR: gfx_common_2iatt_rsp compound_setattr_rsp;
- case GF_FOP_FSETATTR: gfx_common_2iatt_rsp compound_fsetattr_rsp;
- case GF_FOP_READDIRP: gfx_readdirp_rsp compound_readdirp_rsp;
- case GF_FOP_RELEASE: gfx_common_rsp compound_release_rsp;
- case GF_FOP_RELEASEDIR: gfx_common_rsp compound_releasedir_rsp;
- case GF_FOP_FREMOVEXATTR: gfx_common_rsp compound_fremovexattr_rsp;
- case GF_FOP_FALLOCATE: gfx_common_2iatt_rsp compound_fallocate_rsp;
- case GF_FOP_DISCARD: gfx_common_2iatt_rsp compound_discard_rsp;
- case GF_FOP_ZEROFILL: gfx_common_2iatt_rsp compound_zerofill_rsp;
- case GF_FOP_IPC: gfx_common_rsp compound_ipc_rsp;
- case GF_FOP_SEEK: gfx_seek_rsp compound_seek_rsp;
- case GF_FOP_LEASE: gfx_lease_rsp compound_lease_rsp;
- default: void;
-};
-
-struct gfx_compound_rsp {
- int op_ret;
- int op_errno;
- compound_rsp_v2 compound_rsp_array<>;
- gfx_dict xdata;
-};
-
struct gfs4_inodelk_contention_req {
opaque gfid[16];
struct gf_proto_flock flock;
diff --git a/rpc/xdr/src/libgfxdr.sym b/rpc/xdr/src/libgfxdr.sym
index bd9131be7c6..8fa0e0ddd8a 100644
--- a/rpc/xdr/src/libgfxdr.sym
+++ b/rpc/xdr/src/libgfxdr.sym
@@ -28,6 +28,8 @@ xdr_gd1_mgmt_v3_brick_op_req
xdr_gd1_mgmt_v3_brick_op_rsp
xdr_gd1_mgmt_v3_commit_req
xdr_gd1_mgmt_v3_commit_rsp
+xdr_gd1_mgmt_v3_post_commit_req
+xdr_gd1_mgmt_v3_post_commit_rsp
xdr_gd1_mgmt_v3_lock_req
xdr_gd1_mgmt_v3_lock_rsp
xdr_gd1_mgmt_v3_post_val_req
@@ -251,6 +253,7 @@ xdr_to_write3args
xdr_vector_round_up
xdr_gfx_read_rsp
xdr_gfx_iattx
+xdr_gfx_mdata_iatt
xdr_gfx_value
xdr_gfx_dict_pair
xdr_gfx_dict
@@ -344,3 +347,4 @@ xdr_compound_req_v2
xdr_gfx_compound_req
xdr_compound_rsp_v2
xdr_gfx_compound_rsp
+xdr_gfx_copy_file_range_req
diff --git a/rpc/xdr/src/mount3udp.x b/rpc/xdr/src/mount3udp.x
index 4fafaa053f8..e8366df400c 100644
--- a/rpc/xdr/src/mount3udp.x
+++ b/rpc/xdr/src/mount3udp.x
@@ -11,7 +11,7 @@
#ifdef RPC_XDR
%#include "rpc-pragmas.h"
#endif
-%#include "compat.h"
+%#include <glusterfs/compat.h>
/* This is used by rpcgen to auto generate the rpc stubs.
* mount3udp_svc.c is heavily modified though
diff --git a/rpc/xdr/src/nlm4-xdr.x b/rpc/xdr/src/nlm4-xdr.x
index 0fb3a5945a1..847b0e64491 100644
--- a/rpc/xdr/src/nlm4-xdr.x
+++ b/rpc/xdr/src/nlm4-xdr.x
@@ -11,7 +11,7 @@
#ifdef RPC_XDR
%#include "rpc-pragmas.h"
#endif
-%#include "compat.h"
+%#include <glusterfs/compat.h>
/* .x file defined as according to the RFC */
diff --git a/rpc/xdr/src/nsm-xdr.x b/rpc/xdr/src/nsm-xdr.x
index 65b36b27d31..7c16a741f1d 100644
--- a/rpc/xdr/src/nsm-xdr.x
+++ b/rpc/xdr/src/nsm-xdr.x
@@ -11,7 +11,7 @@
#ifdef RPC_XDR
%#include "rpc-pragmas.h"
#endif
-%#include "compat.h"
+%#include <glusterfs/compat.h>
/*
* This defines the maximum length of the string
diff --git a/rpc/xdr/src/portmap-xdr.x b/rpc/xdr/src/portmap-xdr.x
index e360911bc07..23515572b9f 100644
--- a/rpc/xdr/src/portmap-xdr.x
+++ b/rpc/xdr/src/portmap-xdr.x
@@ -11,7 +11,7 @@
#ifdef RPC_XDR
%#include "rpc-pragmas.h"
#endif
-%#include "compat.h"
+%#include <glusterfs/compat.h>
struct pmap_port_by_brick_req {
string brick<>;
diff --git a/rpc/xdr/src/rpc-common-xdr.x b/rpc/xdr/src/rpc-common-xdr.x
index b01d282f368..baf8b4313c8 100644
--- a/rpc/xdr/src/rpc-common-xdr.x
+++ b/rpc/xdr/src/rpc-common-xdr.x
@@ -11,13 +11,12 @@
#ifdef RPC_XDR
%#include "rpc-pragmas.h"
#endif
-%#include "compat.h"
+%#include <glusterfs/glusterfs-fops.h>
/* This file has definition of few XDR structures which are
* not captured in any section specific file */
%#include "xdr-common.h"
-%#include "glusterfs-fops.h"
struct auth_glusterfs_parms_v2 {
int pid;
diff --git a/rpc/xdr/src/xdr-generic.h b/rpc/xdr/src/xdr-generic.h
index 367ef555dd3..794dda508cc 100644
--- a/rpc/xdr/src/xdr-generic.h
+++ b/rpc/xdr/src/xdr-generic.h
@@ -15,7 +15,7 @@
#include <rpc/types.h>
#include <rpc/xdr.h>
-#include "compat.h"
+#include <glusterfs/compat.h>
#define xdr_decoded_remaining_addr(xdr) ((&xdr)->x_private)
#define xdr_decoded_remaining_len(xdr) ((&xdr)->x_handy)
diff --git a/rpc/xdr/src/xdr-nfs3.c b/rpc/xdr/src/xdr-nfs3.c
index aca9a299b0e..cfccaaa89b8 100644
--- a/rpc/xdr/src/xdr-nfs3.c
+++ b/rpc/xdr/src/xdr-nfs3.c
@@ -23,7 +23,7 @@
#endif
#include "xdr-nfs3.h"
-#include "mem-pool.h"
+#include <glusterfs/mem-pool.h>
#include "xdr-common.h"
bool_t
diff --git a/run-tests.sh b/run-tests.sh
index 4e722d21106..e2a1655d8e0 100755
--- a/run-tests.sh
+++ b/run-tests.sh
@@ -2,6 +2,14 @@
# Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com>
#
+# As many tests are designed to take values of variables from 'env.rc',
+# it is good to source the file. While it is also required to source the
+# file individually in each tests (as it should be possible to run the
+# tests separately), exporting variables from env.rc is not harmful if
+# done here
+
+source ./tests/env.rc
+
export TZ=UTC
force="no"
head="yes"
@@ -14,6 +22,8 @@ result_output="/tmp/gluster_regression.txt"
section_separator="========================================"
run_timeout=200
kill_after_time=5
+nfs_tests=$RUN_NFS_TESTS
+
# Option below preserves log tarballs for each run of a test separately
# named: <test>-iteration-<n>.tar
# If set to any other value, then log tarball is just named after the test and
@@ -255,8 +265,10 @@ function match()
# G_TESTDEF_TEST_STATUS_NETBSD7
# Some examples:
# G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=123456
+# G_TESTDEF_TEST_STATUS_CENTOS6=BRICK_MUX_BAD_TEST,BUG=123456
# G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=4444444
# G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=123456;555555
+# G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TESTS,BUG=1385758
# You can change status of test to enabled or delete the line only if all the
# bugs are closed or modified or if the patch fixes it.
function get_test_status ()
@@ -316,6 +328,7 @@ function get_bug_list_for_disabled_test ()
function run_tests()
{
RES=0
+ FLAKY=''
FAILED=''
TESTS_NEEDED_RETRY=''
GENERATED_CORE=''
@@ -337,15 +350,16 @@ function run_tests()
timeout_cmd_exists="no"
fi
- for t in $(find ${regression_testsdir}/tests -name '*.t' \
- | LC_COLLATE=C sort) ; do
+ all_tests=($(find ${regression_testsdir}/tests -name '*.t' | sort))
+ all_tests_cnt=${#all_tests[@]}
+ for t in "${all_tests[@]}" ; do
old_cores=$(ls /*-*.core 2> /dev/null | wc -l)
total_tests=$((total_tests+1))
if match $t "$@" ; then
selected_tests=$((selected_tests+1))
echo
- echo $section_separator$section_separator
- if [[ $(get_test_status $t) == "BAD_TEST" ]] && \
+ echo $section_separator "(${total_tests} / ${all_tests_cnt})" $section_separator
+ if [[ $(get_test_status $t) =~ "BAD_TEST" ]] && \
[[ $skip_bad_tests == "yes" ]]
then
skipped_bad_tests=$((skipped_bad_tests+1))
@@ -365,6 +379,14 @@ function run_tests()
echo
continue
fi
+ if [[ $(get_test_status $t) == "NFS_TEST" ]] && \
+ [[ $nfs_tests == "no" ]]
+ then
+ echo "Skipping nfs test file $t"
+ echo $section_separator$section_separator
+ echo
+ continue
+ fi
total_run_tests=$((total_run_tests+1))
echo "[$(date +%H:%M:%S)] Running tests in file $t"
starttime="$(date +%s)"
@@ -375,7 +397,7 @@ function run_tests()
cmd_timeout=$(grep "SCRIPT_TIMEOUT=" ${t} | cut -f2 -d'=');
echo "Timeout set is ${cmd_timeout}, default ${run_timeout}"
fi
- timeout -k ${kill_after_time} ${cmd_timeout} prove -vmfe '/bin/bash' ${t}
+ timeout --foreground -k ${kill_after_time} ${cmd_timeout} prove -vmfe '/bin/bash' ${t}
else
prove -vmfe '/bin/bash' ${t}
fi
@@ -399,7 +421,7 @@ function run_tests()
echo ""
if [ ${timeout_cmd_exists} == "yes" ]; then
- timeout -k ${kill_after_time} ${cmd_timeout} prove -vmfe '/bin/bash' ${t}
+ timeout --foreground -k ${kill_after_time} ${cmd_timeout} prove -vmfe '/bin/bash' ${t}
else
prove -vmfe '/bin/bash' ${t}
fi
@@ -412,9 +434,17 @@ function run_tests()
TESTS_NEEDED_RETRY="${TESTS_NEEDED_RETRY}${t} "
fi
+
+
if [ ${TMP_RES} -ne 0 ] ; then
- RES=${TMP_RES}
- FAILED="${FAILED}${t} "
+ if [[ "$t" == *"tests/000-flaky/"* ]]; then
+ FLAKY="${FLAKY}${t} "
+ echo "FAILURE -> SUCCESS: Flaky test"
+ TMP_RES=0
+ else
+ RES=${TMP_RES}
+ FAILED="${FAILED}${t} "
+ fi
fi
new_cores=$(ls /*-*.core 2> /dev/null | wc -l)
@@ -449,8 +479,10 @@ function run_tests()
echo "$key - ${ELAPSEDTIMEMAP["$key"]} second"
done | sort -rn -k3
- # Output the errors into a file
+ # initialize the output file
echo > "${result_output}"
+
+ # Output the errors into a file
if [ ${RES} -ne 0 ] ; then
FAILED=$( echo ${FAILED} | tr ' ' '\n' | sort -u )
FAILED_COUNT=$( echo -n "${FAILED}" | grep -c '^' )
@@ -463,7 +495,13 @@ function run_tests()
TESTS_NEEDED_RETRY=$( echo ${TESTS_NEEDED_RETRY} | tr ' ' '\n' | sort -u )
RETRY_COUNT=$( echo -n "${TESTS_NEEDED_RETRY}" | grep -c '^' )
if [ ${RETRY_COUNT} -ne 0 ] ; then
- echo -e "\n${RETRY_COUNT} test(s) needed retry \n${TESTS_NEEDED_RETRY}"
+ echo -e "\n${RETRY_COUNT} test(s) needed retry \n${TESTS_NEEDED_RETRY}" >> "${result_output}"
+ fi
+
+ FLAKY_TESTS_FAILED=$( echo ${FLAKY} | tr ' ' '\n' | sort -u )
+ RETRY_COUNT=$( echo -n "${FLAKY_TESTS_FAILED}" | grep -c '^' )
+ if [ ${RETRY_COUNT} -ne 0 ] ; then
+ echo -e "\n${RETRY_COUNT} flaky test(s) marked as success even though they failed \n${FLAKY_TESTS_FAILED}" >> "${result_output}"
fi
echo
@@ -492,8 +530,38 @@ function run_head_tests()
run_tests "$htests"
}
-function parse_args () {
- args=`getopt frcbkphHo:t: "$@"`
+function show_usage ()
+{
+ cat <<EOF
+Usage: $0 <opts> [<glob>|<bzid>]...
+
+Options:
+
+-f force
+-h skip tests altering from HEAD
+-H run only tests altering from HEAD
+-r retry failed tests
+-R do not retry failed tests
+-c dont't exit on failure
+-b don't skip bad tests
+-k don't skip known bugs
+-p don't keep logs from preceding runs
+-o OUTPUT
+-t TIMEOUT
+-n skip NFS tests
+--help
+EOF
+}
+
+usage="no"
+
+function parse_args ()
+{
+ args=`getopt -u -l help frRcbkphHno:t: "$@"`
+ if ! [ $? -eq 0 ]; then
+ show_usage
+ exit 1
+ fi
set -- $args
while [ $# -gt 0 ]; do
case "$1" in
@@ -501,12 +569,15 @@ function parse_args () {
-h) head="no" ;;
-H) head="only" ;;
-r) retry="yes" ;;
+ -R) retry="no" ;;
-c) exit_on_failure="no" ;;
-b) skip_bad_tests="no" ;;
-k) skip_known_bugs="no" ;;
-p) skip_preserve_logs="no" ;;
-o) result_output="$2"; shift;;
-t) run_timeout="$2"; shift;;
+ -n) nfs_tests="no";;
+ --help) usage="yes" ;;
--) shift; break;;
esac
shift
@@ -521,6 +592,10 @@ echo
# Get user options
parse_args "$@"
+if [ x"$usage" == x"yes" ]; then
+ show_usage
+ exit 0
+fi
# Make sure we're running as the root user
check_user
diff --git a/tests/00-geo-rep/00-georep-verify-non-root-setup.t b/tests/00-geo-rep/00-georep-verify-non-root-setup.t
new file mode 100644
index 00000000000..a55fd3e5e6a
--- /dev/null
+++ b/tests/00-geo-rep/00-georep-verify-non-root-setup.t
@@ -0,0 +1,294 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../geo-rep.rc
+. $(dirname $0)/../env.rc
+
+SCRIPT_TIMEOUT=600
+
+### Basic Non-root geo-rep setup test with Distribute Replicate volumes
+
+##Cleanup and start glusterd
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+
+##Variables
+GEOREP_CLI="$CLI volume geo-replication"
+master=$GMV0
+SH0="127.0.0.1"
+slave=${SH0}::${GSV0}
+num_active=2
+num_passive=2
+master_mnt=$M0
+slave_mnt=$M1
+
+##User and group to be used for non-root geo-rep setup
+usr="nroot"
+grp="ggroup"
+
+slave_url=$usr@$slave
+slave_vol=$GSV0
+ssh_url=$usr@$SH0
+
+############################################################
+#SETUP VOLUMES AND VARIABLES
+
+##create_and_start_master_volume
+TEST $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2,3,4};
+TEST $CLI volume start $GMV0
+
+##create_and_start_slave_volume
+TEST $CLI volume create $GSV0 replica 2 $H0:$B0/${GSV0}{1,2,3,4};
+TEST $CLI volume start $GSV0
+
+##Mount master
+#TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
+
+##Mount slave
+#TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+
+
+##########################################################
+#TEST FUNCTIONS
+
+function distribute_key_non_root()
+{
+ ${GLUSTER_LIBEXECDIR}/set_geo_rep_pem_keys.sh $usr $master $slave_vol
+ echo $?
+}
+
+
+function check_status_non_root()
+{
+ local search_key=$1
+ $GEOREP_CLI $master $slave_url status | grep -F "$search_key" | wc -l
+}
+
+
+function check_and_clean_group()
+{
+ if [ $(getent group $grp) ]
+ then
+ groupdel $grp;
+ echo $?
+ else
+ echo 0
+ fi
+}
+
+function clean_lock_files()
+{
+ if [ ! -f /etc/passwd.lock ];
+ then
+ rm -rf /etc/passwd.lock;
+ fi
+
+ if [ ! -f /etc/group.lock ];
+ then
+ rm -rf /etc/group.lock;
+ fi
+
+ if [ ! -f /etc/shadow.lock ];
+ then
+ rm -rf /etc/shadow.lock;
+ fi
+
+ if [ ! -f /etc/gshadow.lock ];
+ then
+ rm -rf /etc/gshadow.lock;
+ fi
+}
+
+
+###########################################################
+#SETUP NON-ROOT GEO REPLICATION
+
+##Create ggroup group
+##First test if group exists and then create new one
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_and_clean_group
+
+##cleanup *.lock files
+
+clean_lock_files
+
+TEST /usr/sbin/groupadd $grp
+
+clean_lock_files
+##Del if exists and create non-root user and assign it to newly created group
+userdel -r -f $usr
+TEST /usr/sbin/useradd -G $grp $usr
+
+##Modify password for non-root user to have control over distributing ssh-key
+echo "$usr:pass" | chpasswd
+
+##Set up mountbroker root
+TEST gluster-mountbroker setup /var/mountbroker-root $grp
+
+##Associate volume and non-root user to the mountbroker
+TEST gluster-mountbroker add $slave_vol $usr
+
+##Check ssh setting for clear text passwords
+sed '/^PasswordAuthentication /{s/no/yes/}' -i /etc/ssh/sshd_config && grep '^PasswordAuthentication ' /etc/ssh/sshd_config && service sshd restart
+
+
+##Restart glusterd to reflect mountbroker changages
+TEST killall_gluster;
+TEST glusterd;
+TEST pidof glusterd;
+
+##Create, start and mount meta_volume
+TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
+TEST $CLI volume start $META_VOL
+TEST mkdir -p $META_MNT
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "3" brick_count ${META_VOL}
+TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
+
+##Mount master
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "4" brick_count $GMV0
+TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
+
+##Mount slave
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "4" brick_count $GSV0
+TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+
+## Check status of mount-broker
+TEST gluster-mountbroker status
+
+
+##Setup password-less ssh for non-root user
+#sshpass -p "pass" ssh-copy-id -i ~/.ssh/id_rsa.pub $ssh_url
+##Run ssh agent
+eval "$(ssh-agent -s)"
+PASS="pass"
+
+
+##Create a temp script to echo the SSH password, used by SSH_ASKPASS
+
+SSH_ASKPASS_SCRIPT=/tmp/ssh-askpass-script
+cat > ${SSH_ASKPASS_SCRIPT} <<EOL
+#!/bin/bash
+echo "${PASS}"
+EOL
+chmod u+x ${SSH_ASKPASS_SCRIPT}
+
+##set no display, necessary for ssh to use with setsid and SSH_ASKPASS
+export DISPLAY
+
+export SSH_ASKPASS=${SSH_ASKPASS_SCRIPT}
+
+DISPLAY=: setsid ssh-copy-id -o 'PreferredAuthentications=password' -o 'StrictHostKeyChecking=no' -i ~/.ssh/id_rsa.pub $ssh_url
+
+##Setting up PATH for gluster binaries in case of source installation
+##ssh -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $ssh_url "echo "export PATH=$PATH:/usr/local/sbin" >> ~/.bashrc"
+
+##Creating secret pem pub file
+TEST gluster-georep-sshkey generate
+
+##Create geo-rep non-root setup
+
+TEST $GEOREP_CLI $master $slave_url create push-pem
+
+#check for session creation
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_non_root "Created"
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave_url config gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave_url config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
+
+## Test for key distribution
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 distribute_key_non_root
+
+##Wait for common secret pem file to be created
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
+
+#Enable_metavolume
+TEST $GEOREP_CLI $master $slave config use_meta_volume true
+
+#Start_georep
+TEST $GEOREP_CLI $master $slave_url start
+
+## Meta volume is enabled so looking for 2 Active and 2 Passive sessions
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_non_root "Active"
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_non_root "Passive"
+
+#Pause geo-replication session
+TEST $GEOREP_CLI $master $slave_url pause
+
+#Resume geo-replication session
+TEST $GEOREP_CLI $master $slave_url resume
+
+#Validate failure of volume stop when geo-rep is running
+TEST ! $CLI volume stop $GMV0
+
+#Negative test for ssh-port
+#Port should be integer and between 1-65535 range
+
+TEST ! $GEOREP_CLI $master $slave_url config ssh-port -22
+
+TEST ! $GEOREP_CLI $master $slave_url config ssh-port abc
+
+TEST ! $GEOREP_CLI $master $slave_url config ssh-port 6875943
+
+TEST ! $GEOREP_CLI $master $slave_url config ssh-port 4.5
+
+TEST ! $GEOREP_CLI $master $slave_url config ssh-port 22a
+
+#Config Set ssh-port to validate int validation
+TEST $GEOREP_CLI $master $slave config ssh-port 22
+
+#Hybrid directory rename test BZ#1763439
+
+TEST $GEOREP_CLI $master $slave_url config change_detector xsync
+#verify master and slave mount
+
+EXPECT_WITHIN $CHECK_MOUNT_TIMEOUT "^1$" check_mounted ${master_mnt}
+EXPECT_WITHIN $CHECK_MOUNT_TIMEOUT "^1$" check_mounted ${slave_mnt}
+
+#Create test data for hybrid crawl
+TEST mkdir ${master_mnt}/dir1
+TEST mkdir ${master_mnt}/dir1/dir2
+TEST mkdir ${master_mnt}/dir1/dir3
+TEST mkdir ${master_mnt}/hybrid_d1
+
+mv ${master_mnt}/hybrid_d1 ${master_mnt}/hybrid_rn_d1
+mv ${master_mnt}/dir1/dir2 ${master_mnt}/rn_dir2
+mv ${master_mnt}/dir1/dir3 ${master_mnt}/dir1/rn_dir3
+
+#Verify hybrid crawl data on slave
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/dir1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/hybrid_rn_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/rn_dir2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/dir1/rn_dir3
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave_url stop
+
+#Delete Geo-rep
+TEST $GEOREP_CLI $master $slave_url delete
+
+#Cleanup authorized_keys
+sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' /home/$usr/.ssh/authorized_keys
+sed -i '/^command=.*gsyncd.*/d' /home/$usr/.ssh/authorized_keys
+
+#clear mountbroker
+gluster-mountbroker remove --user $usr
+gluster-mountbroker remove --volume $slave_vol
+
+#delete group and user created for non-root setup
+TEST userdel -r -f $usr
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_and_clean_group
+
+##password script cleanup
+rm -rf /tmp/ssh-askpass-script
+
+
+cleanup;
+
diff --git a/tests/00-geo-rep/00-georep-verify-setup.t b/tests/00-geo-rep/00-georep-verify-setup.t
index 8cbf203a207..0d46c04102d 100644
--- a/tests/00-geo-rep/00-georep-verify-setup.t
+++ b/tests/00-geo-rep/00-georep-verify-setup.t
@@ -5,7 +5,8 @@
. $(dirname $0)/../geo-rep.rc
. $(dirname $0)/../env.rc
-SCRIPT_TIMEOUT=300
+SCRIPT_TIMEOUT=400
+GEO_REP_TIMEOUT=200
##Cleanup and start glusterd
cleanup;
@@ -51,6 +52,10 @@ TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
#BASIC GEO-REPLICATION TESTS
############################################################
+#Test invalid slave url
+TEST ! $GEOREP_CLI $master ${SH0}:${GSV0} create push-pem
+TEST ! $GEOREP_CLI $master ${SH0}:::${GSV0} create push-pem
+
#Create geo-rep session
TEST create_georep_session $master $slave
@@ -82,6 +87,15 @@ TEST pidof glusterd
TEST $CLI get-state detail
TEST pidof glusterd
+#Pause geo-replication session
+TEST $GEOREP_CLI $master $slave pause
+
+#Resume geo-replication session
+TEST $GEOREP_CLI $master $slave resume
+
+#Validate failure of volume stop when geo-rep is running
+TEST ! $CLI volume stop $GMV0
+
#Stop Geo-rep
TEST $GEOREP_CLI $master $slave stop
diff --git a/tests/00-geo-rep/01-georep-glusterd-tests.t b/tests/00-geo-rep/01-georep-glusterd-tests.t
new file mode 100644
index 00000000000..47d5116af26
--- /dev/null
+++ b/tests/00-geo-rep/01-georep-glusterd-tests.t
@@ -0,0 +1,213 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../geo-rep.rc
+. $(dirname $0)/../env.rc
+
+SCRIPT_TIMEOUT=300
+
+#Cleanup and start glusterd
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+
+#Variables
+GEOREP_CLI="$CLI volume geo-replication"
+master=$GMV0
+SH0="127.0.0.1"
+slave=${SH0}::${GSV0}
+slave1=root@${SH0}::${GSV1}
+num_active=2
+num_passive=2
+master_mnt=$M0
+slave_mnt=$M1
+
+############################################################
+#SETUP VOLUMES AND GEO-REPLICATION
+############################################################
+
+#create_and_start_master_volume
+TEST $CLI volume create $GMV0 replica 3 $H0:$B0/${GMV0}{1,2,3};
+
+#Negative testase: Create geo-rep session, master is not started
+TEST ! $GEOREP_CLI $master $slave create push-pem
+
+TEST $CLI volume start $GMV0
+
+#create_and_start_slave_volume
+TEST $CLI volume create $GSV0 replica 3 $H0:$B0/${GSV0}{1,2,3};
+
+#Negative testcase: Create geo-rep session, slave is not started
+TEST ! $GEOREP_CLI $master $slave create push-pem
+
+TEST $CLI volume start $GSV0
+
+#create_and_start_slave1_volume
+TEST $CLI volume create $GSV1 replica 3 $H0:$B0/${GSV1}{1,2,3};
+TEST $CLI volume start $GSV1
+
+#Create, start and mount meta_volume
+TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
+TEST $CLI volume start $META_VOL
+TEST mkdir -p $META_MNT
+TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
+
+#Mount master
+TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
+
+#Mount slave
+TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+
+############################################################
+#BASIC GEO-REPLICATION GLUSTERD TESTS WITH FANOUT SETUP
+############################################################
+
+#Negative testcase: Test invalid master
+TEST ! $GEOREP_CLI master1 ${SH0}::${GSV0} create push-pem
+
+#Negatvie testcase: Test invalid slave
+TEST ! $GEOREP_CLI $master ${SH0}::slave3 create push-pem
+
+##------------------- Session 1 Creation Begin-----------------##
+#Create geo-rep session
+TEST create_georep_session $master $slave
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Enable_metavolume
+TEST $GEOREP_CLI $master $slave config use_meta_volume true
+##------------------- Session 1 Creation End-----------------##
+
+##------------------- Session 2 Creation Begin-----------------##
+#Create geo-rep session2
+TEST $GEOREP_CLI $master $slave1 create ssh-port 22 no-verify
+
+#Config gluster-command-dir for session2
+TEST $GEOREP_CLI $master $slave1 config gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Config gluster-command-dir for session2
+TEST $GEOREP_CLI $master $slave1 config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Enable_metavolume for session2
+TEST $GEOREP_CLI $master $slave1 config use_meta_volume true
+##------------------- Session 2 Creation End-----------------##
+
+#Wait for common secret pem file to be created
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
+
+#Verify the keys are distributed
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed
+
+#Start_georep
+TEST $GEOREP_CLI $master $slave start
+
+#check geo-rep status without specifying master and slave volumes
+TEST $GEOREP_CLI status
+
+#Start_georep force
+TEST $GEOREP_CLI $master $slave1 start force
+
+#Negative testcase: Create the same session after start, fails
+#With root@ prefix
+TEST ! $GEOREP_CLI $master $slave1 create push-pem
+#Without root@ prefix
+TEST ! $GEOREP_CLI $master ${SH0}::${GSV1} create push-pem
+TEST $GEOREP_CLI $master $slave1 create push-pem force
+
+##------------------- Fanout status testcases Begin --------------##
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Passive"
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_fanout_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_fanout_status_num_rows "Passive"
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_fanout_status_detail_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_fanout_status_detail_num_rows "Passive"
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_all_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_all_status_num_rows "Passive"
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_all_status_detail_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_all_status_detail_num_rows "Passive"
+
+##------------------- Fanout status testcases End --------------##
+
+##------Checkpoint Testcase Begin---------------##
+#Write I/O
+echo "test data" > $M0/file1
+TEST $GEOREP_CLI $master $slave config checkpoint now
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_checkpoint_met $master $slave
+touch $M0
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 verify_checkpoint_met $master $slave
+##------Checkpoint Testcase End---------------##
+
+##------------------ Geo-rep config testcases Begin--------------------##
+TEST $GEOREP_CLI $master $slave config
+TEST ! $GEOREP_CLI $master $slave config arsync-options '-W'
+TEST $GEOREP_CLI $master $slave config rsync-options '-W'
+TEST $GEOREP_CLI $master $slave config rsync-options
+TEST $GEOREP_CLI $master $slave config \!rsync-options
+TEST $GEOREP_CLI $master $slave config sync-xattrs false
+##------------------ Geo-rep config testcases End --------------------##
+
+##---------------- Pause/Resume testcase Begin-------------##
+#Negative testcase: Resume geo-replication session when not paused
+TEST ! $GEOREP_CLI $master $slave1 resume
+TEST $GEOREP_CLI $master $slave1 resume force
+
+#Pause geo-replication session with root@
+TEST $GEOREP_CLI $master $slave1 pause force
+
+#Resume geo-replication session with root@
+TEST $GEOREP_CLI $master $slave1 resume force
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave1 stop force
+
+#Negative testcase: Resume geo-replication session after geo-rep stop
+TEST ! $GEOREP_CLI $master $slave1 resume
+##---------------- Pause/Resume testcase End-------------##
+
+##-----------------glusterd slave key/value upgrade testcase Begin ---------##
+#Upgrade test of slave key stored in glusterd info file
+src=$(grep slave2 /var/lib/glusterd/vols/$master/info)
+#Remove slave uuuid (last part after divided by : )
+dst=${src%:*}
+
+#Update glusterd info file with old slave format
+sed -i "s|$src|$dst|g" /var/lib/glusterd/vols/$master/info
+TEST ! grep $src /var/lib/glusterd/vols/$master/info
+
+#Restart glusterd to update in-memory volinfo
+TEST pkill glusterd
+TEST glusterd;
+TEST pidof glusterd
+
+#Start geo-rep and validate slave format is updated
+TEST $GEOREP_CLI $master $slave1 start force
+TEST grep $src /var/lib/glusterd/vols/$master/info
+##-----------------glusted slave key/value upgrade testcase End ---------##
+
+#Negative testcase: Delete Geo-rep 2 fails as geo-rep is running
+TEST ! $GEOREP_CLI $master $slave1 delete
+
+#Stop and Delete Geo-rep 2
+TEST $GEOREP_CLI $master $slave1 stop force
+TEST $GEOREP_CLI $master $slave1 delete reset-sync-time
+
+#Stop and Delete Geo-rep 1
+TEST $GEOREP_CLI $master $slave stop
+TEST $GEOREP_CLI $master $slave delete
+
+#Cleanup authorized keys
+sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
+sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/00-geo-rep/bug-1600145.t b/tests/00-geo-rep/bug-1600145.t
new file mode 100644
index 00000000000..1d38bf92682
--- /dev/null
+++ b/tests/00-geo-rep/bug-1600145.t
@@ -0,0 +1,109 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../geo-rep.rc
+. $(dirname $0)/../env.rc
+
+### Basic Tests with Distribute Replicate volumes
+
+##Cleanup and start glusterd
+cleanup;
+SCRIPT_TIMEOUT=600
+TEST glusterd;
+TEST pidof glusterd
+
+##Variables
+GEOREP_CLI="$CLI volume geo-replication"
+master=$GMV0
+SH0="127.0.0.1"
+slave=${SH0}::${GSV0}
+num_active=2
+num_passive=2
+master_mnt=$M0
+slave_mnt=$M1
+
+############################################################
+#SETUP VOLUMES AND GEO-REPLICATION
+############################################################
+
+##create_and_start_master_volume
+TEST $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2};
+gluster v set all cluster.brick-multiplex on
+TEST $CLI volume start $GMV0
+
+##create_and_start_slave_volume
+TEST $CLI volume create $GSV0 replica 2 $H0:$B0/${GSV0}{1,2};
+TEST $CLI volume start $GSV0
+
+##Create, start and mount meta_volume
+TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
+TEST $CLI volume start $META_VOL
+TEST mkdir -p $META_MNT
+TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
+
+############################################################
+#BASIC GEO-REPLICATION TESTS
+############################################################
+
+#Create geo-rep session
+TEST create_georep_session $master $slave
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Enable_metavolume
+TEST $GEOREP_CLI $master $slave config use_meta_volume true
+
+#Wait for common secret pem file to be created
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
+
+#Verify the keys are distributed
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed
+
+#Count no. of changelog socket
+brick_pid=`ps -aef | grep glusterfsd | grep -v "shared_storage" | grep -v grep | awk -F " " '{print $2}'`
+n=$(grep -Fc "changelog" /proc/$brick_pid/net/unix)
+
+#Start_georep
+TEST $GEOREP_CLI $master $slave start
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Passive"
+
+#Count no. of changelog socket
+brick_pid=`ps -aef | grep glusterfsd | grep -v "shared_storage" | grep -v grep | awk -F " " '{print $2}'`
+c=$(grep -Fc "changelog" /proc/$brick_pid/net/unix)
+let expected=n+2
+TEST [ "$c" -eq "$expected" ]
+
+#Kill the "Active" brick
+brick=$($GEOREP_CLI $master $slave status | grep -F "Active" | awk {'print $3'})
+cat /proc/$brick_pid/net/unix | grep "changelog"
+TEST kill_brick $GMV0 $H0 $brick
+#Expect geo-rep status to be "Faulty"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Faulty"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Active"
+
+#Count no. of changelog socket
+brick_pid=`ps -aef | grep glusterfsd | grep -v "shared_storage" | grep -v grep | awk -F " " '{print $2}'`
+cat /proc/$brick_pid/net/unix | grep "changelog"
+ls -lrth /proc/$brick_pid/fd | grep "socket"
+c=$(grep -Fc "changelog" /proc/$brick_pid/net/unix)
+TEST [ "$c" -eq "$n" ]
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Delete Geo-rep
+TEST $GEOREP_CLI $master $slave delete
+
+#Cleanup authorized keys
+sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
+sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
+
+cleanup;
diff --git a/tests/00-geo-rep/bug-1708603.t b/tests/00-geo-rep/bug-1708603.t
new file mode 100644
index 00000000000..26913f1d318
--- /dev/null
+++ b/tests/00-geo-rep/bug-1708603.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../geo-rep.rc
+. $(dirname $0)/../env.rc
+
+SCRIPT_TIMEOUT=300
+
+##Cleanup and start glusterd
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+
+##Variables
+GEOREP_CLI="gluster volume geo-replication"
+master=$GMV0
+SH0="127.0.0.1"
+slave=${SH0}::${GSV0}
+num_active=2
+num_passive=2
+master_mnt=$M0
+slave_mnt=$M1
+
+############################################################
+#SETUP VOLUMES AND GEO-REPLICATION
+############################################################
+
+##create_and_start_master_volume
+TEST $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2,3,4};
+TEST $CLI volume start $GMV0
+
+##create_and_start_slave_volume
+TEST $CLI volume create $GSV0 replica 2 $H0:$B0/${GSV0}{1,2,3,4};
+TEST $CLI volume start $GSV0
+
+##Mount master
+TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
+
+##Mount slave
+TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+
+#Create geo-rep session
+TEST create_georep_session $master $slave
+
+echo n | $GEOREP_CLI $master $slave config ignore-deletes true >/dev/null 2>&1
+EXPECT "false" echo $($GEOREP_CLI $master $slave config ignore-deletes)
+echo y | $GEOREP_CLI $master $slave config ignore-deletes true
+EXPECT "true" echo $($GEOREP_CLI $master $slave config ignore-deletes)
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Delete Geo-rep
+TEST $GEOREP_CLI $master $slave delete
+
+#Cleanup authorized keys
+sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
+sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t b/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t
index 38ef9f0b6d4..c45d2ff62ce 100644
--- a/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t
+++ b/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t
@@ -86,6 +86,9 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
#Verify the keys are distributed
EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed
+#Verify "features.read-only" Option
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_slave_read_only $GSV0
+
#Start_georep
TEST $GEOREP_CLI $master $slave start
@@ -156,7 +159,7 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/changelog_chown_f1
#logrotate
EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/logrotate
-EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate
#CREATE+RENAME
EXPECT_WITHIN $GEO_REP_TIMEOUT 0 create_rename_ok ${slave_mnt}/create_rename_test_file
@@ -171,7 +174,7 @@ TEST $GEOREP_CLI $master $slave stop
TEST create_symlink_rename_mkdir_data
#hardlink-rename-unlink usecase. Sonatype Nexus3 Usecase. BUG:1512483
-TEST create_hardlink_rename_data
+#TEST create_hardlink_rename_data
#rsnapshot usecase
TEST create_rsnapshot_data
@@ -191,7 +194,7 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_rename_ok ${slave_mnt}/hardlink_rename
EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_symlink_rename_mkdir_data ${slave_mnt}/symlink_test1
#hardlink-rename-unlink usecase. Sonatype Nexus3 Usecase. BUG:1512483
-EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_hardlink_rename_data ${slave_mnt}
+#EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_hardlink_rename_data ${slave_mnt}
#rsnapshot usecase
EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt}
@@ -200,8 +203,19 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt}
TEST gluster volume geo-rep $master $slave config rsync-options "--whole-file"
TEST "echo sampledata > $master_mnt/rsync_option_test_file"
+#rename with existing destination case BUG:1694820
+TEST create_rename_with_existing_destination ${master_mnt}
+#verify rename with existing destination case BUG:1694820
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt}
+
#Verify arequal for whole volume
-EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt} ${slave_mnt}
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt}
+
+#Pause geo-replication session
+TEST $GEOREP_CLI $master $slave pause force
+
+#Resume geo-replication session
+TEST $GEOREP_CLI $master $slave resume force
#Stop Geo-rep
TEST $GEOREP_CLI $master $slave stop
diff --git a/tests/00-geo-rep/georep-basic-dr-rsync.t b/tests/00-geo-rep/georep-basic-dr-rsync.t
index 4c330de1c10..d785aa59fc9 100644
--- a/tests/00-geo-rep/georep-basic-dr-rsync.t
+++ b/tests/00-geo-rep/georep-basic-dr-rsync.t
@@ -71,6 +71,22 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Created"
#Config gluster-command-dir
TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR}
+#Negative test for ssh-port
+#Port should be integer and between 1-65535 range
+
+TEST ! $GEOREP_CLI $master $slave config ssh-port -22
+
+TEST ! $GEOREP_CLI $master $slave config ssh-port abc
+
+TEST ! $GEOREP_CLI $master $slave config ssh-port 6875943
+
+TEST ! $GEOREP_CLI $master $slave config ssh-port 4.5
+
+TEST ! $GEOREP_CLI $master $slave config ssh-port 22a
+
+#Config Set ssh-port to validate int validation
+TEST $GEOREP_CLI $master $slave config ssh-port 22
+
#Config gluster-command-dir
TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
@@ -86,6 +102,9 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
#Verify the keys are distributed
EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed
+#Verify "features.read-only" Option
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_slave_read_only $GSV0
+
#Start_georep
TEST $GEOREP_CLI $master $slave start
@@ -107,6 +126,7 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/hybrid_chown_f1
#Check History Crawl.
TEST $GEOREP_CLI $master $slave stop
TEST create_data "history"
+TEST create_rename_symlink_case
TEST $GEOREP_CLI $master $slave start
EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Passive"
@@ -156,7 +176,7 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/changelog_chown_f1
#logrotate
EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/logrotate
-EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate
#CREATE+RENAME
EXPECT_WITHIN $GEO_REP_TIMEOUT 0 create_rename_ok ${slave_mnt}/create_rename_test_file
@@ -200,9 +220,27 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt}
TEST gluster volume geo-rep $master $slave config rsync-options "--whole-file"
TEST "echo sampledata > $master_mnt/rsync_option_test_file"
-#Verify arequal for whole volume
-EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt} ${slave_mnt}
+#rename with existing destination case BUG:1694820
+TEST create_rename_with_existing_destination ${master_mnt}
+#verify rename with existing destination case BUG:1694820
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt}
+#Verify arequal for whole volume
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt}
+
+#Test config upgrade BUG: 1707731
+config_file=$GLUSTERD_WORKDIR/geo-replication/${GMV0}_${SH0}_${GSV0}/gsyncd.conf
+cat >> $config_file<<EOL
+[peers ${GMV0} ${GSV0}]
+use_tarssh = true
+timeout = 1
+EOL
+TEST $GEOREP_CLI $master $slave stop
+TEST $GEOREP_CLI $master $slave start
+#verify that the config file is updated
+EXPECT "1" echo $(grep -Fc "vars" $config_file)
+EXPECT "1" echo $(grep -Fc "sync-method = tarssh" $config_file)
+EXPECT "1" echo $(grep -Fc "slave-timeout = 1" $config_file)
#Stop Geo-rep
TEST $GEOREP_CLI $master $slave stop
diff --git a/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t b/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t
index 0610d2296c7..8fed929ffca 100644
--- a/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t
+++ b/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t
@@ -81,7 +81,7 @@ TEST $GEOREP_CLI $master $slave config use_meta_volume true
TEST $CLI volume set $GMV0 changelog.rollover-time 3
#Config tarssh as sync-engine
-TEST $GEOREP_CLI $master $slave config use_tarssh true
+TEST $GEOREP_CLI $master $slave config sync-method tarssh
#Wait for common secret pem file to be created
EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
@@ -89,6 +89,9 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
#Verify the keys are distributed
EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed
+#Verify "features.read-only" Option
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_slave_read_only $GSV0
+
#Start_georep
TEST $GEOREP_CLI $master $slave start
@@ -159,7 +162,7 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/changelog_chown_f1
#logrotate
EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/logrotate
-EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate
#CREATE+RENAME
EXPECT_WITHIN $GEO_REP_TIMEOUT 0 create_rename_ok ${slave_mnt}/create_rename_test_file
@@ -174,7 +177,7 @@ TEST $GEOREP_CLI $master $slave stop
TEST create_symlink_rename_mkdir_data
#hardlink-rename-unlink usecase. Sonatype Nexus3 Usecase. BUG:1512483
-TEST create_hardlink_rename_data
+#TEST create_hardlink_rename_data
#rsnapshot usecase
TEST create_rsnapshot_data
@@ -194,13 +197,18 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_rename_ok ${slave_mnt}/hardlink_rename
EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_symlink_rename_mkdir_data ${slave_mnt}/symlink_test1
#hardlink-rename-unlink usecase. Sonatype Nexus3 Usecase. BUG:1512483
-EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_hardlink_rename_data ${slave_mnt}
+#EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_hardlink_rename_data ${slave_mnt}
#rsnapshot usecase
EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt}
+#rename with existing destination case BUG:1694820
+TEST create_rename_with_existing_destination ${master_mnt}
+#verify rename with existing destination case BUG:1694820
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt}
+
#Verify arequal for whole volume
-EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt} ${slave_mnt}
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt}
#Stop Geo-rep
TEST $GEOREP_CLI $master $slave stop
diff --git a/tests/00-geo-rep/georep-basic-dr-tarssh.t b/tests/00-geo-rep/georep-basic-dr-tarssh.t
index 20a0a873311..feb2de74c90 100644
--- a/tests/00-geo-rep/georep-basic-dr-tarssh.t
+++ b/tests/00-geo-rep/georep-basic-dr-tarssh.t
@@ -81,7 +81,7 @@ TEST $GEOREP_CLI $master $slave config use_meta_volume true
TEST $CLI volume set $GMV0 changelog.rollover-time 3
#Config tarssh as sync-engine
-TEST $GEOREP_CLI $master $slave config use_tarssh true
+TEST $GEOREP_CLI $master $slave config sync-method tarssh
#Wait for common secret pem file to be created
EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
@@ -89,6 +89,9 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
#Verify the keys are distributed
EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed
+#Verify "features.read-only" Option
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_slave_read_only $GSV0
+
#Start_georep
TEST $GEOREP_CLI $master $slave start
@@ -159,7 +162,7 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/changelog_chown_f1
#logrotate
EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/logrotate
-EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate
#CREATE+RENAME
EXPECT_WITHIN $GEO_REP_TIMEOUT 0 create_rename_ok ${slave_mnt}/create_rename_test_file
@@ -199,8 +202,13 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_hardlink_rename_data ${slave_mnt}
#rsnapshot usecase
EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt}
+#rename with existing destination case BUG:1694820
+TEST create_rename_with_existing_destination ${master_mnt}
+#verify rename with existing destination case BUG:1694820
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt}
+
#Verify arequal for whole volume
-EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt} ${slave_mnt}
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt}
#Stop Geo-rep
TEST $GEOREP_CLI $master $slave stop
diff --git a/tests/00-geo-rep/georep-basic-rsync-ec.t b/tests/00-geo-rep/georep-basic-rsync-ec.t
new file mode 100644
index 00000000000..dd1f94edbc9
--- /dev/null
+++ b/tests/00-geo-rep/georep-basic-rsync-ec.t
@@ -0,0 +1,224 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../geo-rep.rc
+. $(dirname $0)/../env.rc
+
+SCRIPT_TIMEOUT=500
+
+AREQUAL_PATH=$(dirname $0)/../utils
+test "`uname -s`" != "Linux" && {
+ CFLAGS="$CFLAGS -lintl";
+}
+build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS
+
+### Basic Tests with Distributed Disperse volumes
+
+##Cleanup and start glusterd
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+
+##Variables
+GEOREP_CLI="$CLI volume geo-replication"
+master=$GMV0
+SH0="127.0.0.1"
+slave=${SH0}::${GSV0}
+num_active=2
+num_passive=10
+master_mnt=$M0
+slave_mnt=$M1
+
+############################################################
+#SETUP VOLUMES AND GEO-REPLICATION
+############################################################
+
+##create_and_start_master_volume
+TEST $CLI volume create $GMV0 disperse 3 redundancy 1 $H0:$B0/${GMV0}{0..5};
+TEST $CLI volume start $GMV0
+
+##create_and_start_slave_volume
+TEST $CLI volume create $GSV0 disperse 3 redundancy 1 $H0:$B0/${GSV0}{0..5};
+TEST $CLI volume start $GSV0
+
+##Create, start and mount meta_volume
+TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
+TEST $CLI volume start $META_VOL
+TEST mkdir -p $META_MNT
+TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
+
+##Mount master
+TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
+
+##Mount slave
+TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+
+############################################################
+#BASIC GEO-REPLICATION TESTS
+############################################################
+
+#Check Hybrid Crawl
+TEST create_data "hybrid"
+TEST create_georep_session $master $slave
+EXPECT_WITHIN $GEO_REP_TIMEOUT 6 check_status_num_rows "Created"
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Enable_metavolume
+TEST $GEOREP_CLI $master $slave config use_meta_volume true
+
+#Set changelog roll-over time to 3 secs
+TEST $CLI volume set $GMV0 changelog.rollover-time 3
+
+#Wait for common secret pem file to be created
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
+
+#Verify the keys are distributed
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed
+
+#Verify "features.read-only" Option
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_slave_read_only $GSV0
+
+#Start_georep
+TEST $GEOREP_CLI $master $slave start
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Passive"
+
+#data_tests "hybrid"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/hybrid_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/hybrid_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/hybrid_f3 ${slave_mnt}/hybrid_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/hybrid_d3 ${slave_mnt}/hybrid_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok hybrid_f1 ${slave_mnt}/hybrid_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/hybrid_f1 ${slave_mnt}/hybrid_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/hybrid_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/hybrid_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/hybrid_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/hybrid_chown_f1
+
+#Check History Crawl.
+TEST $GEOREP_CLI $master $slave stop
+TEST create_data "history"
+TEST $GEOREP_CLI $master $slave start
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Passive"
+
+#data_tests "history"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/history_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/history_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/history_f3 ${slave_mnt}/history_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/history_d3 ${slave_mnt}/history_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok history_f1 ${slave_mnt}/history_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/history_f1 ${slave_mnt}/history_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/history_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/history_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/history_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/history_chown_f1
+
+#Check Changelog Crawl.
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Changelog Crawl"
+TEST create_data "changelog"
+
+# logrotate test
+logrotate_file=${master_mnt}/logrotate/lg_test_file
+TEST mkdir -p ${master_mnt}/logrotate
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+
+# CREATE + RENAME
+create_rename ${master_mnt}/rename_test_file
+
+# hard-link rename
+hardlink_rename ${master_mnt}/hardlink_rename_test_file
+
+#SYNC CHECK
+#data_tests "changelog"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/changelog_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/changelog_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/changelog_f3 ${slave_mnt}/changelog_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/changelog_d3 ${slave_mnt}/changelog_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok changelog_f1 ${slave_mnt}/changelog_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/changelog_f1 ${slave_mnt}/changelog_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/changelog_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/changelog_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/changelog_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/changelog_chown_f1
+
+#logrotate
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/logrotate
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate
+
+#CREATE+RENAME
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 create_rename_ok ${slave_mnt}/create_rename_test_file
+
+#hardlink rename
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_rename_ok ${slave_mnt}/hardlink_rename_test_file
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Symlink testcase: Rename symlink and create dir with same name
+TEST create_symlink_rename_mkdir_data
+
+#hardlink-rename-unlink usecase. Sonatype Nexus3 Usecase. BUG:1512483
+TEST create_hardlink_rename_data
+
+#rsnapshot usecase
+#TEST create_rsnapshot_data
+
+#Start Geo-rep
+TEST $GEOREP_CLI $master $slave start
+
+#Wait for geo-rep to come up
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Passive"
+
+#Check for hardlink rename case. BUG: 1296174
+#It should not create src file again on changelog reprocessing
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_rename_ok ${slave_mnt}/hardlink_rename_test_file
+
+#Symlink testcase: Rename symlink and create dir with same name
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_symlink_rename_mkdir_data ${slave_mnt}/symlink_test1
+
+#hardlink-rename-unlink usecase. Sonatype Nexus3 Usecase. BUG:1512483
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_hardlink_rename_data ${slave_mnt}
+
+#rsnapshot usecase
+#EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt}
+
+#Test rsync-options set BUG:1629561
+TEST gluster volume geo-rep $master $slave config rsync-options "--whole-file"
+TEST "echo sampledata > $master_mnt/rsync_option_test_file"
+
+#rename with existing destination case BUG:1694820
+#TEST create_rename_with_existing_destination ${master_mnt}
+#verify rename with existing destination case BUG:1694820
+#EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt}
+
+#Verify arequal for whole volume
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt}
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Delete Geo-rep
+TEST $GEOREP_CLI $master $slave delete
+
+#Cleanup are-equal binary
+TEST rm $AREQUAL_PATH/arequal-checksum
+
+#Cleanup authorized keys
+sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
+sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/00-geo-rep/georep-basic-tarssh-ec.t b/tests/00-geo-rep/georep-basic-tarssh-ec.t
new file mode 100644
index 00000000000..987bd9391c8
--- /dev/null
+++ b/tests/00-geo-rep/georep-basic-tarssh-ec.t
@@ -0,0 +1,223 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../geo-rep.rc
+. $(dirname $0)/../env.rc
+
+SCRIPT_TIMEOUT=500
+
+AREQUAL_PATH=$(dirname $0)/../utils
+test "`uname -s`" != "Linux" && {
+ CFLAGS="$CFLAGS -lintl";
+}
+build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS
+
+### Basic Tests with Distributed Disperse volumes
+
+##Cleanup and start glusterd
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+
+##Variables
+GEOREP_CLI="$CLI volume geo-replication"
+master=$GMV0
+SH0="127.0.0.1"
+slave=${SH0}::${GSV0}
+num_active=2
+num_passive=10
+master_mnt=$M0
+slave_mnt=$M1
+
+############################################################
+#SETUP VOLUMES AND GEO-REPLICATION
+############################################################
+
+##create_and_start_master_volume
+TEST $CLI volume create $GMV0 disperse 3 redundancy 1 $H0:$B0/${GMV0}{0..5};
+TEST $CLI volume start $GMV0
+
+##create_and_start_slave_volume
+TEST $CLI volume create $GSV0 disperse 3 redundancy 1 $H0:$B0/${GSV0}{0..5};
+TEST $CLI volume start $GSV0
+
+##Create, start and mount meta_volume
+TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
+TEST $CLI volume start $META_VOL
+TEST mkdir -p $META_MNT
+TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
+
+##Mount master
+TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
+
+##Mount slave
+TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+
+############################################################
+#BASIC GEO-REPLICATION TESTS
+############################################################
+
+#Check Hybrid Crawl
+TEST create_data "hybrid"
+TEST create_georep_session $master $slave
+EXPECT_WITHIN $GEO_REP_TIMEOUT 6 check_status_num_rows "Created"
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Enable_metavolume
+TEST $GEOREP_CLI $master $slave config use_meta_volume true
+
+#Set changelog roll-over time to 3 secs
+TEST $CLI volume set $GMV0 changelog.rollover-time 3
+
+#Config tarssh as sync-engine
+TEST $GEOREP_CLI $master $slave config sync-method tarssh
+
+#Wait for common secret pem file to be created
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
+
+#Verify the keys are distributed
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed
+
+#Verify "features.read-only" Option
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_slave_read_only $GSV0
+
+#Start_georep
+TEST $GEOREP_CLI $master $slave start
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Passive"
+
+#data_tests "hybrid"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/hybrid_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/hybrid_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/hybrid_f3 ${slave_mnt}/hybrid_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/hybrid_d3 ${slave_mnt}/hybrid_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok hybrid_f1 ${slave_mnt}/hybrid_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/hybrid_f1 ${slave_mnt}/hybrid_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/hybrid_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/hybrid_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/hybrid_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/hybrid_chown_f1
+
+#Check History Crawl.
+TEST $GEOREP_CLI $master $slave stop
+TEST create_data "history"
+TEST $GEOREP_CLI $master $slave start
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Passive"
+
+#data_tests "history"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/history_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/history_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/history_f3 ${slave_mnt}/history_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/history_d3 ${slave_mnt}/history_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok history_f1 ${slave_mnt}/history_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/history_f1 ${slave_mnt}/history_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/history_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/history_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/history_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/history_chown_f1
+
+#Check Changelog Crawl.
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Changelog Crawl"
+TEST create_data "changelog"
+
+# logrotate test
+logrotate_file=${master_mnt}/logrotate/lg_test_file
+TEST mkdir -p ${master_mnt}/logrotate
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+
+# CREATE + RENAME
+create_rename ${master_mnt}/rename_test_file
+
+# hard-link rename
+hardlink_rename ${master_mnt}/hardlink_rename_test_file
+
+#SYNC CHECK
+#data_tests "changelog"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/changelog_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/changelog_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/changelog_f3 ${slave_mnt}/changelog_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/changelog_d3 ${slave_mnt}/changelog_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok changelog_f1 ${slave_mnt}/changelog_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/changelog_f1 ${slave_mnt}/changelog_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/changelog_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/changelog_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/changelog_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/changelog_chown_f1
+
+#logrotate
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/logrotate
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate
+
+#CREATE+RENAME
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 create_rename_ok ${slave_mnt}/create_rename_test_file
+
+#hardlink rename
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_rename_ok ${slave_mnt}/hardlink_rename_test_file
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Symlink testcase: Rename symlink and create dir with same name
+TEST create_symlink_rename_mkdir_data
+
+#hardlink-rename-unlink usecase. Sonatype Nexus3 Usecase. BUG:1512483
+TEST create_hardlink_rename_data
+
+#rsnapshot usecase
+#TEST create_rsnapshot_data
+
+#Start Geo-rep
+TEST $GEOREP_CLI $master $slave start
+
+#Wait for geo-rep to come up
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Passive"
+
+#Check for hardlink rename case. BUG: 1296174
+#It should not create src file again on changelog reprocessing
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_rename_ok ${slave_mnt}/hardlink_rename_test_file
+
+#Symlink testcase: Rename symlink and create dir with same name
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_symlink_rename_mkdir_data ${slave_mnt}/symlink_test1
+
+#hardlink-rename-unlink usecase. Sonatype Nexus3 Usecase. BUG:1512483
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_hardlink_rename_data ${slave_mnt}
+
+#rsnapshot usecase
+#EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt}
+
+#rename with existing destination case BUG:1694820
+#TEST create_rename_with_existing_destination ${master_mnt}
+#verify rename with existing destination case BUG:1694820
+#EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt}
+
+#Verify arequal for whole volume
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt}
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Delete Geo-rep
+TEST $GEOREP_CLI $master $slave delete
+
+#Cleanup are-equal binary
+TEST rm $AREQUAL_PATH/arequal-checksum
+
+#Cleanup authorized keys
+sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
+sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/00-geo-rep/georep-config-upgrade.t b/tests/00-geo-rep/georep-config-upgrade.t
new file mode 100644
index 00000000000..557461cd9c4
--- /dev/null
+++ b/tests/00-geo-rep/georep-config-upgrade.t
@@ -0,0 +1,132 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../geo-rep.rc
+. $(dirname $0)/../env.rc
+
+SCRIPT_TIMEOUT=300
+OLD_CONFIG_PATH=$(dirname $0)/gsyncd.conf.old
+WORKING_DIR=/var/lib/glusterd/geo-replication/master_127.0.0.1_slave
+
+##Cleanup and start glusterd
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+##Variables
+GEOREP_CLI="$CLI volume geo-replication"
+master=$GMV0
+SH0="127.0.0.1"
+slave=${SH0}::${GSV0}
+num_active=2
+num_passive=2
+master_mnt=$M0
+slave_mnt=$M1
+
+############################################################
+#SETUP VOLUMES AND GEO-REPLICATION
+############################################################
+
+##create_and_start_master_volume
+TEST $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2,3,4};
+TEST $CLI volume start $GMV0
+
+##create_and_start_slave_volume
+TEST $CLI volume create $GSV0 replica 2 $H0:$B0/${GSV0}{1,2,3,4};
+TEST $CLI volume start $GSV0
+
+##Create, start and mount meta_volume
+TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
+TEST $CLI volume start $META_VOL
+TEST mkdir -p $META_MNT
+TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
+
+##Mount master
+TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
+
+##Mount slave
+TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+
+############################################################
+#BASIC GEO-REPLICATION TESTS
+############################################################
+
+#Create geo-rep session
+TEST create_georep_session $master $slave
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Enable_metavolume
+TEST $GEOREP_CLI $master $slave config use_meta_volume true
+
+#Wait for common secret pem file to be created
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
+
+#Verify the keys are distributed
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed
+
+#Start_georep
+TEST $GEOREP_CLI $master $slave start
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Passive"
+
+TEST $GEOREP_CLI $master $slave config sync-method tarssh
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Copy old config file
+mv -f $WORKING_DIR/gsyncd.conf $WORKING_DIR/gsyncd.conf.org
+cp -p $OLD_CONFIG_PATH $WORKING_DIR/gsyncd.conf
+
+#Check if config get all updates config_file
+TEST ! grep "sync-method" $WORKING_DIR/gsyncd.conf
+TEST $GEOREP_CLI $master $slave config
+TEST grep "sync-method" $WORKING_DIR/gsyncd.conf
+
+#Check if config get updates config_file
+rm -f $WORKING_DIR/gsyncd.conf
+cp -p $OLD_CONFIG_PATH $WORKING_DIR/gsyncd.conf
+TEST ! grep "sync-method" $WORKING_DIR/gsyncd.conf
+TEST $GEOREP_CLI $master $slave config sync-method
+TEST grep "sync-method" $WORKING_DIR/gsyncd.conf
+
+#Check if config set updates config_file
+rm -f $WORKING_DIR/gsyncd.conf
+cp -p $OLD_CONFIG_PATH $WORKING_DIR/gsyncd.conf
+TEST ! grep "sync-method" $WORKING_DIR/gsyncd.conf
+TEST $GEOREP_CLI $master $slave config sync-xattrs false
+TEST grep "sync-method" $WORKING_DIR/gsyncd.conf
+
+#Check if config reset updates config_file
+rm -f $WORKING_DIR/gsyncd.conf
+cp -p $OLD_CONFIG_PATH $WORKING_DIR/gsyncd.conf
+TEST ! grep "sync-method" $WORKING_DIR/gsyncd.conf
+TEST $GEOREP_CLI $master $slave config \!sync-xattrs
+TEST grep "sync-method" $WORKING_DIR/gsyncd.conf
+
+#Check if geo-rep start updates config_file
+rm -f $WORKING_DIR/gsyncd.conf
+cp -p $OLD_CONFIG_PATH $WORKING_DIR/gsyncd.conf
+TEST ! grep "sync-method" $WORKING_DIR/gsyncd.conf
+TEST $GEOREP_CLI $master $slave start
+TEST grep "sync-method" $WORKING_DIR/gsyncd.conf
+
+#Stop geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Delete Geo-rep
+TEST $GEOREP_CLI $master $slave delete
+
+#Cleanup authorized keys
+sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
+sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/00-geo-rep/georep-stderr-hang.t b/tests/00-geo-rep/georep-stderr-hang.t
new file mode 100644
index 00000000000..496f0e6577d
--- /dev/null
+++ b/tests/00-geo-rep/georep-stderr-hang.t
@@ -0,0 +1,128 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../geo-rep.rc
+. $(dirname $0)/../env.rc
+
+SCRIPT_TIMEOUT=500
+
+AREQUAL_PATH=$(dirname $0)/../utils
+test "`uname -s`" != "Linux" && {
+ CFLAGS="$CFLAGS -lintl";
+}
+build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS
+
+### Basic Tests with Distribute Replicate volumes
+
+##Cleanup and start glusterd
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+
+##Variables
+GEOREP_CLI="$CLI volume geo-replication"
+master=$GMV0
+SH0="127.0.0.1"
+slave=${SH0}::${GSV0}
+num_active=2
+num_passive=2
+master_mnt=$M0
+slave_mnt=$M1
+
+############################################################
+#SETUP VOLUMES AND GEO-REPLICATION
+############################################################
+
+##create_and_start_master_volume
+TEST $CLI volume create $GMV0 $H0:$B0/${GMV0}1;
+TEST $CLI volume start $GMV0
+
+##create_and_start_slave_volume
+TEST $CLI volume create $GSV0 $H0:$B0/${GSV0}1;
+TEST $CLI volume start $GSV0
+TEST $CLI volume set $GSV0 performance.stat-prefetch off
+TEST $CLI volume set $GSV0 performance.quick-read off
+TEST $CLI volume set $GSV0 performance.readdir-ahead off
+TEST $CLI volume set $GSV0 performance.read-ahead off
+
+##Mount master
+TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
+
+##Mount slave
+TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+
+############################################################
+#BASIC GEO-REPLICATION TESTS
+############################################################
+
+TEST create_georep_session $master $slave
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Created"
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Set changelog roll-over time to 45 secs
+TEST $CLI volume set $GMV0 changelog.rollover-time 45
+
+#Wait for common secret pem file to be created
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
+
+#Verify the keys are distributed
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed
+
+#Set sync-jobs to 1
+TEST $GEOREP_CLI $master $slave config sync-jobs 1
+
+#Start_georep
+TEST $GEOREP_CLI $master $slave start
+
+touch $M0
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Changelog Crawl"
+
+#Check History Crawl.
+TEST $GEOREP_CLI $master $slave stop
+TEST create_data_hang "rsync_hang"
+TEST create_data "history_rsync"
+TEST $GEOREP_CLI $master $slave start
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Active"
+
+#Verify arequal for whole volume
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt}
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Config tarssh as sync-engine
+TEST $GEOREP_CLI $master $slave config sync-method tarssh
+
+#Create tarssh hang data
+TEST create_data_hang "tarssh_hang"
+TEST create_data "history_tar"
+
+TEST $GEOREP_CLI $master $slave start
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Active"
+
+#Verify arequal for whole volume
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt}
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Delete Geo-rep
+TEST $GEOREP_CLI $master $slave delete
+
+#Cleanup are-equal binary
+TEST rm $AREQUAL_PATH/arequal-checksum
+
+#Cleanup authorized keys
+sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
+sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/00-geo-rep/georep-upgrade.t b/tests/00-geo-rep/georep-upgrade.t
new file mode 100644
index 00000000000..7523068ed50
--- /dev/null
+++ b/tests/00-geo-rep/georep-upgrade.t
@@ -0,0 +1,79 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+SCRIPT_TIMEOUT=500
+
+###############################################################################################
+#Before upgrade
+###############################################################################################
+brick=/bricks/brick1
+epoch1=$(date '+%s')
+sleep 1
+epoch2=$(date '+%s')
+mkdir -p /bricks/brick1/.glusterfs/changelogs/htime
+mkdir -p /bricks/brick1/.glusterfs/changelogs
+
+#multiple htime files(changelog enable/disable scenario)
+TEST touch /bricks/brick1/.glusterfs/changelogs/htime/HTIME.$epoch1
+TEST touch /bricks/brick1/.glusterfs/changelogs/htime/HTIME.$epoch2
+
+#changelog files
+TEST touch /bricks/brick1/.glusterfs/changelogs/CHANGELOG.$epoch1
+TEST touch /bricks/brick1/.glusterfs/changelogs/CHANGELOG.$epoch2
+
+htime_file1=/bricks/brick1/.glusterfs/changelogs/htime/HTIME.$epoch1
+htime_file2=/bricks/brick1/.glusterfs/changelogs/htime/HTIME.$epoch2
+
+#data inside htime files before upgrade
+data1=/bricks/brick1/.glusterfs/changelogs/CHANGELOG.$epoch1
+data2=/bricks/brick1/.glusterfs/changelogs/CHANGELOG.$epoch2
+
+#data inside htime files after upgrade
+updated_data1=/bricks/brick1/.glusterfs/changelogs/`echo $(date '+%Y/%m/%d')`/CHANGELOG.$epoch1
+updated_data2=/bricks/brick1/.glusterfs/changelogs/`echo $(date '+%Y/%m/%d')`/CHANGELOG.$epoch2
+
+echo -n $data1>$htime_file1
+echo -n $data2>$htime_file2
+
+echo "Before upgrade:"
+EXPECT '1' echo $(grep $data1 $htime_file1 | wc -l)
+EXPECT '1' echo $(grep $data2 $htime_file2 | wc -l)
+
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/CHANGELOG.$epoch1 | wc -l)
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/htime/HTIME.$epoch1 | wc -l)
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/CHANGELOG.$epoch2 | wc -l)
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/htime/HTIME.$epoch2 | wc -l)
+###############################################################################################
+#Upgrade
+###############################################################################################
+### This needed to be fixed as this very vague finding a file with name in '/'
+### multiple file with same name can exist
+### for temp fix picking only 1st result
+TEST upgrade_script=$(find / -type f -name glusterfs-georep-upgrade.py -print | head -n 1)
+TEST python3 $upgrade_script $brick
+
+###############################################################################################
+#After upgrade
+###############################################################################################
+echo "After upgrade:"
+EXPECT '1' echo $(grep $updated_data1 $htime_file1 | wc -l)
+EXPECT '1' echo $(grep $updated_data2 $htime_file2 | wc -l)
+
+#Check directory structure inside changelogs
+TEST ! ls /bricks/brick1/.glusterfs/changelogs/CHANGELOG.$epoch1
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/htime/HTIME.$epoch1 | wc -l)
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/htime/HTIME.$epoch1.bak | wc -l)
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/`echo $(date '+%Y')` | wc -l)
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/`echo $(date '+%Y/%m')` | wc -l)
+EXPECT '2' echo $(ls /bricks/brick1/.glusterfs/changelogs/`echo $(date '+%Y/%m/%d')` | wc -l)
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/`echo $(date '+%Y/%m/%d')`/CHANGELOG.$epoch1 | wc -l)
+
+TEST ! ls /bricks/brick1/.glusterfs/changelogs/CHANGELOG.$epoch2
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/htime/HTIME.$epoch2 | wc -l)
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/htime/HTIME.$epoch2.bak | wc -l)
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/`echo $(date '+%Y')` | wc -l)
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/`echo $(date '+%Y/%m')`| wc -l)
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/`echo $(date '+%Y/%m/%d')`/CHANGELOG.$epoch2 | wc -l)
+
+TEST rm -rf /bricks
diff --git a/tests/00-geo-rep/gsyncd.conf.old b/tests/00-geo-rep/gsyncd.conf.old
new file mode 100644
index 00000000000..519acaf8f3e
--- /dev/null
+++ b/tests/00-geo-rep/gsyncd.conf.old
@@ -0,0 +1,47 @@
+[__meta__]
+version = 2.0
+
+[peersrx . .]
+remote_gsyncd = /usr/local/libexec/glusterfs/gsyncd
+georep_session_working_dir = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/
+ssh_command_tar = ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i /var/lib/glusterd/geo-replication/tar_ssh.pem
+changelog_log_file = /var/log/glusterfs/geo-replication/${mastervol}/${eSlave}${local_id}-changes.log
+working_dir = /var/lib/misc/glusterfsd/${mastervol}/${eSlave}
+ignore_deletes = false
+pid_file = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/monitor.pid
+state_file = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/monitor.status
+gluster_command_dir = /usr/local/sbin/
+gluster_params = aux-gfid-mount acl
+ssh_command = ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i /var/lib/glusterd/geo-replication/secret.pem
+state_detail_file = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/${eSlave}-detail.status
+state_socket_unencoded = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/${eSlave}.socket
+socketdir = /var/run/gluster
+log_file = /var/log/glusterfs/geo-replication/${mastervol}/${eSlave}.log
+gluster_log_file = /var/log/glusterfs/geo-replication/${mastervol}/${eSlave}${local_id}.gluster.log
+special_sync_mode = partial
+change_detector = changelog
+pid-file = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/monitor.pid
+state-file = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/monitor.status
+
+[__section_order__]
+peersrx . . = 0
+peersrx . %5essh%3a = 2
+peersrx . = 3
+peers master slave = 4
+
+[peersrx . %5Essh%3A]
+remote_gsyncd = /nonexistent/gsyncd
+
+[peersrx .]
+gluster_command_dir = /usr/local/sbin/
+gluster_params = aux-gfid-mount acl
+log_file = /var/log/glusterfs/geo-replication-slaves/${session_owner}:${local_node}${local_id}.${slavevol}.log
+log_file_mbr = /var/log/glusterfs/geo-replication-slaves/mbr/${session_owner}:${local_node}${local_id}.${slavevol}.log
+gluster_log_file = /var/log/glusterfs/geo-replication-slaves/${session_owner}:${local_node}${local_id}.${slavevol}.gluster.log
+
+[peers master slave]
+session_owner = 0732cbd1-3ec5-4920-ab0d-aa5a896d5214
+master.stime_xattr_name = trusted.glusterfs.0732cbd1-3ec5-4920-ab0d-aa5a896d5214.07a9005c-ace4-4f67-b3c0-73938fb236c4.stime
+volume_id = 0732cbd1-3ec5-4920-ab0d-aa5a896d5214
+use_tarssh = true
+
diff --git a/tests/basic/afr/split-brain-favorite-child-policy.t b/tests/000-flaky/basic_afr_split-brain-favorite-child-policy.t
index 0e321c6f095..77d82a4996f 100644
--- a/tests/basic/afr/split-brain-favorite-child-policy.t
+++ b/tests/000-flaky/basic_afr_split-brain-favorite-child-policy.t
@@ -1,8 +1,8 @@
#!/bin/bash
#Test the split-brain resolution CLI commands.
-. $(dirname $0)/../../include.rc
-. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
cleanup;
@@ -16,6 +16,7 @@ TEST $CLI volume set $V0 cluster.self-heal-daemon off
TEST $CLI volume set $V0 cluster.entry-self-heal off
TEST $CLI volume set $V0 cluster.data-self-heal off
TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.heal-timeout 5
TEST $CLI volume start $V0
TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
TEST touch $M0/file
@@ -38,7 +39,7 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
TEST $CLI volume heal $V0
-#file fill in split-brain
+#file still in split-brain
cat $M0/file > /dev/null
EXPECT "1" echo $?
@@ -124,7 +125,7 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
TEST $CLI volume heal $V0
-#file fill in split-brain
+#file still in split-brain
cat $M0/file > /dev/null
EXPECT "1" echo $?
@@ -179,7 +180,7 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
TEST $CLI volume heal $V0
-#file fill in split-brain
+#file still in split-brain
cat $M0/file > /dev/null
EXPECT "1" echo $?
diff --git a/tests/000-flaky/basic_changelog_changelog-snapshot.t b/tests/000-flaky/basic_changelog_changelog-snapshot.t
new file mode 100644
index 00000000000..f6cd0b04d47
--- /dev/null
+++ b/tests/000-flaky/basic_changelog_changelog-snapshot.t
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../snapshot.rc
+
+cleanup;
+ROLLOVER_TIME=3
+
+TEST verify_lvm_version;
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST setup_lvm 1
+
+TEST $CLI volume create $V0 $H0:$L1
+BRICK_LOG=$(echo "$L1" | tr / - | sed 's/^-//g')
+TEST $CLI volume start $V0
+
+#Enable changelog
+TEST $CLI volume set $V0 changelog.changelog on
+TEST $CLI volume set $V0 changelog.rollover-time $ROLLOVER_TIME
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+#Create snapshot
+S1="${V0}-snap1"
+
+mkdir $M0/RENAME
+mkdir $M0/LINK
+mkdir $M0/UNLINK
+mkdir $M0/RMDIR
+mkdir $M0/SYMLINK
+
+for i in {1..400} ; do touch $M0/RENAME/file$i; done
+for i in {1..400} ; do touch $M0/LINK/file$i; done
+for i in {1..400} ; do touch $M0/UNLINK/file$i; done
+for i in {1..400} ; do mkdir $M0/RMDIR/dir$i; done
+for i in {1..400} ; do touch $M0/SYMLINK/file$i; done
+
+#Write I/O in background
+for i in {1..400} ; do touch $M0/file$i 2>/dev/null; done &
+for i in {1..400} ; do mknod $M0/mknod-file$i p 2>/dev/null; done &
+for i in {1..400} ; do mkdir $M0/dir$i 2>/dev/null; done & 2>/dev/null
+for i in {1..400} ; do mv $M0/RENAME/file$i $M0/RENAME/rn-file$i 2>/dev/null; done &
+for i in {1..400} ; do ln $M0/LINK/file$i $M0/LINK/ln-file$i 2>/dev/null; done &
+for i in {1..400} ; do rm -f $M0/UNLINK/file$i 2>/dev/null; done &
+for i in {1..400} ; do rmdir $M0/RMDIR/dir$i 2>/dev/null; done &
+for i in {1..400} ; do ln -s $M0/SYMLINK/file$i $M0/SYMLINK/sym-file$i 2>/dev/null; done &
+
+sleep 1
+TEST $CLI snapshot create $S1 $V0 no-timestamp
+TEST snapshot_exists 0 $S1
+
+TEST grep '"Enabled changelog barrier"' /var/log/glusterfs/bricks/$BRICK_LOG.log
+TEST grep '"Disabled changelog barrier"' /var/log/glusterfs/bricks/$BRICK_LOG.log
+
+TEST glusterfs -s $H0 --volfile-id=/snaps/$S1/$V0 $M1
+
+#Clean up
+TEST $CLI volume stop $V0 force
+cleanup;
diff --git a/tests/basic/distribute/rebal-all-nodes-migrate.t b/tests/000-flaky/basic_distribute_rebal-all-nodes-migrate.t
index acc4ffefecc..eb5d3305ac1 100644
--- a/tests/basic/distribute/rebal-all-nodes-migrate.t
+++ b/tests/000-flaky/basic_distribute_rebal-all-nodes-migrate.t
@@ -1,8 +1,8 @@
#!/bin/bash
-. $(dirname $0)/../../include.rc
-. $(dirname $0)/../../cluster.rc
-. $(dirname $0)/../../dht.rc
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../cluster.rc
+. $(dirname $0)/../dht.rc
# Check if every single rebalance process migrated some files
@@ -10,11 +10,9 @@
function cluster_rebal_all_nodes_migrated_files {
val=0
a=$($CLI_1 volume rebalance $V0 status | grep "completed" | awk '{print $2}');
-# echo $a
b=($a)
for i in "${b[@]}"
do
-# echo "$i";
if [ "$i" -eq "0" ]; then
echo "false";
val=1;
diff --git a/tests/000-flaky/basic_ec_ec-quorum-count-partial-failure.t b/tests/000-flaky/basic_ec_ec-quorum-count-partial-failure.t
new file mode 100644
index 00000000000..42808ce0c0e
--- /dev/null
+++ b/tests/000-flaky/basic_ec_ec-quorum-count-partial-failure.t
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+#This test checks that partial failure of fop results in main fop failure only
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume create $V1 $H0:$B0/${V1}{0..5}
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=/$V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+TEST dd if=/dev/urandom of=$M0/a bs=12347 count=1
+TEST dd if=/dev/urandom of=$M0/b bs=12347 count=1
+TEST cp $M0/b $M0/c
+TEST fallocate -p -l 101 $M0/c
+TEST $CLI volume stop $V0
+TEST $CLI volume set $V0 debug.delay-gen posix;
+TEST $CLI volume set $V0 delay-gen.delay-duration 10000000;
+TEST $CLI volume set $V0 delay-gen.enable WRITE;
+TEST $CLI volume set $V0 delay-gen.delay-percentage 100
+TEST $CLI volume set $V0 disperse.quorum-count 6
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+cksum=$(dd if=$M0/a bs=12345 count=1 | md5sum | awk '{print $1}')
+truncate -s 12345 $M0/a & #While write is waiting for 5 seconds, introduce failure
+fallocate -p -l 101 $M0/b &
+sleep 1
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST wait
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}
+EXPECT "12345" stat --format=%s $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0;
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
+cksum_after_heal=$(dd if=$M0/a | md5sum | awk '{print $1}')
+TEST [[ $cksum == $cksum_after_heal ]]
+cksum=$(dd if=$M0/c | md5sum | awk '{print $1}')
+cksum_after_heal=$(dd if=$M0/b | md5sum | awk '{print $1}')
+TEST [[ $cksum == $cksum_after_heal ]]
+
+cleanup;
diff --git a/tests/basic/mount-nfs-auth.t b/tests/000-flaky/basic_mount-nfs-auth.t
index 0d4e01abb32..3d4a9cff00b 100755..100644
--- a/tests/basic/mount-nfs-auth.t
+++ b/tests/000-flaky/basic_mount-nfs-auth.t
@@ -3,6 +3,8 @@
. $(dirname $0)/../include.rc
. $(dirname $0)/../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
# Our mount timeout must be as long as the time for a regular configuration
# change to be acted upon *plus* AUTH_REFRESH_TIMEOUT, not one replacing the
# other. Otherwise this process races vs. the one making the change we're
diff --git a/tests/bugs/core/multiplex-limit-issue-151.t b/tests/000-flaky/bugs_core_multiplex-limit-issue-151.t
index dc9013061b0..5a88f97d726 100644
--- a/tests/bugs/core/multiplex-limit-issue-151.t
+++ b/tests/000-flaky/bugs_core_multiplex-limit-issue-151.t
@@ -1,8 +1,8 @@
#!/bin/bash
-. $(dirname $0)/../../include.rc
-. $(dirname $0)/../../traps.rc
-. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../traps.rc
+. $(dirname $0)/../volume.rc
function count_up_bricks {
$CLI --xml volume status all | grep '<status>1' | wc -l
diff --git a/tests/bugs/distribute/bug-1117851.t b/tests/000-flaky/bugs_distribute_bug-1117851.t
index 678103869cf..5980bf2fd4b 100755..100644
--- a/tests/bugs/distribute/bug-1117851.t
+++ b/tests/000-flaky/bugs_distribute_bug-1117851.t
@@ -1,7 +1,9 @@
#!/bin/bash
-. $(dirname $0)/../../include.rc
-. $(dirname $0)/../../volume.rc
+SCRIPT_TIMEOUT=250
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
create_files () {
for i in {1..1000}; do
diff --git a/tests/bugs/distribute/bug-1122443.t b/tests/000-flaky/bugs_distribute_bug-1122443.t
index 906be7072bd..abd37082b33 100644
--- a/tests/bugs/distribute/bug-1122443.t
+++ b/tests/000-flaky/bugs_distribute_bug-1122443.t
@@ -1,8 +1,8 @@
#!/bin/bash
-. $(dirname $0)/../../include.rc
-. $(dirname $0)/../../volume.rc
-. $(dirname $0)/../../dht.rc
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../dht.rc
make_files() {
mkdir $1 && \
@@ -42,8 +42,8 @@ TEST glusterfs -s $H0 --volfile-id $V0 $M0
TEST make_files $M0/subdir
# Get mtime before migration
-BEFORE="$(stat -c %n:%Y $M0/subdir/* | tr '\n' ',')"
-
+BEFORE="$(stat -c %n:%Y $M0/subdir/* | sort | tr '\n' ',')"
+echo $BEFORE
# Migrate brick
TEST $CLI volume add-brick $V0 $H0:$B0/${V0}1
TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}0 start
@@ -51,9 +51,10 @@ EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field
TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}0 commit
# Get mtime after migration
-EXPECT_WITHIN 5 RECONNECTED bug_1113050_workaround $M0/subdir/*
-AFTER="$(stat -c %n:%Y $M0/subdir/* | tr '\n' ',')"
-
+EXPECT_WITHIN 30 RECONNECTED bug_1113050_workaround $M0/subdir/symlink
+sleep 3
+AFTER="$(stat -c %n:%Y $M0/subdir/* | sort | tr '\n' ',')"
+echo $AFTER
# Check if mtime is unchanged
TEST [ "$AFTER" == "$BEFORE" ]
diff --git a/tests/bugs/glusterd/bug-857330/common.rc b/tests/000-flaky/bugs_glusterd_bug-857330/common.rc
index d0aa4b1a640..bd122eff18c 100644
--- a/tests/bugs/glusterd/bug-857330/common.rc
+++ b/tests/000-flaky/bugs_glusterd_bug-857330/common.rc
@@ -1,4 +1,4 @@
-. $(dirname $0)/../../../include.rc
+. $(dirname $0)/../../include.rc
UUID_REGEX='[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}'
diff --git a/tests/bugs/glusterd/bug-857330/normal.t b/tests/000-flaky/bugs_glusterd_bug-857330/normal.t
index ad0c8844fae..6c1cf54ec3c 100755
--- a/tests/bugs/glusterd/bug-857330/normal.t
+++ b/tests/000-flaky/bugs_glusterd_bug-857330/normal.t
@@ -1,7 +1,7 @@
#!/bin/bash
. $(dirname $0)/common.rc
-. $(dirname $0)/../../../volume.rc
+. $(dirname $0)/../../volume.rc
cleanup;
TEST glusterd
@@ -14,7 +14,7 @@ TEST $CLI volume start $V0;
TEST glusterfs -s $H0 --volfile-id=$V0 $M0;
-TEST $PYTHON $(dirname $0)/../../../utils/create-files.py \
+TEST $PYTHON $(dirname $0)/../../utils/create-files.py \
--multi -b 10 -d 10 -n 10 $M0;
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
diff --git a/tests/bugs/glusterd/bug-857330/xml.t b/tests/000-flaky/bugs_glusterd_bug-857330/xml.t
index 8383d2a0711..11785adacdb 100755
--- a/tests/bugs/glusterd/bug-857330/xml.t
+++ b/tests/000-flaky/bugs_glusterd_bug-857330/xml.t
@@ -1,7 +1,7 @@
#!/bin/bash
. $(dirname $0)/common.rc
-. $(dirname $0)/../../../volume.rc
+. $(dirname $0)/../../volume.rc
cleanup;
@@ -15,7 +15,7 @@ TEST $CLI volume start $V0;
TEST glusterfs -s $H0 --volfile-id=$V0 $M0;
-TEST $PYTHON $(dirname $0)/../../../utils/create-files.py \
+TEST $PYTHON $(dirname $0)/../../utils/create-files.py \
--multi -b 10 -d 10 -n 10 $M0;
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
diff --git a/tests/000-flaky/bugs_glusterd_quorum-value-check.t b/tests/000-flaky/bugs_glusterd_quorum-value-check.t
new file mode 100644
index 00000000000..a431b8c4fd4
--- /dev/null
+++ b/tests/000-flaky/bugs_glusterd_quorum-value-check.t
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+function check_quorum_nfs() {
+ local qnfs="$(less /var/lib/glusterd/nfs/nfs-server.vol | grep "quorum-count"| awk '{print $3}')"
+ local qinfo="$($CLI volume info $V0| grep "cluster.quorum-count"| awk '{print $2}')"
+
+ if [ $qnfs = $qinfo ]; then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 nfs.disable off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.quorum-type fixed
+TEST $CLI volume start $V0
+
+TEST $CLI volume set $V0 cluster.quorum-count 1
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "Y" check_quorum_nfs
+TEST $CLI volume set $V0 cluster.quorum-count 2
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "Y" check_quorum_nfs
+TEST $CLI volume set $V0 cluster.quorum-count 3
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "Y" check_quorum_nfs
+
+cleanup;
diff --git a/tests/bugs/nfs/bug-1116503.t b/tests/000-flaky/bugs_nfs_bug-1116503.t
index c9ed840ec92..fc50021acc7 100644
--- a/tests/bugs/nfs/bug-1116503.t
+++ b/tests/000-flaky/bugs_nfs_bug-1116503.t
@@ -3,10 +3,11 @@
# Verify that mounting NFS over UDP (MOUNT service only) works.
#
-. $(dirname $0)/../../include.rc
-. $(dirname $0)/../../volume.rc
-. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
cleanup;
TEST glusterd
diff --git a/tests/features/lock-migration/lkmigration-set-option.t b/tests/000-flaky/features_lock-migration_lkmigration-set-option.t
index 4340438591f..1327ef3579f 100644
--- a/tests/features/lock-migration/lkmigration-set-option.t
+++ b/tests/000-flaky/features_lock-migration_lkmigration-set-option.t
@@ -1,7 +1,7 @@
#!/bin/bash
# Test to check
-. $(dirname $0)/../../include.rc
-. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
#Check lock-migration set option sanity
cleanup;
diff --git a/tests/afr.rc b/tests/afr.rc
index 35f352df78f..241789903ba 100644
--- a/tests/afr.rc
+++ b/tests/afr.rc
@@ -105,3 +105,19 @@ function get_quorum_type()
local repl_id="$3"
cat $m/.meta/graphs/active/$v-replicate-$repl_id/private|grep quorum-type|awk '{print $3}'
}
+
+function afr_private_key_value()
+{
+ local v=$1
+ local m=$2
+ local replica_id=$3
+ local key=$4
+#xargs at the end will strip leading spaces
+ grep -E "^${key} = " $m/.meta/graphs/active/${v}-replicate-${replica_id}/private | cut -f2 -d'=' | xargs
+}
+
+function afr_anon_entry_count()
+{
+ local b=$1
+ ls $b/.glusterfs-anonymous-inode* | wc -l
+}
diff --git a/tests/basic/afr/afr-anon-inode-no-quorum.t b/tests/basic/afr/afr-anon-inode-no-quorum.t
new file mode 100644
index 00000000000..896ba0c9b2c
--- /dev/null
+++ b/tests/basic/afr/afr-anon-inode-no-quorum.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+#Test that anon-inode entry is not cleaned up as long as there exists at least
+#one valid entry
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.readdir-ahead off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST touch $M0/a $M0/b
+
+gfid_a=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/a))
+gfid_b=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/b))
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST mv $M0/a $M0/a-new
+TEST mv $M0/b $M0/b-new
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST ! ls $M0/a
+TEST ! ls $M0/b
+anon_inode_name=$(ls -a $B0/${V0}0 | grep glusterfs-anonymous-inode)
+TEST stat $B0/${V0}0/$anon_inode_name/$gfid_a
+TEST stat $B0/${V0}0/$anon_inode_name/$gfid_b
+#Make sure index heal doesn't happen after enabling heal
+TEST setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1
+TEST rm -f $B0/${V0}1/.glusterfs/indices/xattrop/*
+TEST $CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+#Allow time for a scan
+sleep 5
+TEST stat $B0/${V0}0/$anon_inode_name/$gfid_a
+TEST stat $B0/${V0}0/$anon_inode_name/$gfid_b
+inum_b=$(STAT_INO $B0/${V0}0/$anon_inode_name/$gfid_b)
+TEST rm -f $M0/a-new
+TEST stat $M0/b-new
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}1
+EXPECT "$inum_b" STAT_INO $B0/${V0}0/b-new
+
+cleanup
diff --git a/tests/basic/afr/afr-anon-inode.t b/tests/basic/afr/afr-anon-inode.t
new file mode 100644
index 00000000000..f4cf37a2fa0
--- /dev/null
+++ b/tests/basic/afr/afr-anon-inode.t
@@ -0,0 +1,114 @@
+#!/bin/bash
+#Tests that afr-anon-inode test cases work fine as expected
+#These are cases where in entry-heal/name-heal we dont know entry for an inode
+#so these inodes are kept in a special directory
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..2}
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+EXPECT "^1$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode"
+TEST $CLI volume set $V0 cluster.use-anonymous-inode no
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode"
+TEST $CLI volume set $V0 cluster.use-anonymous-inode yes
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode"
+TEST mkdir -p $M0/d1/b $M0/d2/a
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST mv $M0/d2/a $M0/d1
+TEST mv $M0/d1/b $M0/d2
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+anon_inode_name=$(ls -a $B0/${V0}0 | grep glusterfs-anonymous-inode)
+TEST [[ -d $B0/${V0}1/$anon_inode_name ]]
+TEST [[ -d $B0/${V0}2/$anon_inode_name ]]
+anon_gfid=$(gf_get_gfid_xattr $B0/${V0}0/$anon_inode_name)
+EXPECT "$anon_gfid" gf_get_gfid_xattr $B0/${V0}1/$anon_inode_name
+EXPECT "$anon_gfid" gf_get_gfid_xattr $B0/${V0}2/$anon_inode_name
+
+TEST ! ls $M0/$anon_inode_name
+EXPECT "^4$" echo $(ls -a $M0 | wc -l)
+
+#Test purging code path by shd
+TEST $CLI volume heal $V0 disable
+TEST mkdir $M0/l0 $M0/l1 $M0/l2
+TEST touch $M0/del-file $M0/del-file-nolink $M0/l0/file
+TEST ln $M0/del-file $M0/del-file-link
+TEST ln $M0/l0/file $M0/l1/file-link1
+TEST ln $M0/l0/file $M0/l2/file-link2
+TEST mkdir -p $M0/del-recursive-dir/d1
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST rm -f $M0/del-file $M0/del-file-nolink
+TEST rm -rf $M0/del-recursive-dir
+TEST mv $M0/d1/a $M0/d2
+TEST mv $M0/l0/file $M0/l0/renamed-file
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 0
+
+nolink_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-file-nolink))
+link_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-file))
+dir_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-recursive-dir))
+rename_dir_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/d1/a))
+rename_file_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/l0/file))
+TEST ! stat $M0/del-file
+TEST stat $B0/${V0}0/$anon_inode_name/$link_gfid
+TEST ! stat $M0/del-file-nolink
+TEST ! stat $B0/${V0}0/$anon_inode_name/$nolink_gfid
+TEST ! stat $M0/del-recursive-dir
+TEST stat $B0/${V0}0/$anon_inode_name/$dir_gfid
+TEST ! stat $M0/d1/a
+TEST stat $B0/${V0}0/$anon_inode_name/$rename_dir_gfid
+TEST ! stat $M0/l0/file
+TEST stat $B0/${V0}0/$anon_inode_name/$rename_file_gfid
+
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST mv $M0/l1/file-link1 $M0/l1/renamed-file-link1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 1
+TEST ! stat $M0/l1/file-link1
+TEST stat $B0/${V0}1/$anon_inode_name/$rename_file_gfid
+
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST mv $M0/l2/file-link2 $M0/l2/renamed-file-link2
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 2
+TEST ! stat $M0/l2/file-link2
+TEST stat $B0/${V0}2/$anon_inode_name/$rename_file_gfid
+
+#Simulate only anon-inodes present in all bricks
+TEST rm -f $M0/l0/renamed-file $M0/l1/renamed-file-link1 $M0/l2/renamed-file-link2
+
+#Test that shd doesn't cleanup anon-inodes when some bricks are down
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST $CLI volume heal $V0 enable
+$CLI volume heal $V0
+sleep 5 #Allow time for completion of one scan
+TEST stat $B0/${V0}0/$anon_inode_name/$link_gfid
+TEST stat $B0/${V0}0/$anon_inode_name/$rename_dir_gfid
+TEST stat $B0/${V0}0/$anon_inode_name/$dir_gfid
+rename_dir_inum=$(STAT_INO $B0/${V0}0/$anon_inode_name/$rename_dir_gfid)
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 1
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}1
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}2
+
+#Test that rename indeed happened instead of rmdir/mkdir
+renamed_dir_inum=$(STAT_INO $B0/${V0}0/d2/a)
+EXPECT "$rename_dir_inum" echo $renamed_dir_inum
+cleanup;
diff --git a/tests/basic/afr/afr-seek.t b/tests/basic/afr/afr-seek.t
new file mode 100644
index 00000000000..c12ee011660
--- /dev/null
+++ b/tests/basic/afr/afr-seek.t
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+SEEK=$(dirname $0)/seek
+build_tester $(dirname $0)/../seek.c -o ${SEEK}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+TEST mkdir -p $B0/${V0}{0..2}
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..2}
+
+TEST $CLI volume start $V0
+
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+TEST ${SEEK} create ${M0}/test 0 1 1048576 1
+# Determine underlying filesystem allocation block size
+BSIZE="$(($(${SEEK} scan ${M0}/test hole 0) * 2))"
+
+TEST ${SEEK} create ${M0}/test 0 ${BSIZE} $((${BSIZE} * 4 + 512)) ${BSIZE}
+
+EXPECT "^0$" ${SEEK} scan ${M0}/test data 0
+EXPECT "^$((${BSIZE} / 2))$" ${SEEK} scan ${M0}/test data $((${BSIZE} / 2))
+EXPECT "^$((${BSIZE} - 1))$" ${SEEK} scan ${M0}/test data $((${BSIZE} - 1))
+EXPECT "^$((${BSIZE} * 4))$" ${SEEK} scan ${M0}/test data ${BSIZE}
+EXPECT "^$((${BSIZE} * 4))$" ${SEEK} scan ${M0}/test data $((${BSIZE} * 4))
+EXPECT "^$((${BSIZE} * 5))$" ${SEEK} scan ${M0}/test data $((${BSIZE} * 5))
+EXPECT "^$((${BSIZE} * 5 + 511))$" ${SEEK} scan ${M0}/test data $((${BSIZE} * 5 + 511))
+EXPECT "^ENXIO$" ${SEEK} scan ${M0}/test data $((${BSIZE} * 5 + 512))
+EXPECT "^ENXIO$" ${SEEK} scan ${M0}/test data $((${BSIZE} * 6))
+
+EXPECT "^${BSIZE}$" ${SEEK} scan ${M0}/test hole 0
+EXPECT "^${BSIZE}$" ${SEEK} scan ${M0}/test hole $((${BSIZE} / 2))
+EXPECT "^${BSIZE}$" ${SEEK} scan ${M0}/test hole $((${BSIZE} - 1))
+EXPECT "^${BSIZE}$" ${SEEK} scan ${M0}/test hole ${BSIZE}
+EXPECT "^$((${BSIZE} * 5 + 512))$" ${SEEK} scan ${M0}/test hole $((${BSIZE} * 4))
+EXPECT "^$((${BSIZE} * 5 + 512))$" ${SEEK} scan ${M0}/test hole $((${BSIZE} * 5))
+EXPECT "^$((${BSIZE} * 5 + 512))$" ${SEEK} scan ${M0}/test hole $((${BSIZE} * 5 + 511))
+EXPECT "^ENXIO$" ${SEEK} scan ${M0}/test hole $((${BSIZE} * 5 + 512))
+EXPECT "^ENXIO$" ${SEEK} scan ${M0}/test hole $((${BSIZE} * 6))
+
+rm -f ${SEEK}
+cleanup
+
+# Centos6 regression slaves seem to not support SEEK_DATA/SEEK_HOLE
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
diff --git a/tests/basic/afr/arbiter-mount.t b/tests/basic/afr/arbiter-mount.t
index da99096f81f..404d334d2f9 100644
--- a/tests/basic/afr/arbiter-mount.t
+++ b/tests/basic/afr/arbiter-mount.t
@@ -4,6 +4,9 @@
. $(dirname $0)/../../volume.rc
. $(dirname $0)/../../afr.rc
. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup;
#Check that mounting fails when only arbiter brick is up.
diff --git a/tests/basic/afr/durability-off.t b/tests/basic/afr/durability-off.t
index 155ffa09ef0..6e0f18b88f8 100644
--- a/tests/basic/afr/durability-off.t
+++ b/tests/basic/afr/durability-off.t
@@ -26,6 +26,8 @@ TEST $CLI volume heal $V0
EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
EXPECT "^0$" echo $($CLI volume profile $V0 info | grep -w FSYNC | wc -l)
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
#Test that fsyncs happen when durability is on
TEST $CLI volume set $V0 cluster.ensure-durability on
TEST $CLI volume set $V0 performance.strict-write-ordering on
diff --git a/tests/basic/afr/entry-self-heal-anon-dir-off.t b/tests/basic/afr/entry-self-heal-anon-dir-off.t
new file mode 100644
index 00000000000..7bb6ee14193
--- /dev/null
+++ b/tests/basic/afr/entry-self-heal-anon-dir-off.t
@@ -0,0 +1,459 @@
+#!/bin/bash
+
+#This file checks if missing entry self-heal and entry self-heal are working
+#as expected.
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+function get_file_type {
+ stat -c "%a:%F:%g:%t:%T:%u" $1
+}
+
+function diff_dirs {
+ diff <(ls $1 | sort) <(ls $2 | sort)
+}
+
+function heal_status {
+ local f1_path="${1}/${3}"
+ local f2_path="${2}/${3}"
+ local insync=""
+ diff_dirs $f1_path $f2_path
+ if [ $? -eq 0 ];
+ then
+ insync="Y"
+ else
+ insync="N"
+ fi
+ local xattr11=$(get_hex_xattr trusted.afr.$V0-client-0 $f1_path)
+ local xattr12=$(get_hex_xattr trusted.afr.$V0-client-1 $f1_path)
+ local xattr21=$(get_hex_xattr trusted.afr.$V0-client-0 $f2_path)
+ local xattr22=$(get_hex_xattr trusted.afr.$V0-client-1 $f2_path)
+ local dirty1=$(get_hex_xattr trusted.afr.dirty $f1_path)
+ local dirty2=$(get_hex_xattr trusted.afr.dirty $f2_path)
+ if [ -z $xattr11 ]; then xattr11="000000000000000000000000"; fi
+ if [ -z $xattr12 ]; then xattr12="000000000000000000000000"; fi
+ if [ -z $xattr21 ]; then xattr21="000000000000000000000000"; fi
+ if [ -z $xattr22 ]; then xattr22="000000000000000000000000"; fi
+ if [ -z $dirty1 ]; then dirty1="000000000000000000000000"; fi
+ if [ -z $dirty2 ]; then dirty2="000000000000000000000000"; fi
+ echo ${insync}${xattr11}${xattr12}${xattr21}${xattr22}${dirty1}${dirty2}
+}
+
+function is_heal_done {
+ local zero_xattr="000000000000000000000000"
+ if [ "$(heal_status $@)" == "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" ];
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+function print_pending_heals {
+ local result=":"
+ for i in "$@";
+ do
+ if [ "N" == $(is_heal_done $B0/${V0}0 $B0/${V0}1 $i) ];
+ then
+ result="$result:$i"
+ fi
+ done
+#To prevent any match for EXPECT_WITHIN, print a char non-existent in file-names
+ if [ $result == ":" ]; then result="~"; fi
+ echo $result
+}
+
+zero_xattr="000000000000000000000000"
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume set $V0 cluster.use-anonymous-inode off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.readdir-ahead off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --use-readdirp=no $M0
+cd $M0
+#_me_ is dir on which missing entry self-heal happens, _heal is where dir self-heal happens
+#spb is split-brain, fool is all fool
+
+#source_self_accusing means there exists source and a sink which self-accuses.
+#This simulates failures where fops failed on the bricks without it going down.
+#Something like EACCESS/EDQUOT etc
+
+TEST mkdir spb_heal spb spb_me_heal spb_me fool_heal fool_me v1_fool_heal v1_fool_me source_creations_heal source_deletions_heal source_creations_me source_deletions_me v1_dirty_me v1_dirty_heal source_self_accusing
+TEST mkfifo source_deletions_heal/fifo
+TEST mknod source_deletions_heal/block b 4 5
+TEST mknod source_deletions_heal/char c 1 5
+TEST touch source_deletions_heal/file
+TEST ln -s source_deletions_heal/file source_deletions_heal/slink
+TEST mkdir source_deletions_heal/dir1
+TEST mkdir source_deletions_heal/dir1/dir2
+
+TEST mkfifo source_deletions_me/fifo
+TEST mknod source_deletions_me/block b 4 5
+TEST mknod source_deletions_me/char c 1 5
+TEST touch source_deletions_me/file
+TEST ln -s source_deletions_me/file source_deletions_me/slink
+TEST mkdir source_deletions_me/dir1
+TEST mkdir source_deletions_me/dir1/dir2
+
+TEST mkfifo source_self_accusing/fifo
+TEST mknod source_self_accusing/block b 4 5
+TEST mknod source_self_accusing/char c 1 5
+TEST touch source_self_accusing/file
+TEST ln -s source_self_accusing/file source_self_accusing/slink
+TEST mkdir source_self_accusing/dir1
+TEST mkdir source_self_accusing/dir1/dir2
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+TEST touch spb_heal/0 spb/0 spb_me_heal/0 spb_me/0 fool_heal/0 fool_me/0 v1_fool_heal/0 v1_fool_me/0 v1_dirty_heal/0 v1_dirty_me/0
+TEST rm -rf source_deletions_heal/fifo source_deletions_heal/block source_deletions_heal/char source_deletions_heal/file source_deletions_heal/slink source_deletions_heal/dir1
+TEST rm -rf source_deletions_me/fifo source_deletions_me/block source_deletions_me/char source_deletions_me/file source_deletions_me/slink source_deletions_me/dir1
+TEST rm -rf source_self_accusing/fifo source_self_accusing/block source_self_accusing/char source_self_accusing/file source_self_accusing/slink source_self_accusing/dir1
+
+#Test that the files are deleted
+TEST ! stat $B0/${V0}1/source_deletions_heal/fifo
+TEST ! stat $B0/${V0}1/source_deletions_heal/block
+TEST ! stat $B0/${V0}1/source_deletions_heal/char
+TEST ! stat $B0/${V0}1/source_deletions_heal/file
+TEST ! stat $B0/${V0}1/source_deletions_heal/slink
+TEST ! stat $B0/${V0}1/source_deletions_heal/dir1
+TEST ! stat $B0/${V0}1/source_deletions_me/fifo
+TEST ! stat $B0/${V0}1/source_deletions_me/block
+TEST ! stat $B0/${V0}1/source_deletions_me/char
+TEST ! stat $B0/${V0}1/source_deletions_me/file
+TEST ! stat $B0/${V0}1/source_deletions_me/slink
+TEST ! stat $B0/${V0}1/source_deletions_me/dir1
+TEST ! stat $B0/${V0}1/source_self_accusing/fifo
+TEST ! stat $B0/${V0}1/source_self_accusing/block
+TEST ! stat $B0/${V0}1/source_self_accusing/char
+TEST ! stat $B0/${V0}1/source_self_accusing/file
+TEST ! stat $B0/${V0}1/source_self_accusing/slink
+TEST ! stat $B0/${V0}1/source_self_accusing/dir1
+
+
+TEST mkfifo source_creations_heal/fifo
+TEST mknod source_creations_heal/block b 4 5
+TEST mknod source_creations_heal/char c 1 5
+TEST touch source_creations_heal/file
+TEST ln -s source_creations_heal/file source_creations_heal/slink
+TEST mkdir source_creations_heal/dir1
+TEST mkdir source_creations_heal/dir1/dir2
+
+TEST mkfifo source_creations_me/fifo
+TEST mknod source_creations_me/block b 4 5
+TEST mknod source_creations_me/char c 1 5
+TEST touch source_creations_me/file
+TEST ln -s source_creations_me/file source_creations_me/slink
+TEST mkdir source_creations_me/dir1
+TEST mkdir source_creations_me/dir1/dir2
+
+$CLI volume stop $V0
+
+#simulate fool fool scenario for fool_* dirs
+setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/{fool_heal,fool_me}
+setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me}
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me}
+
+#Simulate v1-dirty(self-accusing but no pending ops on others) scenario for v1-dirty
+setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/v1_dirty_{heal,me}
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/v1_dirty_{heal,me}
+
+$CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+TEST touch spb_heal/1 spb/0 spb_me_heal/1 spb_me/0 fool_heal/1 fool_me/1 v1_fool_heal/1 v1_fool_me/1
+
+$CLI volume stop $V0
+
+#simulate fool fool scenario for fool_* dirs
+setfattr -x trusted.afr.$V0-client-1 $B0/${V0}0/{fool_heal,fool_me}
+setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me}
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me}
+
+#simulate self-accusing for source_self_accusing
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000006 $B0/${V0}0/source_self_accusing
+
+$CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+# Check if conservative merges happened correctly on _me_ dirs
+TEST stat spb_me_heal/1
+TEST stat $B0/${V0}0/spb_me_heal/1
+TEST stat $B0/${V0}1/spb_me_heal/1
+
+TEST stat spb_me_heal/0
+TEST stat $B0/${V0}0/spb_me_heal/0
+TEST stat $B0/${V0}1/spb_me_heal/0
+
+TEST stat fool_me/1
+TEST stat $B0/${V0}0/fool_me/1
+TEST stat $B0/${V0}1/fool_me/1
+
+TEST stat fool_me/0
+TEST stat $B0/${V0}0/fool_me/0
+TEST stat $B0/${V0}1/fool_me/0
+
+TEST stat v1_fool_me/0
+TEST stat $B0/${V0}0/v1_fool_me/0
+TEST stat $B0/${V0}1/v1_fool_me/0
+
+TEST stat v1_fool_me/1
+TEST stat $B0/${V0}0/v1_fool_me/1
+TEST stat $B0/${V0}1/v1_fool_me/1
+
+TEST stat v1_dirty_me/0
+TEST stat $B0/${V0}0/v1_dirty_me/0
+TEST stat $B0/${V0}1/v1_dirty_me/0
+
+#Check if files that have gfid-mismatches in _me_ are giving EIO
+TEST ! stat spb_me/0
+
+#Check if stale files are deleted on access
+TEST ! stat source_deletions_me/fifo
+TEST ! stat $B0/${V0}0/source_deletions_me/fifo
+TEST ! stat $B0/${V0}1/source_deletions_me/fifo
+TEST ! stat source_deletions_me/block
+TEST ! stat $B0/${V0}0/source_deletions_me/block
+TEST ! stat $B0/${V0}1/source_deletions_me/block
+TEST ! stat source_deletions_me/char
+TEST ! stat $B0/${V0}0/source_deletions_me/char
+TEST ! stat $B0/${V0}1/source_deletions_me/char
+TEST ! stat source_deletions_me/file
+TEST ! stat $B0/${V0}0/source_deletions_me/file
+TEST ! stat $B0/${V0}1/source_deletions_me/file
+TEST ! stat source_deletions_me/file
+TEST ! stat $B0/${V0}0/source_deletions_me/file
+TEST ! stat $B0/${V0}1/source_deletions_me/file
+TEST ! stat source_deletions_me/dir1/dir2
+TEST ! stat $B0/${V0}0/source_deletions_me/dir1/dir2
+TEST ! stat $B0/${V0}1/source_deletions_me/dir1/dir2
+TEST ! stat source_deletions_me/dir1
+TEST ! stat $B0/${V0}0/source_deletions_me/dir1
+TEST ! stat $B0/${V0}1/source_deletions_me/dir1
+
+#Test if the files created as part of access are healed correctly
+r=$(get_file_type source_creations_me/fifo)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/fifo
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/fifo
+TEST [ -p source_creations_me/fifo ]
+
+r=$(get_file_type source_creations_me/block)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/block
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/block
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/block
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/block
+TEST [ -b source_creations_me/block ]
+
+r=$(get_file_type source_creations_me/char)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/char
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/char
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/char
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/char
+TEST [ -c source_creations_me/char ]
+
+r=$(get_file_type source_creations_me/file)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/file
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/file
+TEST [ -f source_creations_me/file ]
+
+r=$(get_file_type source_creations_me/slink)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/slink
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/slink
+TEST [ -h source_creations_me/slink ]
+
+r=$(get_file_type source_creations_me/dir1/dir2)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1/dir2
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1/dir2
+TEST [ -d source_creations_me/dir1/dir2 ]
+
+r=$(get_file_type source_creations_me/dir1)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1
+TEST [ -d source_creations_me/dir1 ]
+
+#Trigger heal and check _heal dirs are healed properly
+#Trigger change in event generation number. That way inodes would get refreshed during lookup
+TEST kill_brick $V0 $H0 $B0/${V0}1
+$CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+TEST stat spb_heal
+TEST stat spb_me_heal
+TEST stat fool_heal
+TEST stat fool_me
+TEST stat v1_fool_heal
+TEST stat v1_fool_me
+TEST stat source_deletions_heal
+TEST stat source_deletions_me
+TEST stat source_self_accusing
+TEST stat source_creations_heal
+TEST stat source_creations_me
+TEST stat v1_dirty_heal
+TEST stat v1_dirty_me
+TEST $CLI volume stop $V0
+TEST rm -rf $B0/${V0}{0,1}/.glusterfs/indices/xattrop/*
+
+$CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+#Create base entry in indices/xattrop
+echo "Data" > $M0/FILE
+rm -f $M0/FILE
+EXPECT "1" count_index_entries $B0/${V0}0
+EXPECT "1" count_index_entries $B0/${V0}1
+
+TEST $CLI volume stop $V0;
+
+#Create entries for fool_heal and fool_me to ensure they are fully healed and dirty xattrs erased, before triggering index heal
+create_brick_xattrop_entry $B0/${V0}0 fool_heal fool_me source_creations_heal/dir1
+
+$CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+$CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+TEST $CLI volume heal $V0;
+EXPECT_WITHIN $HEAL_TIMEOUT "~" print_pending_heals spb_heal spb_me_heal fool_heal fool_me v1_fool_heal v1_fool_me source_deletions_heal source_deletions_me source_creations_heal source_creations_me v1_dirty_heal v1_dirty_me source_self_accusing
+
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_me_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_me
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_me
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_me
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_self_accusing
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_me
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_me
+
+#Don't access the files/dirs from mount point as that may cause self-heals
+# Check if conservative merges happened correctly on heal dirs
+TEST stat $B0/${V0}0/spb_heal/1
+TEST stat $B0/${V0}1/spb_heal/1
+
+TEST stat $B0/${V0}0/spb_heal/0
+TEST stat $B0/${V0}1/spb_heal/0
+
+TEST stat $B0/${V0}0/fool_heal/1
+TEST stat $B0/${V0}1/fool_heal/1
+
+TEST stat $B0/${V0}0/fool_heal/0
+TEST stat $B0/${V0}1/fool_heal/0
+
+TEST stat $B0/${V0}0/v1_fool_heal/0
+TEST stat $B0/${V0}1/v1_fool_heal/0
+
+TEST stat $B0/${V0}0/v1_fool_heal/1
+TEST stat $B0/${V0}1/v1_fool_heal/1
+
+TEST stat $B0/${V0}0/v1_dirty_heal/0
+TEST stat $B0/${V0}1/v1_dirty_heal/0
+
+#Check if files that have gfid-mismatches in spb are giving EIO
+TEST ! stat spb/0
+
+#Check if stale files are deleted on access
+TEST ! stat $B0/${V0}0/source_deletions_heal/fifo
+TEST ! stat $B0/${V0}1/source_deletions_heal/fifo
+TEST ! stat $B0/${V0}0/source_deletions_heal/block
+TEST ! stat $B0/${V0}1/source_deletions_heal/block
+TEST ! stat $B0/${V0}0/source_deletions_heal/char
+TEST ! stat $B0/${V0}1/source_deletions_heal/char
+TEST ! stat $B0/${V0}0/source_deletions_heal/file
+TEST ! stat $B0/${V0}1/source_deletions_heal/file
+TEST ! stat $B0/${V0}0/source_deletions_heal/file
+TEST ! stat $B0/${V0}1/source_deletions_heal/file
+TEST ! stat $B0/${V0}0/source_deletions_heal/dir1/dir2
+TEST ! stat $B0/${V0}1/source_deletions_heal/dir1/dir2
+TEST ! stat $B0/${V0}0/source_deletions_heal/dir1
+TEST ! stat $B0/${V0}1/source_deletions_heal/dir1
+
+#Check if stale files are deleted on access
+TEST ! stat $B0/${V0}0/source_self_accusing/fifo
+TEST ! stat $B0/${V0}1/source_self_accusing/fifo
+TEST ! stat $B0/${V0}0/source_self_accusing/block
+TEST ! stat $B0/${V0}1/source_self_accusing/block
+TEST ! stat $B0/${V0}0/source_self_accusing/char
+TEST ! stat $B0/${V0}1/source_self_accusing/char
+TEST ! stat $B0/${V0}0/source_self_accusing/file
+TEST ! stat $B0/${V0}1/source_self_accusing/file
+TEST ! stat $B0/${V0}0/source_self_accusing/file
+TEST ! stat $B0/${V0}1/source_self_accusing/file
+TEST ! stat $B0/${V0}0/source_self_accusing/dir1/dir2
+TEST ! stat $B0/${V0}1/source_self_accusing/dir1/dir2
+TEST ! stat $B0/${V0}0/source_self_accusing/dir1
+TEST ! stat $B0/${V0}1/source_self_accusing/dir1
+
+#Test if the files created as part of full self-heal correctly
+r=$(get_file_type $B0/${V0}0/source_creations_heal/fifo)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/fifo
+TEST [ -p $B0/${V0}0/source_creations_heal/fifo ]
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/block
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/block
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/block)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/block
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/char)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/char
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/char
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/char
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/file)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/file
+TEST [ -f $B0/${V0}0/source_creations_heal/file ]
+
+r=$(get_file_type source_creations_heal/file $B0/${V0}0/slink)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/file slink
+TEST [ -h $B0/${V0}0/source_creations_heal/slink ]
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1/dir2)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1/dir2
+TEST [ -d $B0/${V0}0/source_creations_heal/dir1/dir2 ]
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1
+TEST [ -d $B0/${V0}0/source_creations_heal/dir1 ]
+
+cd -
+
+#Anonymous directory shouldn't be created
+TEST mkdir $M0/rename-dir
+before_rename=$(STAT_INO $B0/${V0}1/rename-dir)
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST mv $M0/rename-dir $M0/new-name
+TEST $CLI volume start $V0 force
+#'spb' is in split-brain so pending-heal-count will be 2
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
+after_rename=$(STAT_INO $B0/${V0}1/new-name)
+EXPECT "0" echo $(ls -a $B0/${V0}0/ | grep anonymous-inode | wc -l)
+EXPECT "0" echo $(ls -a $B0/${V0}1/ | grep anonymous-inode | wc -l)
+EXPECT_NOT "$before_rename" echo $after_rename
+cleanup
diff --git a/tests/basic/afr/entry-self-heal.t b/tests/basic/afr/entry-self-heal.t
index 3c900fdcf9a..0c1da7d211e 100644
--- a/tests/basic/afr/entry-self-heal.t
+++ b/tests/basic/afr/entry-self-heal.t
@@ -79,6 +79,9 @@ TEST $CLI volume set $V0 performance.open-behind off
TEST $CLI volume set $V0 performance.stat-prefetch off
TEST $CLI volume set $V0 performance.io-cache off
TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
TEST $CLI volume start $V0
TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --use-readdirp=no $M0
diff --git a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
index f4aa351e461..35e295dc170 100644
--- a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
+++ b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
@@ -168,8 +168,8 @@ TEST [ "$gfid_1" != "$gfid_2" ]
#We know that second brick has the bigger size file
BIGGER_FILE_MD5=$(md5sum $B0/${V0}1/f3 | cut -d\ -f1)
-TEST ls $M0/f3
-TEST cat $M0/f3
+TEST ls $M0 #Trigger entry heal via readdir inode refresh
+TEST cat $M0/f3 #Trigger data heal via readv inode refresh
EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
#gfid split-brain should be resolved
@@ -215,8 +215,8 @@ TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
-TEST ls $M0/f4
-TEST cat $M0/f4
+TEST ls $M0 #Trigger entry heal via readdir inode refresh
+TEST cat $M0/f4 #Trigger data heal via readv inode refresh
EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
#gfid split-brain should be resolved
@@ -227,4 +227,3 @@ HEALED_MD5=$(md5sum $B0/${V0}2/f4 | cut -d\ -f1)
TEST [ "$MAJORITY_MD5" == "$HEALED_MD5" ]
cleanup;
-#G_TESTDEF_TEST_STATUS_NETBSD7=1501390
diff --git a/tests/basic/afr/gfid-self-heal.t b/tests/basic/afr/gfid-self-heal.t
index b54edbcae85..5a530681186 100644
--- a/tests/basic/afr/gfid-self-heal.t
+++ b/tests/basic/afr/gfid-self-heal.t
@@ -50,6 +50,10 @@ TEST kill_brick $V0 $H0 $B0/${V0}0
TEST touch $M0/a
gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/a)
TEST touch $B0/${V0}0/a
+# storage/posix considers that a file without gfid changed less than a second
+# before doesn't exist, so we need to wait for a second to force posix to
+# consider that this is a valid file but without gfid.
+sleep 1
$CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
TEST stat $M0/a
@@ -62,6 +66,10 @@ TEST kill_brick $V0 $H0 $B0/${V0}0
TEST touch $M0/b
TEST mkdir $B0/${V0}0/b
TEST setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1
+# storage/posix considers that a file without gfid changed less than a second
+# before doesn't exist, so we need to wait for a second to force posix to
+# consider that this is a valid file but without gfid.
+sleep 1
$CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
TEST ! stat $M0/b
@@ -71,6 +79,10 @@ TEST "[[ -z \"$gfid_0\" ]]"
#Check gfid assigning doesn't happen when there is type mismatch
TEST touch $B0/${V0}1/c
TEST mkdir $B0/${V0}0/c
+# storage/posix considers that a file without gfid changed less than a second
+# before doesn't exist, so we need to wait for a second to force posix to
+# consider that this is a valid file but without gfid.
+sleep 1
TEST ! stat $M0/c
gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/c)
gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/c)
@@ -81,6 +93,10 @@ TEST "[[ -z \"$gfid_0\" ]]"
# gfid split-brain
TEST kill_brick $V0 $H0 $B0/${V0}0
TEST touch $B0/${V0}1/d
+# storage/posix considers that a file without gfid changed less than a second
+# before doesn't exist, so we need to wait for a second to force posix to
+# consider that this is a valid file but without gfid.
+sleep 1
TEST ! stat $M0/d
gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/d)
TEST "[[ -z \"$gfid_1\" ]]"
diff --git a/tests/basic/afr/granular-esh/cli.t b/tests/basic/afr/granular-esh/cli.t
index dbf67065930..10b6c6398da 100644
--- a/tests/basic/afr/granular-esh/cli.t
+++ b/tests/basic/afr/granular-esh/cli.t
@@ -108,7 +108,7 @@ TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID
TEST $CLI volume reset $V0
# Ensure that granular entry heal is also disabled
EXPECT "no" volume_get_field $V0 cluster.granular-entry-heal
-EXPECT "on" volume_get_field $V0 cluster.entry-self-heal
+EXPECT "off" volume_get_field $V0 cluster.entry-self-heal
cleanup
#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1399038
diff --git a/tests/basic/afr/halo.t b/tests/basic/afr/halo.t
new file mode 100644
index 00000000000..3f61f5a0402
--- /dev/null
+++ b/tests/basic/afr/halo.t
@@ -0,0 +1,61 @@
+#!/bin/bash
+#Tests that halo basic functionality works as expected
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+function get_up_child()
+{
+ if [ "1" == $(afr_private_key_value $V0 $M0 0 "child_up\[0\]") ];
+ then
+ echo 0
+ elif [ "1" == $(afr_private_key_value $V0 $M0 0 "child_up\[1\]") ]
+ then
+ echo 1
+ fi
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 cluster.halo-enabled yes
+TEST $CLI volume set $V0 cluster.halo-max-replicas 1
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+EXPECT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[0\]"
+EXPECT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[1\]"
+EXPECT_NOT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[0\]"
+EXPECT_NOT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[1\]"
+
+up_id=$(get_up_child)
+TEST [[ ! -z "$up_id" ]]
+
+down_id=$((1-up_id))
+
+TEST kill_brick $V0 $H0 $B0/${V0}${up_id}
+#As max-replicas is configured to be 1, down_child should be up now
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[${down_id}\]"
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "child_up\[${down_id}\]"
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "halo_child_up\[${up_id}\]"
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "child_up\[${up_id}\]"
+EXPECT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[${up_id}\]"
+EXPECT_NOT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[${down_id}\]"
+
+#Bring the brick back up and the state should be restored
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[${up_id}\]"
+
+up_id=$(get_up_child)
+TEST [[ ! -z "$up_id" ]]
+down_id=$((1-up_id))
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[${down_id}\]"
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "child_up\[${down_id}\]"
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[${up_id}\]"
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "child_up\[${up_id}\]"
+EXPECT_NOT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[0\]"
+EXPECT_NOT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[1\]"
+
+cleanup;
diff --git a/tests/basic/afr/rename-data-loss.t b/tests/basic/afr/rename-data-loss.t
new file mode 100644
index 00000000000..256ee2aafce
--- /dev/null
+++ b/tests/basic/afr/rename-data-loss.t
@@ -0,0 +1,72 @@
+#!/bin/bash
+#Self-heal tests
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
+TEST $CLI volume set $V0 write-behind off
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 data-self-heal off
+TEST $CLI volume set $V0 metadata-self-heal off
+TEST $CLI volume set $V0 entry-self-heal off
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+cd $M0
+TEST `echo "line1" >> file1`
+TEST mkdir dir1
+TEST mkdir dir2
+TEST mkdir -p dir1/dira/dirb
+TEST `echo "line1">>dir1/dira/dirb/file1`
+TEST mkdir delete_me
+TEST `echo "line1" >> delete_me/file1`
+
+#brick0 has witnessed the second write while brick1 is down.
+TEST kill_brick $V0 $H0 $B0/brick1
+TEST `echo "line2" >> file1`
+TEST `echo "line2" >> dir1/dira/dirb/file1`
+TEST `echo "line2" >> delete_me/file1`
+
+#Toggle the bricks that are up/down.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/brick0
+
+#Rename when the 'source' brick0 for data-selfheals is down.
+mv file1 file2
+mv dir1/dira dir2
+
+#Delete a dir when brick0 is down.
+rm -rf delete_me
+cd -
+
+#Bring everything up and trigger heal
+TEST $CLI volume set $V0 self-heal-daemon on
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/brick0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/brick1
+
+#Remount to avoid reading from caches
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+EXPECT "line2" tail -1 $M0/file2
+EXPECT "line2" tail -1 $M0/dir2/dira/dirb/file1
+TEST ! stat $M0/delete_me/file1
+TEST ! stat $M0/delete_me
+
+anon_inode_name=$(ls -a $B0/brick0 | grep glusterfs-anonymous-inode)
+TEST [[ -d $B0/brick0/$anon_inode_name ]]
+TEST [[ -d $B0/brick1/$anon_inode_name ]]
+cleanup
diff --git a/tests/basic/afr/root-squash-self-heal.t b/tests/basic/afr/root-squash-self-heal.t
index c4fab0a35b2..6e12098465a 100644
--- a/tests/basic/afr/root-squash-self-heal.t
+++ b/tests/basic/afr/root-squash-self-heal.t
@@ -11,6 +11,9 @@ TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
TEST $CLI volume set $V0 performance.stat-prefetch off
TEST $CLI volume set $V0 self-heal-daemon off
TEST $CLI volume set $V0 server.root-squash on
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
TEST $CLI volume start $V0
TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --no-root-squash=yes --use-readdirp=no $M0
TEST kill_brick $V0 $H0 $B0/${V0}0
diff --git a/tests/basic/afr/split-brain-favorite-child-policy-client-side-healing.t b/tests/basic/afr/split-brain-favorite-child-policy-client-side-healing.t
new file mode 100644
index 00000000000..7c249c4bcbd
--- /dev/null
+++ b/tests/basic/afr/split-brain-favorite-child-policy-client-side-healing.t
@@ -0,0 +1,124 @@
+#!/bin/bash
+
+#Test the client side split-brain resolution
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+GET_MDATA_PATH=$(dirname $0)/../../utils
+build_tester $GET_MDATA_PATH/get-mdata-xattr.c
+
+TEST glusterd
+TEST pidof glusterd
+
+count_files () {
+ ls $1 | wc -l
+}
+
+#Create replica 2 volume
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume set $V0 cluster.quorum-type fixed
+TEST $CLI volume set $V0 cluster.quorum-count 1
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST mkdir $M0/data
+TEST touch $M0/data/file
+
+
+############ Client side healing using favorite-child-policy = mtime #################
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST dd if=/dev/urandom of=$M0/data/file bs=1024 count=1024
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST dd if=/dev/urandom of=$M0/data/file bs=1024 count=1024
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+mtime1=$(get_mtime $B0/${V0}0/data/file)
+mtime2=$(get_mtime $B0/${V0}1/data/file)
+if (( $(echo "$mtime1 > $mtime2" | bc -l) )); then
+ LATEST_MTIME_MD5=$(md5sum $B0/${V0}0/data/file | cut -d\ -f1)
+else
+ LATEST_MTIME_MD5=$(md5sum $B0/${V0}1/data/file | cut -d\ -f1)
+fi
+
+#file will be in split-brain
+cat $M0/data/file > /dev/null
+EXPECT "1" echo $?
+
+TEST $CLI volume set $V0 cluster.favorite-child-policy mtime
+TEST $CLI volume start $V0 force
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" afr_get_split_brain_count $V0
+cat $M0/data/file > /dev/null
+EXPECT "0" echo $?
+M0_MD5=$(md5sum $M0/data/file | cut -d\ -f1)
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_get_split_brain_count $V0
+TEST [ "$LATEST_MTIME_MD5" == "$M0_MD5" ]
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+B0_MD5=$(md5sum $B0/${V0}0/data/file | cut -d\ -f1)
+B1_MD5=$(md5sum $B0/${V0}1/data/file | cut -d\ -f1)
+TEST [ "$LATEST_MTIME_MD5" == "$B0_MD5" ]
+TEST [ "$LATEST_MTIME_MD5" == "$B1_MD5" ]
+
+############ Client side directory conservative merge #################
+TEST $CLI volume reset $V0 cluster.favorite-child-policy
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST touch $M0/data/test
+files=$(count_files $M0/data)
+EXPECT "2" echo $files
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST touch $M0/data/test1
+files=$(count_files $M0/data)
+EXPECT "2" echo $files
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+#data dir will be in entry split-brain
+ls $M0/data > /dev/null
+EXPECT "2" echo $?
+
+TEST $CLI volume set $V0 cluster.favorite-child-policy mtime
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" afr_get_split_brain_count $V0
+
+
+ls $M0/data > /dev/null
+EXPECT "0" echo $?
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_get_split_brain_count $V0
+#Entry Split-brain is gone, but data self-heal is pending on the files
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
+
+cat $M0/data/test > /dev/null
+cat $M0/data/test1 > /dev/null
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+files=$(count_files $M0/data)
+EXPECT "3" echo $files
+
+TEST force_umount $M0
+TEST rm $GET_MDATA_PATH/get-mdata-xattr
+
+cleanup
diff --git a/tests/basic/afr/split-brain-heal-info.t b/tests/basic/afr/split-brain-heal-info.t
index 66275c57207..2e4742fff08 100644
--- a/tests/basic/afr/split-brain-heal-info.t
+++ b/tests/basic/afr/split-brain-heal-info.t
@@ -47,9 +47,11 @@ SPB_FILES=$(($SPB_FILES + 1))
#### Simulate entry-split-brain
TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN ${PROCESS_DOWN_TIMEOUT} "^0$" afr_child_up_status $V0 0
TEST touch $M0/espb/a
volume_start_force $V0
TEST kill_brick $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN ${PROCESS_DOWN_TIMEOUT} "^0$" afr_child_up_status $V0 1
TEST mkdir $M0/espb/a
volume_start_force $V0
SPB_FILES=$(($SPB_FILES + 1))
diff --git a/tests/basic/afr/split-brain-healing-ctime.t b/tests/basic/afr/split-brain-healing-ctime.t
new file mode 100644
index 00000000000..676788fce3f
--- /dev/null
+++ b/tests/basic/afr/split-brain-healing-ctime.t
@@ -0,0 +1,252 @@
+#!/bin/bash
+
+#Test the split-brain resolution CLI commands.
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function get_replicate_subvol_number {
+ local filename=$1
+ #get_backend_paths
+ if [ -f $B0/${V0}1/$filename ]
+ then
+ echo 0
+ elif [ -f $B0/${V0}3/$filename ]
+ then echo 1
+ else
+ echo -1
+ fi
+}
+
+cleanup;
+
+AREQUAL_PATH=$(dirname $0)/../../utils
+GET_MDATA_PATH=$(dirname $0)/../../utils
+CFLAGS=""
+test "`uname -s`" != "Linux" && {
+ CFLAGS="$CFLAGS -lintl";
+}
+build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS
+build_tester $GET_MDATA_PATH/get-mdata-xattr.c
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4}
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+cd $M0
+for i in {1..10}
+do
+ echo "Initial content">>file$i
+done
+
+replica_0_files_list=(`ls $B0/${V0}1|grep -v '^\.'`)
+replica_1_files_list=(`ls $B0/${V0}3|grep -v '^\.'`)
+
+############ Create data split-brain in the files. ###########################
+TEST kill_brick $V0 $H0 $B0/${V0}1
+for file in ${!replica_0_files_list[*]}
+do
+ echo "B1 is down">>${replica_0_files_list[$file]}
+done
+TEST kill_brick $V0 $H0 $B0/${V0}3
+for file in ${!replica_1_files_list[*]}
+do
+ echo "B3 is down">>${replica_1_files_list[$file]}
+done
+
+SMALLER_FILE_SIZE=$(stat -c %s file1)
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+TEST kill_brick $V0 $H0 $B0/${V0}2
+for file in ${!replica_0_files_list[*]}
+do
+ echo "B2 is down">>${replica_0_files_list[$file]}
+ echo "appending more content to make it the bigger file">>${replica_0_files_list[$file]}
+done
+TEST kill_brick $V0 $H0 $B0/${V0}4
+for file in ${!replica_1_files_list[*]}
+do
+ echo "B4 is down">>${replica_1_files_list[$file]}
+ echo "appending more content to make it the bigger file">>${replica_1_files_list[$file]}
+done
+
+BIGGER_FILE_SIZE=$(stat -c %s file1)
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 3
+
+
+############### Acessing the files should now give EIO. ###############################
+TEST ! cat file1
+TEST ! cat file2
+TEST ! cat file3
+TEST ! cat file4
+TEST ! cat file5
+TEST ! cat file6
+TEST ! cat file7
+TEST ! cat file8
+TEST ! cat file9
+TEST ! cat file10
+###################
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 3
+
+################ Heal file1 using the bigger-file option ##############
+$CLI volume heal $V0 split-brain bigger-file /file1
+EXPECT "0" echo $?
+EXPECT $BIGGER_FILE_SIZE stat -c %s file1
+
+################ Heal file2 using the bigger-file option and its gfid ##############
+subvolume=$(get_replicate_subvol_number file2)
+if [ $subvolume == 0 ]
+then
+ GFID=$(gf_get_gfid_xattr $B0/${V0}1/file2)
+elif [ $subvolume == 1 ]
+then
+ GFID=$(gf_get_gfid_xattr $B0/${V0}3/file2)
+fi
+GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
+$CLI volume heal $V0 split-brain bigger-file $GFIDSTR
+EXPECT "0" echo $?
+
+################ Heal file3 using the source-brick option ##############
+################ Use the brick having smaller file size as source #######
+subvolume=$(get_replicate_subvol_number file3)
+if [ $subvolume == 0 ]
+then
+ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}2 /file3
+elif [ $subvolume == 1 ]
+then
+ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 /file3
+fi
+EXPECT "0" echo $?
+EXPECT $SMALLER_FILE_SIZE stat -c %s file3
+
+################ Heal file4 using the source-brick option and it's gfid ##############
+################ Use the brick having smaller file size as source #######
+subvolume=$(get_replicate_subvol_number file4)
+if [ $subvolume == 0 ]
+then
+ GFID=$(gf_get_gfid_xattr $B0/${V0}1/file4)
+ GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
+ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}2 $GFIDSTR
+elif [ $subvolume == 1 ]
+then
+ GFID=$(gf_get_gfid_xattr $B0/${V0}3/file4)
+ GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
+ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 $GFIDSTR
+fi
+EXPECT "0" echo $?
+EXPECT $SMALLER_FILE_SIZE stat -c %s file4
+
+# With ctime enabled, the ctime xattr ("trusted.glusterfs.mdata") gets healed
+# as part of metadata heal. So mtime would be same, hence it can't be healed
+# using 'latest-mtime' policy, use 'source-brick' option instead.
+################ Heal file5 using the source-brick option ##############
+subvolume=$(get_replicate_subvol_number file5)
+if [ $subvolume == 0 ]
+then
+ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 /file5
+elif [ $subvolume == 1 ]
+then
+ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}3 /file5
+fi
+EXPECT "0" echo $?
+
+if [ $subvolume == 0 ]
+then
+ mtime1_after_heal=$(get_mtime $B0/${V0}1/file5)
+ mtime2_after_heal=$(get_mtime $B0/${V0}2/file5)
+elif [ $subvolume == 1 ]
+then
+ mtime1_after_heal=$(get_mtime $B0/${V0}3/file5)
+ mtime2_after_heal=$(get_mtime $B0/${V0}4/file5)
+fi
+
+#TODO: To below comparisons on full sub-second resolution
+
+TEST [ $mtime1_after_heal -eq $mtime2_after_heal ]
+
+mtime_mount_after_heal=$(stat -c %Y file5)
+
+TEST [ $mtime1_after_heal -eq $mtime_mount_after_heal ]
+
+################ Heal file6 using the source-brick option and its gfid ##############
+subvolume=$(get_replicate_subvol_number file6)
+if [ $subvolume == 0 ]
+then
+ GFID=$(gf_get_gfid_xattr $B0/${V0}1/file6)
+ GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
+ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 $GFIDSTR
+elif [ $subvolume == 1 ]
+then
+ GFID=$(gf_get_gfid_xattr $B0/${V0}3/file6)
+ GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
+ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}3 $GFIDSTR
+fi
+EXPECT "0" echo $?
+
+if [ $subvolume == 0 ]
+then
+ mtime1_after_heal=$(get_mtime $B0/${V0}1/file6)
+ mtime2_after_heal=$(get_mtime $B0/${V0}2/file6)
+elif [ $subvolume == 1 ]
+then
+ mtime1_after_heal=$(get_mtime $B0/${V0}3/file6)
+ mtime2_after_heal=$(get_mtime $B0/${V0}4/file6)
+fi
+
+#TODO: To below comparisons on full sub-second resolution
+
+TEST [ $mtime1_after_heal -eq $mtime2_after_heal ]
+
+mtime_mount_after_heal=$(stat -c %Y file6)
+
+TEST [ $mtime1_after_heal -eq $mtime_mount_after_heal ]
+
+################ Heal remaining SB'ed files of replica_0 using B1 as source ##############
+$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1
+EXPECT "0" echo $?
+
+################ Heal remaining SB'ed files of replica_1 using B3 as source ##############
+$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}3
+EXPECT "0" echo $?
+
+############### Reading the files should now succeed. ###############################
+TEST cat file1
+TEST cat file2
+TEST cat file3
+TEST cat file4
+TEST cat file5
+TEST cat file6
+TEST cat file7
+TEST cat file8
+TEST cat file9
+TEST cat file10
+
+################ File contents on the bricks must be same. ################################
+TEST diff <(arequal-checksum -p $B0/$V01 -i .glusterfs) <(arequal-checksum -p $B0/$V02 -i .glusterfs)
+TEST diff <(arequal-checksum -p $B0/$V03 -i .glusterfs) <(arequal-checksum -p $B0/$V04 -i .glusterfs)
+
+############### Trying to heal files not in SB should fail. ###############################
+$CLI volume heal $V0 split-brain bigger-file /file1
+EXPECT "1" echo $?
+$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 /file3
+EXPECT "1" echo $?
+
+cd -
+TEST rm $AREQUAL_PATH/arequal-checksum
+TEST rm $GET_MDATA_PATH/get-mdata-xattr
+cleanup
diff --git a/tests/basic/afr/split-brain-healing.t b/tests/basic/afr/split-brain-healing.t
index c80f900b909..315e815eb7e 100644
--- a/tests/basic/afr/split-brain-healing.t
+++ b/tests/basic/afr/split-brain-healing.t
@@ -20,11 +20,14 @@ function get_replicate_subvol_number {
cleanup;
AREQUAL_PATH=$(dirname $0)/../../utils
+GET_MDATA_PATH=$(dirname $0)/../../utils
CFLAGS=""
test "`uname -s`" != "Linux" && {
CFLAGS="$CFLAGS -lintl";
}
build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS
+build_tester $GET_MDATA_PATH/get-mdata-xattr.c
+
TEST glusterd
TEST pidof glusterd
TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4}
@@ -32,6 +35,7 @@ TEST $CLI volume set $V0 cluster.self-heal-daemon off
TEST $CLI volume set $V0 cluster.data-self-heal off
TEST $CLI volume set $V0 cluster.metadata-self-heal off
TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 ctime off
TEST $CLI volume start $V0
TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
@@ -152,13 +156,13 @@ EXPECT $SMALLER_FILE_SIZE stat -c %s file4
subvolume=$(get_replicate_subvol_number file5)
if [ $subvolume == 0 ]
then
- mtime1=$(stat -c %Y $B0/${V0}1/file5)
- mtime2=$(stat -c %Y $B0/${V0}2/file5)
+ mtime1=$(get_mtime $B0/${V0}1/file5)
+ mtime2=$(get_mtime $B0/${V0}2/file5)
LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2))
elif [ $subvolume == 1 ]
then
- mtime1=$(stat -c %Y $B0/${V0}3/file5)
- mtime2=$(stat -c %Y $B0/${V0}4/file5)
+ mtime1=$(get_mtime $B0/${V0}3/file5)
+ mtime2=$(get_mtime $B0/${V0}4/file5)
LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2))
fi
$CLI volume heal $V0 split-brain latest-mtime /file5
@@ -166,12 +170,12 @@ EXPECT "0" echo $?
if [ $subvolume == 0 ]
then
- mtime1_after_heal=$(stat -c %Y $B0/${V0}1/file5)
- mtime2_after_heal=$(stat -c %Y $B0/${V0}2/file5)
+ mtime1_after_heal=$(get_mtime $B0/${V0}1/file5)
+ mtime2_after_heal=$(get_mtime $B0/${V0}2/file5)
elif [ $subvolume == 1 ]
then
- mtime1_after_heal=$(stat -c %Y $B0/${V0}3/file5)
- mtime2_after_heal=$(stat -c %Y $B0/${V0}4/file5)
+ mtime1_after_heal=$(get_mtime $B0/${V0}3/file5)
+ mtime2_after_heal=$(get_mtime $B0/${V0}4/file5)
fi
#TODO: To below comparisons on full sub-second resolution
@@ -188,14 +192,14 @@ subvolume=$(get_replicate_subvol_number file6)
if [ $subvolume == 0 ]
then
GFID=$(gf_get_gfid_xattr $B0/${V0}1/file6)
- mtime1=$(stat -c %Y $B0/${V0}1/file6)
- mtime2=$(stat -c %Y $B0/${V0}2/file6)
+ mtime1=$(get_mtime $B0/${V0}1/file6)
+ mtime2=$(get_mtime $B0/${V0}2/file6)
LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2))
elif [ $subvolume == 1 ]
then
GFID=$(gf_get_gfid_xattr $B0/${V0}3/file6)
- mtime1=$(stat -c %Y $B0/${V0}3/file6)
- mtime2=$(stat -c %Y $B0/${V0}4/file6)
+ mtime1=$(get_mtime $B0/${V0}3/file6)
+ mtime2=$(get_mtime $B0/${V0}4/file6)
LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2))
fi
GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
@@ -204,12 +208,12 @@ EXPECT "0" echo $?
if [ $subvolume == 0 ]
then
- mtime1_after_heal=$(stat -c %Y $B0/${V0}1/file6)
- mtime2_after_heal=$(stat -c %Y $B0/${V0}2/file6)
+ mtime1_after_heal=$(get_mtime $B0/${V0}1/file6)
+ mtime2_after_heal=$(get_mtime $B0/${V0}2/file6)
elif [ $subvolume == 1 ]
then
- mtime1_after_heal=$(stat -c %Y $B0/${V0}3/file6)
- mtime2_after_heal=$(stat -c %Y $B0/${V0}4/file6)
+ mtime1_after_heal=$(get_mtime $B0/${V0}3/file6)
+ mtime2_after_heal=$(get_mtime $B0/${V0}4/file6)
fi
#TODO: To below comparisons on full sub-second resolution
@@ -253,4 +257,5 @@ EXPECT "1" echo $?
cd -
TEST rm $AREQUAL_PATH/arequal-checksum
+TEST rm $GET_MDATA_PATH/get-mdata-xattr
cleanup
diff --git a/tests/basic/afr/split-brain-resolution.t b/tests/basic/afr/split-brain-resolution.t
index dbe665e3e51..834237c96ec 100644
--- a/tests/basic/afr/split-brain-resolution.t
+++ b/tests/basic/afr/split-brain-resolution.t
@@ -11,6 +11,9 @@ function get_split_brain_status {
TEST glusterd
TEST pidof glusterd
TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
TEST $CLI volume start $V0
#Disable self-heal-daemon
@@ -71,6 +74,18 @@ TEST setfattr -n replica.split-brain-choice -v none $M0/data-split-brain.txt
TEST ! getfattr -n user.test $M0/metadata-split-brain.txt
TEST ! cat $M0/data-split-brain.txt
+#Check that after timeout fops result in EIO again.
+#Set one minute timeout
+TEST setfattr -n replica.split-brain-choice-timeout -v 1 $M0/
+TEST setfattr -n replica.split-brain-choice -v $V0-client-1 $M0/data-split-brain.txt
+EXPECT "brick1_alive" cat $M0/data-split-brain.txt
+TEST setfattr -n replica.split-brain-choice -v $V0-client-0 $M0/metadata-split-brain.txt
+EXPECT "brick0" get_text_xattr user.test $M0/metadata-split-brain.txt
+#Wait until timeout completes and test that the fops fail again
+sleep 62
+TEST ! getfattr -n user.test $M0/metadata-split-brain.txt
+TEST ! cat $M0/data-split-brain.txt
+
#Negative test cases should fail
TEST ! setfattr -n replica.split-brain-choice -v $V0-client-4 $M0/data-split-brain.txt
TEST ! setfattr -n replica.split-brain-heal-finalize -v $V0-client-4 $M0/metadata-split-brain.txt
diff --git a/tests/basic/afr/ta-check-locks.t b/tests/basic/afr/ta-check-locks.t
new file mode 100644
index 00000000000..c0102c35b7b
--- /dev/null
+++ b/tests/basic/afr/ta-check-locks.t
@@ -0,0 +1,68 @@
+#!/bin/bash
+#This test checks if all the locks on
+#ta file are being held and released properly
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../thin-arbiter.rc
+
+function get_lock_count_on_ta()
+{
+ tapid=`cat $B0/ta.pid`
+ local sfile=$(generate_statedump $tapid)
+ count=$(grep "inodelk-count" $sfile | cut -f2 -d'=' | tail -1)
+ ncount=$(grep "inodelk.inodelk" $sfile | grep "len=1" | wc -l)
+ echo "count = $count : ncount = $ncount"
+ if [ "$count" = "" ]
+ then
+ count=0
+ fi
+
+ if [ "$count" -eq "$ncount" ]
+ then
+ echo "$count"
+ else
+ echo "-1"
+ fi
+}
+
+cleanup;
+TEST ta_create_brick_and_volfile brick0
+TEST ta_create_brick_and_volfile brick1
+TEST ta_create_ta_and_volfile ta
+TEST ta_start_brick_process brick0
+TEST ta_start_brick_process brick1
+TEST ta_start_ta_process ta
+
+TEST ta_create_mount_volfile brick0 brick1 ta
+TEST ta_start_mount_process $M0
+TEST ta_start_mount_process $M1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M1 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "trusted.afr.patchy-ta-2" ls $B0/ta
+
+TEST ta_create_shd_volfile brick0 brick1 ta
+TEST ta_start_shd_process glustershd
+shd_pid=$(cat $B0/glustershd.pid)
+
+TEST touch $M0/a.txt
+echo "Hello" >> $M0/a.txt
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "0" get_lock_count_on_ta
+
+TEST ta_kill_brick brick0
+echo "Hello" >> $M0/a.txt
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "1" get_lock_count_on_ta
+
+echo "Hello" >> $M1/a.txt
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "2" get_lock_count_on_ta
+
+echo "xyz" >> $M0/a.txt
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "2" get_lock_count_on_ta
+
+chmod 0666 $M0/a.txt
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "2" get_lock_count_on_ta
+
+TEST ta_start_brick_process brick0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_lock_count_on_ta
+
+cleanup;
diff --git a/tests/basic/afr/ta-read.t b/tests/basic/afr/ta-read.t
index f2b3c38e06c..3cfc16b9b8a 100644
--- a/tests/basic/afr/ta-read.t
+++ b/tests/basic/afr/ta-read.t
@@ -25,31 +25,35 @@ TEST ! ls $B0/ta/FILE
# Kill one brick and write to FILE.
TEST ta_kill_brick brick0
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 0
echo "brick0 down">> $M0/FILE
TEST [ $? -eq 0 ]
EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/brick1/FILE
EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/trusted.afr.patchy-ta-2
#Umount and mount to remove cached data.
-TEST umount $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
TEST ta_start_mount_process $M0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 1
# Read must be allowed since good brick is up.
TEST cat $M0/FILE
+#Umount and mount to remove cached data.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST ta_start_mount_process $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0
# Toggle good and bad data brick processes.
TEST ta_start_brick_process brick0
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 0
TEST ta_kill_brick brick1
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 1
# Read must now fail.
TEST ! cat $M0/FILE
# Bring all data bricks up, and kill TA.
TEST ta_start_brick_process brick1
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 1
TA_PID=$(ta_get_pid_by_brick_name ta)
TEST [ -n $TA_PID ]
TEST ta_kill_brick ta
diff --git a/tests/basic/afr/ta-shd.t b/tests/basic/afr/ta-shd.t
index bb2e58b3f77..96ecfc678e0 100644
--- a/tests/basic/afr/ta-shd.t
+++ b/tests/basic/afr/ta-shd.t
@@ -22,7 +22,7 @@ TEST ta_start_shd_process glustershd
TEST touch $M0/a.txt
TEST ta_kill_brick brick0
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 0
echo "Hello" >> $M0/a.txt
EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/brick1/a.txt
EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/trusted.afr.$V0-ta-2
@@ -33,14 +33,14 @@ EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/
#the SHD process.
TEST ta_start_brick_process brick0
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 0
EXPECT_WITHIN $HEAL_TIMEOUT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/brick1/a.txt
EXPECT_WITHIN $HEAL_TIMEOUT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/trusted.afr.$V0-ta-2
#Kill the previously up brick and try reading from other brick. Since the heal
#has happened file content should be same.
TEST ta_kill_brick brick1
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 1
#Umount and mount to remove cached data.
TEST umount $M0
TEST ta_start_mount_process $M0
diff --git a/tests/basic/afr/ta-write-on-bad-brick.t b/tests/basic/afr/ta-write-on-bad-brick.t
index 18cb65b3a76..096ca9f47cf 100644
--- a/tests/basic/afr/ta-write-on-bad-brick.t
+++ b/tests/basic/afr/ta-write-on-bad-brick.t
@@ -26,26 +26,26 @@ TEST dd if=/dev/zero of=$M0/a.txt bs=1M count=5
#Good Data brick is down. TA and bad brick are UP
TEST ta_kill_brick brick1
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 1
TEST dd if=/dev/zero of=$M0/a.txt bs=1M count=5
TEST ta_kill_brick brick0
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 0
TEST ta_start_brick_process brick1
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 1
TEST ! dd if=/dev/zero of=$M0/a.txt bs=1M count=5
# Good Data brick is UP. Bad and TA are down
TEST ta_kill_brick brick1
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 1
TEST ta_start_brick_process brick0
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 0
TEST ta_kill_brick ta
TEST ! dd if=/dev/zero of=$M0/a.txt bs=1M count=5
# Good and Bad data bricks are UP. TA is down
TEST ta_start_brick_process brick1
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 0
TEST dd if=/dev/zero of=$M0/a.txt bs=1M count=5
cleanup;
diff --git a/tests/basic/afr/ta.t b/tests/basic/afr/ta.t
index d8beb1be461..05d48431c95 100644
--- a/tests/basic/afr/ta.t
+++ b/tests/basic/afr/ta.t
@@ -27,18 +27,28 @@ EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replica
TEST touch $M0/b.txt
EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/brick1
EXPECT "000000010000000200000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/brick1/b.txt
-#TODO: New entry mark lead to pending data on the file but no such marking happened on ta
-#EXPECT "000000010000000100000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/trusted.afr.patchy-ta-2
+#New entry mark lead to pending data on the file and on ta
+EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/trusted.afr.patchy-ta-2
TEST ! ls $B0/brick0/b.txt
TEST ls $B0/brick1/b.txt
+#Try to create an entry while good brick is down and bad brick is UP. Should not create
+TEST ta_start_brick_process brick0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+TEST ta_kill_brick brick1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1
+TEST ! touch $M0/d.txt
+EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/trusted.afr.patchy-ta-2
+
+TEST ta_start_brick_process brick1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
+TEST ta_kill_brick brick0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
+
TEST ta_kill_brick ta
-# Entry create must fail since only 1 brick is up.
+# Entry create must fail if only one brick is UP, even if that is a good brick.
TEST ! touch $M0/c.txt
TEST ! ls $B0/brick0/c.txt
TEST ! ls $B0/brick1/c.txt
-TEST ta_start_brick_process brick0
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
-
cleanup;
diff --git a/tests/basic/afr/tarissue.t b/tests/basic/afr/tarissue.t
index a35d468eb0c..83f7463130c 100644
--- a/tests/basic/afr/tarissue.t
+++ b/tests/basic/afr/tarissue.t
@@ -4,6 +4,8 @@
. $(dirname $0)/../../volume.rc
. $(dirname $0)/../../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
TESTS_EXPECTED_IN_LOOP=10
cleanup;
diff --git a/tests/basic/all_squash.t b/tests/basic/all_squash.t
new file mode 100644
index 00000000000..29766c50af7
--- /dev/null
+++ b/tests/basic/all_squash.t
@@ -0,0 +1,74 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0;
+
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+
+# random uid/gid
+uid=22162
+gid=5845
+
+TEST $CLI volume set $V0 server.anonuid $uid;
+TEST $CLI volume set $V0 server.anongid $gid;
+
+# Ensure server.all-squash is disabled
+TEST $CLI volume set $V0 server.all-squash disable;
+
+# Tests for the fuse mount
+mkdir $M0/other;
+chown $uid:$gid $M0/other;
+
+TEST $CLI volume set $V0 server.all-squash enable;
+
+touch $M0/file 2>/dev/null;
+TEST [ $? -ne 0 ]
+mkdir $M0/dir 2>/dev/null;
+TEST [ $? -ne 0 ]
+
+TEST touch $M0/other/file 2>/dev/null;
+TEST [ "$(stat -c %u:%g $M0/other/file)" = "$uid:$gid" ];
+TEST mkdir $M0/other/dir 2>/dev/null;
+TEST [ "$(stat -c %u:%g $M0/other/dir)" = "$uid:$gid" ];
+
+TEST $CLI volume set $V0 server.all-squash disable;
+TEST rm -rf $M0/other;
+
+sleep 1;
+
+# tests for nfs mount
+mkdir $N0/other;
+chown $uid:$gid $N0/other;
+
+TEST $CLI volume set $V0 server.all-squash enable;
+
+touch $N0/file 2>/dev/null;
+TEST [ $? -ne 0 ]
+mkdir $N0/dir 2>/dev/null;
+TEST [ $? -ne 0 ]
+
+TEST touch $N0/other/file 2>/dev/null;
+TEST [ "$(stat -c %u:%g $N0/other/file)" = "$uid:$gid" ];
+TEST mkdir $N0/other/dir 2>/dev/null;
+TEST [ "$(stat -c %u:%g $N0/other/dir)" = "$uid:$gid" ];
+
+TEST $CLI volume set $V0 server.all-squash disable;
+TEST rm -rf $N0/other;
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/basic/changelog/changelog-api.t b/tests/basic/changelog/changelog-api.t
new file mode 100644
index 00000000000..516c2f2f60d
--- /dev/null
+++ b/tests/basic/changelog/changelog-api.t
@@ -0,0 +1,37 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../env.rc
+
+cleanup;
+
+CHANGELOG_BIN_PATH=$(dirname $0)/../../utils/changelog
+build_tester $CHANGELOG_BIN_PATH/test-changelog-api.c -lgfchangelog
+
+CHANGELOG_PATH_0="$B0/${V0}0/.glusterfs/changelogs"
+ROLLOVER_TIME=2
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume set $V0 changelog.changelog on
+TEST $CLI volume set $V0 changelog.rollover-time $ROLLOVER_TIME
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+sleep 3;
+
+#Listen to changelog journal notifcations
+$CHANGELOG_BIN_PATH/test-changelog-api &
+for i in {1..12};do echo "data" > $M0/file$i 2>/dev/null; sleep 1;done &
+
+#Wait for changelogs to be in .processed directory
+sleep 12
+
+EXPECT "Y" processed_changelogs "/tmp/scratch_v1/.processed"
+TEST rm $CHANGELOG_BIN_PATH/test-changelog-api
+rm -rf /tmp/scratch_v1
+
+cleanup;
diff --git a/tests/basic/changelog/changelog-history.t b/tests/basic/changelog/changelog-history.t
index 3ce40981c90..ea952619652 100644
--- a/tests/basic/changelog/changelog-history.t
+++ b/tests/basic/changelog/changelog-history.t
@@ -5,6 +5,7 @@
cleanup;
+SCRIPT_TIMEOUT=300
HISTORY_BIN_PATH=$(dirname $0)/../../utils/changelog
build_tester $HISTORY_BIN_PATH/get-history.c -lgfchangelog
@@ -68,19 +69,23 @@ TEST $CLI volume set $V0 changelog.changelog off
sleep 3
time_after_disable=$(date '+%s')
+TEST $CLI volume set $V0 changelog.changelog on
+sleep 5
+
#Passes, gives the changelogs till continuous changelogs are available
# but returns 1
-EXPECT "1" $HISTORY_BIN_PATH/get-history $time_after_enable1 $time_in_sec_htime2
+EXPECT_WITHIN 10 "1" $HISTORY_BIN_PATH/get-history $time_after_enable1 $time_in_sec_htime2
#Fails as start falls between htime files
-EXPECT "-3" $HISTORY_BIN_PATH/get-history $time_between_htime $time_in_sec_htime1
+EXPECT_WITHIN 10 "-3" $HISTORY_BIN_PATH/get-history $time_between_htime $time_in_sec_htime1
#Passes as start and end falls in same htime file
-EXPECT "0" $HISTORY_BIN_PATH/get-history $time_in_sec_htime1 $time_in_sec_htime2
+EXPECT_WITHIN 10 "0" $HISTORY_BIN_PATH/get-history $time_in_sec_htime1 $time_in_sec_htime2
#Passes, gives the changelogs till continuous changelogs are available
-EXPECT "0" $HISTORY_BIN_PATH/get-history $time_in_sec_htime2 $time_after_disable
+EXPECT_WITHIN 10 "0" $HISTORY_BIN_PATH/get-history $time_in_sec_htime2 $time_after_disable
TEST rm $HISTORY_BIN_PATH/get-history
+rm -rf /tmp/scratch_v1/*
cleanup;
diff --git a/tests/basic/changelog/history-api.t b/tests/basic/changelog/history-api.t
new file mode 100644
index 00000000000..9e63118cef9
--- /dev/null
+++ b/tests/basic/changelog/history-api.t
@@ -0,0 +1,42 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../env.rc
+
+cleanup;
+
+HISTORY_BIN_PATH=$(dirname $0)/../../utils/changelog
+build_tester $HISTORY_BIN_PATH/test-history-api.c -lgfchangelog
+
+CHANGELOG_PATH_0="$B0/${V0}0/.glusterfs/changelogs"
+ROLLOVER_TIME=2
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume set $V0 changelog.changelog on
+TEST $CLI volume set $V0 changelog.rollover-time $ROLLOVER_TIME
+TEST $CLI volume start $V0
+
+sleep 3
+start=$(date '+%s')
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+touch $M0/file{1..10}
+
+for i in {1..12};do echo "data" > $M0/file$i; sleep 1;done
+end=$(date '+%s')
+sleep 2
+
+#Passes as start and end falls in same htime file
+EXPECT "0" $HISTORY_BIN_PATH/test-history-api $start $end
+
+#Wait for changelogs to be in .processed directory
+sleep 2
+
+EXPECT "Y" processed_changelogs "/tmp/scratch_v1/.history/.processed"
+TEST rm $HISTORY_BIN_PATH/test-history-api
+rm -rf /tmp/scratch_v1
+
+cleanup;
diff --git a/tests/basic/cloudsync-sanity.t b/tests/basic/cloudsync-sanity.t
index 3cf719da011..834ba96430c 100644
--- a/tests/basic/cloudsync-sanity.t
+++ b/tests/basic/cloudsync-sanity.t
@@ -19,4 +19,11 @@ TEST $GFS -s $H0 --volfile-id $V0 $M1;
# create operations
TEST $(dirname $0)/rpc-coverage.sh $M1
+
+TEST cp $(dirname ${0})/gfapi/glfsxmp-coverage.c glfsxmp.c
+TEST build_tester ./glfsxmp.c -lgfapi
+./glfsxmp $V0 $H0
+cleanup_tester ./glfsxmp
+rm ./glfsxmp.c
+
cleanup;
diff --git a/tests/basic/ctime/ctime-ec-heal.t b/tests/basic/ctime/ctime-ec-heal.t
new file mode 100644
index 00000000000..142237c5014
--- /dev/null
+++ b/tests/basic/ctime/ctime-ec-heal.t
@@ -0,0 +1,70 @@
+#!/bin/bash
+#
+# This will test self healing of ctime xattr 'trusted.glusterfs.mdata'
+#
+###
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup
+
+#cleate and start volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{1..3}
+TEST $CLI volume start $V0
+
+#Mount the volume
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+
+# Create files
+mkdir $M0/dir1
+echo "Initial content" > $M0/file1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/file1
+
+# Kill brick
+TEST kill_brick $V0 $H0 $B0/${V0}3
+
+echo "B3 is down" >> $M0/file1
+echo "Change dir1 time attributes" > $M0/dir1/dir1_file1
+echo "Entry heal file" > $M0/entry_heal_file1
+mkdir $M0/entry_heal_dir1
+
+# Check xattr
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_uniq_count $B0/${V0}{1..3}/file1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/dir1/dir1_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1/dir1_file1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/entry_heal_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/entry_heal_dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_dir1
+
+TEST $CLI volume start $V0 force
+$CLI volume heal $V0
+
+# Check xattr
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/file1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1/dir1_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1/dir1_file1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/entry_heal_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/entry_heal_dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_dir1
+
+cleanup;
diff --git a/tests/basic/ctime/ctime-ec-rebalance.t b/tests/basic/ctime/ctime-ec-rebalance.t
new file mode 100644
index 00000000000..2b73bcdd103
--- /dev/null
+++ b/tests/basic/ctime/ctime-ec-rebalance.t
@@ -0,0 +1,43 @@
+#!/bin/bash
+#
+# This will test healing of ctime xattr 'trusted.glusterfs.mdata' after add-brick and rebalance
+#
+###
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fallocate.rc
+
+cleanup
+
+#cleate and start volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+
+#Mount the volume
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+# Create files
+mkdir $M0/dir1
+echo "test data" > $M0/dir1/file1
+
+# Add brick
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{6..8}
+
+#Trigger rebalance
+TEST $CLI volume rebalance $V0 start force
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+
+#Verify ctime xattr heal on directory
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}6/dir1"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}7/dir1"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}8/dir1"
+
+b6_mdata=$(get_mdata "$B0/${V0}6/dir1")
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "${b6_mdata}" get_mdata $B0/${V0}7/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "${b6_mdata}" get_mdata $B0/${V0}8/dir1
+
+cleanup;
diff --git a/tests/basic/ctime/ctime-mdata-legacy-files.t b/tests/basic/ctime/ctime-mdata-legacy-files.t
new file mode 100644
index 00000000000..2e782d5c99d
--- /dev/null
+++ b/tests/basic/ctime/ctime-mdata-legacy-files.t
@@ -0,0 +1,83 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup;
+
+###############################################################################
+#Replica volume
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+#Disable ctime and create file, file doesn't contain "trusted.glusterfs.mdata" xattr
+TEST $CLI volume set $V0 ctime off
+
+TEST "mkdir $M0/DIR"
+TEST "echo hello_world > $M0/DIR/FILE"
+
+#Verify absence of xattr
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}0/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}0/DIR/FILE"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}1/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}1/DIR/FILE"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}2/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}2/DIR/FILE"
+
+#Enable ctime
+TEST $CLI volume set $V0 ctime on
+sleep 3
+TEST stat $M0/DIR/FILE
+
+#Verify presence "trusted.glusterfs.mdata" xattr on backend
+#The lookup above should have created xattr
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}0/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}0/DIR/FILE"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}1/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}1/DIR/FILE"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}2/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}2/DIR/FILE"
+
+###############################################################################
+#Disperse Volume
+
+TEST $CLI volume create $V1 disperse 3 redundancy 1 $H0:$B0/${V1}{0,1,2}
+TEST $CLI volume set $V1 performance.stat-prefetch off
+TEST $CLI volume start $V1
+
+TEST glusterfs --volfile-id=$V1 --volfile-server=$H0 --entry-timeout=0 $M1;
+
+#Disable ctime and create file, file doesn't contain "trusted.glusterfs.mdata" xattr
+TEST $CLI volume set $V1 ctime off
+TEST "mkdir $M1/DIR"
+TEST "echo hello_world > $M1/DIR/FILE"
+
+#Verify absence of xattr
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}0/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}0/DIR/FILE"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}1/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}1/DIR/FILE"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}2/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}2/DIR/FILE"
+
+#Enable ctime
+TEST $CLI volume set $V1 ctime on
+sleep 3
+TEST stat $M1/DIR/FILE
+
+#Verify presence "trusted.glusterfs.mdata" xattr on backend
+#The lookup above should have created xattr
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}0/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}0/DIR/FILE"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}1/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}1/DIR/FILE"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}2/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}2/DIR/FILE"
+
+cleanup;
+###############################################################################
diff --git a/tests/basic/ctime/ctime-readdir.c b/tests/basic/ctime/ctime-readdir.c
new file mode 100644
index 00000000000..8760db29ae8
--- /dev/null
+++ b/tests/basic/ctime/ctime-readdir.c
@@ -0,0 +1,29 @@
+#include <stdio.h>
+#include <dirent.h>
+#include <string.h>
+#include <assert.h>
+
+int
+main(int argc, char **argv)
+{
+ DIR *dir = NULL;
+ struct dirent *entry = NULL;
+ int ret = 0;
+ char *path = NULL;
+
+ assert(argc == 2);
+ path = argv[1];
+
+ dir = opendir(path);
+ if (!dir) {
+ printf("opendir(%s) failed.\n", path);
+ return -1;
+ }
+
+ while ((entry = readdir(dir)) != NULL) {
+ }
+ if (dir)
+ closedir(dir);
+
+ return ret;
+}
diff --git a/tests/basic/ctime/ctime-readdir.t b/tests/basic/ctime/ctime-readdir.t
new file mode 100644
index 00000000000..4564fc1b667
--- /dev/null
+++ b/tests/basic/ctime/ctime-readdir.t
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 replica 3 ${H0}:$B0/brick{1,2,3};
+TEST $CLI volume set $V0 performance.stat-prefetch on
+TEST $CLI volume set $V0 performance.readdir-ahead off
+TEST $CLI volume start $V0;
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+TEST mkdir $M0/dir0
+TEST "echo hello_world > $M0/dir0/FILE"
+
+ctime1=$(stat -c %Z $M0/dir0/FILE)
+echo "Mount change time: $ctime1"
+
+sleep 2
+
+#Write to back end directly to modify ctime of backend file
+TEST "echo write_from_backend >> $B0/brick1/dir0/FILE"
+TEST "echo write_from_backend >> $B0/brick2/dir0/FILE"
+TEST "echo write_from_backend >> $B0/brick3/dir0/FILE"
+echo "Backend change time"
+echo "brick1: $(stat -c %Z $B0/brick1/dir0/FILE)"
+echo "brick2: $(stat -c %Z $B0/brick2/dir0/FILE)"
+echo "brick3: $(stat -c %Z $B0/brick3/dir0/FILE)"
+
+#Stop and start to hit the case of no inode for readdir
+TEST umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+TEST build_tester $(dirname $0)/ctime-readdir.c
+
+#Do readdir
+TEST ./$(dirname $0)/ctime-readdir $M0/dir0
+
+EXPECT "$ctime1" stat -c %Z $M0/dir0/FILE
+echo "Mount change time after readdir $(stat -c %Z $M0/dir0/FILE)"
+
+cleanup_tester $(dirname $0)/ctime-readdir
+
+cleanup;
diff --git a/tests/basic/ctime/ctime-rep-heal.t b/tests/basic/ctime/ctime-rep-heal.t
new file mode 100644
index 00000000000..20517c74971
--- /dev/null
+++ b/tests/basic/ctime/ctime-rep-heal.t
@@ -0,0 +1,70 @@
+#!/bin/bash
+#
+# This will test self healing of ctime xattr 'trusted.glusterfs.mdata'
+#
+###
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup
+
+#cleate and start volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..3}
+TEST $CLI volume start $V0
+
+#Mount the volume
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+
+# Create files
+mkdir $M0/dir1
+echo "Initial content" > $M0/file1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/file1
+
+# Kill brick
+TEST kill_brick $V0 $H0 $B0/${V0}3
+
+echo "B3 is down" >> $M0/file1
+echo "Change dir1 time attributes" > $M0/dir1/dir1_file1
+echo "Entry heal file" > $M0/entry_heal_file1
+mkdir $M0/entry_heal_dir1
+
+# Check xattr
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_uniq_count $B0/${V0}{1..3}/file1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/dir1/dir1_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1/dir1_file1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/entry_heal_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/entry_heal_dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_dir1
+
+TEST $CLI volume start $V0 force
+$CLI volume heal $V0
+
+# Check xattr
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/file1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1/dir1_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1/dir1_file1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/entry_heal_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/entry_heal_dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_dir1
+
+cleanup;
diff --git a/tests/basic/ctime/ctime-rep-rebalance.t b/tests/basic/ctime/ctime-rep-rebalance.t
new file mode 100644
index 00000000000..866cf87e6cb
--- /dev/null
+++ b/tests/basic/ctime/ctime-rep-rebalance.t
@@ -0,0 +1,41 @@
+#!/bin/bash
+#
+# This will test healing of ctime xattr 'trusted.glusterfs.mdata' after add-brick and rebalance
+#
+###
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup
+
+#cleate and start volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+
+#Mount the volume
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+
+# Create files
+mkdir $M0/dir1
+
+# Add brick
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{6..8}
+
+#Trigger rebalance
+TEST $CLI volume rebalance $V0 start force
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+
+#Verify ctime xattr heal on directory
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}6/dir1"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}7/dir1"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}8/dir1"
+
+b6_mdata=$(get_mdata "$B0/${V0}6/dir1")
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "${b6_mdata}" get_mdata $B0/${V0}7/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "${b6_mdata}" get_mdata $B0/${V0}8/dir1
+
+cleanup;
diff --git a/tests/basic/ctime/ctime-utimesat.t b/tests/basic/ctime/ctime-utimesat.t
new file mode 100644
index 00000000000..540e57aec83
--- /dev/null
+++ b/tests/basic/ctime/ctime-utimesat.t
@@ -0,0 +1,28 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.read-after-open off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.io-cache off
+
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+touch $M0/FILE
+
+atime=$(stat -c "%.X" $M0/FILE)
+EXPECT $atime stat -c "%.Y" $M0/FILE
+EXPECT $atime stat -c "%.Z" $M0/FILE
+
+cleanup
diff --git a/tests/basic/distribute/brick-down.t b/tests/basic/distribute/brick-down.t
new file mode 100644
index 00000000000..522ccc07210
--- /dev/null
+++ b/tests/basic/distribute/brick-down.t
@@ -0,0 +1,83 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../common-utils.rc
+. $(dirname $0)/../../dht.rc
+
+# Test 1 overview:
+# ----------------
+# Test whether lookups are sent after a brick comes up again
+#
+# 1. Create a 3 brick pure distribute volume
+# 2. Fuse mount the volume so the layout is set on the root
+# 3. Kill one brick and try to create a directory which hashes to that brick.
+# It should fail with EIO.
+# 4. Restart the brick that was killed.
+# 5. Do not remount the volume. Try to create the same directory as in step 3.
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0-{1..3}
+TEST $CLI volume start $V0
+
+# We want the lookup to reach DHT
+TEST $CLI volume set $V0 performance.stat-prefetch off
+
+# Mount using FUSE and lookup the mount so a layout is set on the brick root
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+
+ls $M0/
+
+TEST mkdir $M0/level1
+
+# Find a dirname that will hash to the brick we are going to kill
+hashed=$V0-client-1
+TEST dht_first_filename_with_hashsubvol "$hashed" $M0 "dir-"
+roottestdir=$fn_return_val
+
+hashed=$V0-client-1
+TEST dht_first_filename_with_hashsubvol "$hashed" $M0/level1 "dir-"
+level1testdir=$fn_return_val
+
+
+TEST kill_brick $V0 $H0 $B0/$V0-2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" brick_up_status $V0 $H0 $B0/$V0-2
+
+TEST $CLI volume status $V0
+
+
+# Unmount and mount the volume again so dht has an incomplete in memory layout
+
+umount -f $M0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+
+
+mkdir $M0/$roottestdir
+TEST [ $? -ne 0 ]
+
+mkdir $M0/level1/$level1testdir
+TEST [ $? -ne 0 ]
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/$V0-2
+
+#$CLI volume status
+
+# It takes a while for the client to reconnect to the brick
+sleep 5
+
+
+mkdir $M0/$roottestdir
+TEST [ $? -eq 0 ]
+
+mkdir $M0/$level1/level1testdir
+TEST [ $? -eq 0 ]
+
+# Cleanup
+cleanup
+
+
diff --git a/tests/basic/distribute/dir-heal.t b/tests/basic/distribute/dir-heal.t
new file mode 100644
index 00000000000..851f765b245
--- /dev/null
+++ b/tests/basic/distribute/dir-heal.t
@@ -0,0 +1,145 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../common-utils.rc
+
+# Test 1 overview:
+# ----------------
+#
+# 1. Kill one brick of the volume.
+# 2. Create directories and change directory properties.
+# 3. Bring up the brick and access the directory
+# 4. Check the permissions and xattrs on the backend
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0-{1..3}
+TEST $CLI volume start $V0
+
+# We want the lookup to reach DHT
+TEST $CLI volume set $V0 performance.stat-prefetch off
+
+# Mount using FUSE , kill a brick and create directories
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+
+ls $M0/
+cd $M0
+
+TEST kill_brick $V0 $H0 $B0/$V0-1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" brick_up_status $V0 $H0 $B0/$V0-1
+
+TEST mkdir dir{1..4}
+
+# No change for dir1
+# Change permissions for dir2
+# Set xattr on dir3
+# Change permissions and set xattr on dir4
+
+TEST chmod 777 $M0/dir2
+
+TEST setfattr -n "user.test" -v "test" $M0/dir3
+
+TEST chmod 777 $M0/dir4
+TEST setfattr -n "user.test" -v "test" $M0/dir4
+
+
+# Start all bricks
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/$V0-1
+
+#$CLI volume status
+
+# It takes a while for the client to reconnect to the brick
+sleep 5
+
+stat $M0/dir* > /dev/null
+
+# Check that directories have been created on the brick that was killed
+
+TEST ls $B0/$V0-1/dir1
+
+TEST ls $B0/$V0-1/dir2
+EXPECT "777" stat -c "%a" $B0/$V0-1/dir2
+
+TEST ls $B0/$V0-1/dir3
+EXPECT "test" getfattr -n "user.test" --absolute-names --only-values $B0/$V0-1/dir3
+
+
+TEST ls $B0/$V0-1/dir4
+EXPECT "777" stat -c "%a" $B0/$V0-1/dir4
+EXPECT "test" getfattr -n "user.test" --absolute-names --only-values $B0/$V0-1/dir4
+
+
+TEST rm -rf $M0/*
+
+cd
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+
+# Test 2 overview:
+# ----------------
+# 1. Create directories with all bricks up.
+# 2. Kill a brick and change directory properties and set user xattr.
+# 2. Bring up the brick and access the directory
+# 3. Check the permissions and xattrs on the backend
+
+
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+
+ls $M0/
+cd $M0
+TEST mkdir dir{1..4}
+
+TEST kill_brick $V0 $H0 $B0/$V0-1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" brick_up_status $V0 $H0 $B0/$V0-1
+
+# No change for dir1
+# Change permissions for dir2
+# Set xattr on dir3
+# Change permissions and set xattr on dir4
+
+TEST chmod 777 $M0/dir2
+
+TEST setfattr -n "user.test" -v "test" $M0/dir3
+
+TEST chmod 777 $M0/dir4
+TEST setfattr -n "user.test" -v "test" $M0/dir4
+
+
+# Start all bricks
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/$V0-1
+
+#$CLI volume status
+
+# It takes a while for the client to reconnect to the brick
+sleep 5
+
+stat $M0/dir* > /dev/null
+
+# Check directories on the brick that was killed
+
+TEST ls $B0/$V0-1/dir2
+EXPECT "777" stat -c "%a" $B0/$V0-1/dir2
+
+TEST ls $B0/$V0-1/dir3
+EXPECT "test" getfattr -n "user.test" --absolute-names --only-values $B0/$V0-1/dir3
+
+
+TEST ls $B0/$V0-1/dir4
+EXPECT "777" stat -c "%a" $B0/$V0-1/dir4
+EXPECT "test" getfattr -n "user.test" --absolute-names --only-values $B0/$V0-1/dir4
+cd
+
+
+# Cleanup
+cleanup
+
diff --git a/tests/basic/distribute/file-rename.t b/tests/basic/distribute/file-rename.t
new file mode 100644
index 00000000000..63111b8ad8f
--- /dev/null
+++ b/tests/basic/distribute/file-rename.t
@@ -0,0 +1,1021 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../common-utils.rc
+
+# Test overview:
+# Test all combinations of src-hashed/src-cached/dst-hashed/dst-cached
+
+hashdebugxattr="dht.file.hashed-subvol."
+
+function get_brick_index {
+ local inpath=$1
+ brickroot=$(getfattr -m . -n trusted.glusterfs.pathinfo $inpath | tr ' ' '\n' | sed -n 's/<POSIX(\(.*\)):.*:.*>.*/\1/p')
+ echo ${brickroot:(-1)}
+}
+
+function get_brick_path_for_subvol {
+ local in_subvol=$1
+ local in_brickpath
+
+ in_brickpath=$(cat "$M0/.meta/graphs/active/$in_subvol/options/remote-subvolume")
+ echo $in_brickpath
+
+}
+
+#Checks that file exists only on hashed and/or cached
+function file_existence_check
+{
+ local in_file_path=$1
+ local in_hashed=$2
+ local in_cached=$3
+ local in_client_subvol
+ local in_brickpath
+ local ret
+
+ for i in {0..3}
+ do
+ in_client_subvol="$V0-client-$i"
+ in_brickpath=$(cat "$M0/.meta/graphs/active/$in_client_subvol/options/remote-subvolume")
+ stat "$in_brickpath/$in_file_path" 2>/dev/null
+ ret=$?
+ # Either the linkto or the data file must exist on the hashed
+ if [ "$in_client_subvol" == "$in_hashed" ]; then
+ if [ $ret -ne 0 ]; then
+ return 1
+ fi
+ continue
+ fi
+
+ # If the cached is non-null, we expect the file to exist on it
+ if [ "$in_client_subvol" == "$in_cached" ]; then
+ if [ $ret -ne 0 ]; then
+ return 1
+ fi
+ continue
+ fi
+
+ if [ $ret -eq 0 ]; then
+ return 2
+ fi
+ done
+ return 0
+}
+
+
+# Check if file exists on any of the bricks of the volume
+function file_does_not_exist
+{
+ local inpath=$1
+ for i in `seq 0 3`
+ do
+ file_path=$B0/$V0-$i/$inpath
+ if [ -f "$file_path" ]; then
+ echo "1"
+ return 1
+ fi
+ done
+ return 0
+}
+
+
+# Input: filename dirpath
+function get_hash_subvol
+{
+ hash_subvol=$(getfattr --only-values -n "$hashdebugxattr$1" $2 2>/dev/null)
+}
+
+
+
+# Find the first filename that hashes to a subvol
+# other than $1
+
+function first_filename_with_diff_hashsubvol
+{
+ local in_subvol=$1
+ local in_path=$2
+ local file_pattern=$3
+ local in_hash_subvol
+
+ for i in {1..100}
+ do
+ dstfilename="$file_pattern$i"
+ in_hash_subvol=$(get_hash_subvol "$dstfilename" "$in_path")
+ echo $in_hash_subvol
+ if [ "$in_subvol" != "$in_hash_subvol" ]; then
+ return 0
+ fi
+ done
+ return 1
+}
+
+# Find the first filename that hashes to the same subvol
+# as $1
+function first_filename_with_same_hashsubvol
+{
+ local in_subvol=$1
+ local in_path=$2
+ local in_hash_subvol
+ local file_pattern=$3
+
+ for i in {1..100}
+ do
+ dstfilename="$file_pattern$i"
+ get_hash_subvol "$dstfilename" "$in_path"
+ in_hash_subvol=$hash_subvol
+# echo $in_hash_subvol
+ if [ "$in_subvol" == "$in_hash_subvol" ]; then
+ return 0
+ fi
+ done
+ return 1
+}
+
+function file_is_linkto
+{
+ local brick_filepath=$1
+
+ test=$(stat $brick_filepath 2>&1)
+ if [ $? -ne 0 ]; then
+ echo "2"
+ return
+ fi
+
+ test=$(getfattr -n trusted.glusterfs.dht.linkto -e text $brick_filepath 2>&1)
+
+ if [ $? -eq 0 ]; then
+ echo "1"
+ else
+ echo "0"
+ fi
+}
+
+
+
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+
+# We need at least 4 bricks to test all combinations of hashed and
+# cached files
+
+TEST $CLI volume create $V0 $H0:$B0/$V0-{0..3}
+TEST $CLI volume start $V0
+
+# Mount using FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+
+################################################################
+# The first set of tests are those where the Dst file does not exist
+# dst-cached = NULL
+#
+###############################################################
+
+################### Test 1 ####################################
+#
+# src-hashed = src-cached = dst-hashed
+# dst-cached = null
+# src-file = src-1
+
+echo " **** Test 1 **** "
+
+src_file="src-1"
+
+TEST mkdir $M0/test-1
+TEST touch $M0/test-1/$src_file
+
+TEST get_hash_subvol $src_file $M0/test-1
+src_hashed=$hash_subvol
+#echo "Hashed subvol for $src_file: " $src_hashed
+
+# Find a file name that hashes to the same subvol as $src_file
+TEST first_filename_with_same_hashsubvol "$src_hashed" "$M0/test-1" "dst-"
+#echo "dst-file name: " $dstfilename
+dst_hashed=$src_hashed
+
+src_hash_brick=$(get_brick_path_for_subvol $src_hashed)
+
+echo "Renaming $src_file to $dstfilename"
+
+TEST mv $M0/test-1/$src_file $M0/test-1/$dstfilename
+
+# Expected:
+# dst file is accessible from the mount point
+# dst file exists only on the hashed brick.
+# no linkto files on any bricks
+# src files do not exist
+
+
+TEST stat $M0/test-1/$dstfilename 2>/dev/null
+TEST file_existence_check test-1/$dstfilename $src_hashed
+TEST file_does_not_exist test-1/$src_file
+EXPECT "0" file_is_linkto $src_hash_brick/test-1/$dstfilename
+
+
+################### Test 2 ####################################
+
+# src-hashed = src-cached != dst-hashed
+# dst-cached = null
+
+echo " **** Test 2 **** "
+
+src_file="src-1"
+
+TEST mkdir $M0/test-2
+TEST touch $M0/test-2/$src_file
+
+TEST get_hash_subvol $src_file $M0/test-2
+src_hashed=$hash_subvol
+#echo "Hashed subvol for $src_file: " $src_hashed
+
+# Find a file name that hashes to a diff hashed subvol than $src_file
+TEST first_filename_with_diff_hashsubvol "$src_hashed" "$M0/test-2" "dst-"
+echo "dst-file name: " $dstfilename
+TEST get_hash_subvol $dstfilename $M0/test-2
+dst_hashed=$hash_subvol
+
+src_hash_brick=$(get_brick_path_for_subvol $src_hashed)
+dst_hash_brick=$(get_brick_path_for_subvol $dst_hashed)
+
+echo "Renaming $src_file to $dstfilename"
+
+TEST mv $M0/test-2/$src_file $M0/test-2/$dstfilename
+
+
+# Expected:
+# dst file is accessible from the mount point
+# dst data file on src_hashed and dst linkto file on dst_hashed
+# src files do not exist
+
+
+TEST stat $M0/test-2/$dstfilename 2>/dev/null
+TEST file_existence_check test-2/$dstfilename $dst_hashed $src_hashed
+TEST file_does_not_exist test-2/$src_file
+EXPECT "1" file_is_linkto $dst_hash_brick/test-2/$dstfilename
+EXPECT "0" file_is_linkto $src_hash_brick/test-2/$dstfilename
+
+################### Test 3 ####################################
+
+# src-hashed = dst-hashed != src-cached
+
+echo " **** Test 3 **** "
+
+src_file0="abc-1"
+
+# 1. Create src file with src_cached != src_hashed
+TEST mkdir $M0/test-3
+TEST touch $M0/test-3/$src_file0
+
+TEST get_hash_subvol $src_file0 $M0/test-3
+src_cached=$hash_subvol
+#echo "Hashed subvol for $src_file0: " $src_cached
+
+# Find a file name that hashes to a diff hashed subvol than $src_file0
+TEST first_filename_with_diff_hashsubvol "$src_cached" "$M0/test-3" "src-"
+echo "dst-file name: " $dstfilename
+src_file=$dstfilename
+
+TEST mv $M0/test-3/$src_file0 $M0/test-3/$src_file
+
+TEST get_hash_subvol $src_file $M0/test-3
+src_hashed=$hash_subvol
+
+
+# 2. Rename src to dst
+TEST first_filename_with_same_hashsubvol "$src_hashed" "$M0/test-3" "dst-"
+#echo "dst-file name: " $dstfilename
+
+src_hash_brick=$(get_brick_path_for_subvol $src_hashed)
+src_cached_brick=$(get_brick_path_for_subvol $src_cached)
+
+echo "Renaming $src_file to $dstfilename"
+
+TEST mv $M0/test-3/$src_file $M0/test-3/$dstfilename
+
+
+# Expected:
+# dst file is accessible from the mount point
+TEST stat $M0/test-3/$dstfilename 2>/dev/null
+
+# src file does not exist
+TEST file_does_not_exist test-3/$src_file
+
+# dst linkto file on src_hashed and dst data file on src_cached
+TEST file_existence_check test-3/$dstfilename $src_hashed $src_cached
+
+EXPECT "1" file_is_linkto $src_hash_brick/test-3/$dstfilename
+EXPECT "0" file_is_linkto $src_cached_brick/test-3/$dstfilename
+
+
+
+################### Test 4 ####################################
+
+# src-cached = dst-hashed != src-hashed
+
+echo " **** Test 4 **** "
+
+src_file0="abc-1"
+
+# 1. Create src file with src_cached != src_hashed
+TEST mkdir $M0/test-4
+TEST touch $M0/test-4/$src_file0
+
+TEST get_hash_subvol $src_file0 $M0/test-4
+src_cached=$hash_subvol
+#echo "Hashed subvol for $src_file0: " $src_cached
+
+# Find a file name that hashes to a diff hashed subvol than $src_file0
+TEST first_filename_with_diff_hashsubvol "$src_cached" "$M0/test-4" "src-"
+src_file=$dstfilename
+
+TEST mv $M0/test-4/$src_file0 $M0/test-4/$src_file
+
+TEST get_hash_subvol $src_file $M0/test-4
+src_hashed=$hash_subvol
+
+
+# 2. Rename src to dst
+TEST first_filename_with_same_hashsubvol "$src_cached" "$M0/test-4" "dst-"
+#echo "dst-file name: " $dstfilename
+
+src_hash_brick=$(get_brick_path_for_subvol $src_hashed)
+src_cached_brick=$(get_brick_path_for_subvol $src_cached)
+
+echo "Renaming $src_file to $dstfilename"
+
+TEST mv $M0/test-4/$src_file $M0/test-4/$dstfilename
+
+# Expected:
+# dst file is accessible from the mount point
+TEST stat $M0/test-4/$dstfilename 2>/dev/null
+
+# src file does not exist
+TEST file_does_not_exist test-4/$src_file
+
+# dst linkto file on src_hashed and dst data file on src_cached
+TEST file_existence_check test-4/$dstfilename $src_cached
+
+EXPECT "0" file_is_linkto $src_cached_brick/test-4/$dstfilename
+
+
+################### Test 5 ####################################
+
+# src-cached != src-hashed
+# src-hashed != dst-hashed
+# src-cached != dst-hashed
+
+
+echo " **** Test 5 **** "
+
+# 1. Create src and dst files
+
+TEST mkdir $M0/test-5
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-5" "abc-"
+src_file0=$dstfilename
+
+TEST touch $M0/test-5/$src_file0
+
+TEST get_hash_subvol $src_file0 $M0/test-5
+src_cached=$hash_subvol
+#echo "Hashed subvol for $src_file0: " $src_cached
+
+# Find a file name that hashes to a diff hashed subvol than $src_file0
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-5" "src-"
+src_file=$dstfilename
+
+TEST mv $M0/test-5/$src_file0 $M0/test-5/$src_file
+
+TEST get_hash_subvol $src_file $M0/test-5
+src_hashed=$hash_subvol
+
+TEST first_filename_with_same_hashsubvol "$V0-client-2" "$M0/test-5" "dst-"
+#echo "dst-file name: " $dstfilename
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-2")
+src_cached_brick=$(get_brick_path_for_subvol $src_cached)
+
+
+# 2. Rename src to dst
+echo "Renaming $src_file to $dstfilename"
+
+TEST mv $M0/test-5/$src_file $M0/test-5/$dstfilename
+
+
+# 3. Validate
+
+# Expected:
+# dst file is accessible from the mount point
+TEST stat $M0/test-5/$dstfilename 2>/dev/null
+
+# src file does not exist
+TEST file_does_not_exist test-5/$src_file
+
+# dst linkto file on src_hashed and dst data file on src_cached
+
+EXPECT "0" file_is_linkto $src_cached_brick/test-5/$dstfilename
+EXPECT "1" file_is_linkto $dst_hash_brick/test-5/$dstfilename
+
+
+########################################################################
+#
+# The Dst file exists
+#
+########################################################################
+
+################### Test 6 ####################################
+
+# src_hash = src_cached
+# dst_hash = dst_cached
+# dst_hash = src_hash
+
+
+TEST mkdir $M0/test-6
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-6" "src-"
+src_file=$dstfilename
+
+TEST touch $M0/test-6/$src_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-6" "dst-"
+dst_file=$dstfilename
+
+TEST touch $M0/test-6/$dst_file
+
+
+# 2. Rename src to dst
+
+TEST mv $M0/test-6/$src_file $M0/test-6/$dst_file
+
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-6/$dst_file 2>/dev/null
+TEST file_existence_check test-6/$dst_file "$V0-client-0"
+TEST file_does_not_exist test-6/$src_file
+EXPECT "0" file_is_linkto $dst_hash_brick/test-6/$dst_file
+
+
+################### Test 7 ####################################
+
+# src_hash = src_cached
+# dst_hash = dst_cached
+# dst_hash != src_hash
+
+
+echo " **** Test 7 **** "
+
+TEST mkdir $M0/test-7
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-7" "src-"
+src_file=$dstfilename
+
+TEST touch $M0/test-7/$src_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-7" "dst-"
+dst_file=$dstfilename
+
+TEST touch $M0/test-7/$dst_file
+
+
+# 2. Rename src to dst
+
+TEST mv $M0/test-7/$src_file $M0/test-7/$dst_file
+
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-1")
+src_hash_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-7/$dst_file 2>/dev/null
+TEST file_existence_check test-7/$dst_file "$V0-client-1" "$V0-client-0"
+TEST file_does_not_exist test-7/$src_file
+
+EXPECT "0" file_is_linkto $src_hash_brick/test-7/$dst_file
+EXPECT "1" file_is_linkto $dst_hash_brick/test-7/$dst_file
+
+
+################### Test 8 ####################################
+
+# src_hash = src_cached
+# dst_hash != dst_cached
+# dst_hash != src_hash
+# dst_cached != src_hash
+
+echo " **** Test 8 **** "
+
+TEST mkdir $M0/test-8
+
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-8" "src-"
+src_file=$dstfilename
+TEST touch $M0/test-8/$src_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-8" "dst0-"
+dst_file0=$dstfilename
+TEST touch $M0/test-8/$dst_file0
+
+TEST first_filename_with_same_hashsubvol "$V0-client-2" "$M0/test-8" "dst-"
+dst_file=$dstfilename
+
+mv $M0/test-8/$dst_file0 $M0/test-8/$dst_file
+
+
+# 2. Rename the file
+
+mv $M0/test-8/$src_file $M0/test-8/$dst_file
+
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-2")
+src_hash_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-8/$dst_file 2>/dev/null
+TEST file_existence_check test-8/$dst_file "$V0-client-2" "$V0-client-0"
+TEST file_does_not_exist test-8/$src_file
+
+EXPECT "0" file_is_linkto $src_hash_brick/test-8/$dst_file
+EXPECT "1" file_is_linkto $dst_hash_brick/test-8/$dst_file
+
+################### Test 9 ####################################
+
+# src_hash = src_cached = dst_hash
+# dst_hash != dst_cached
+
+echo " **** Test 9 **** "
+
+TEST mkdir $M0/test-9
+
+
+# 1. Create src and dst files
+
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-9" "src-"
+src_file=$dstfilename
+TEST touch $M0/test-9/$src_file
+
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-9" "dst0-"
+dst0_file=$dstfilename
+TEST touch $M0/test-9/$dst0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-9" "dst-"
+dst_file=$dstfilename
+
+TEST mv $M0/test-9/$dst0_file $M0/test-9/$dst_file
+
+# 2. Rename the file
+
+mv $M0/test-9/$src_file $M0/test-9/$dst_file
+
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-9/$dst_file 2>/dev/null
+TEST file_existence_check test-9/$dst_file "$V0-client-0"
+TEST file_does_not_exist test-9/$src_file
+EXPECT "0" file_is_linkto $dst_hash_brick/test-9/$dst_file
+
+
+################### Test 10 ####################################
+
+# src_hash = src_cached = dst_cached
+# dst_hash != dst_cached
+
+echo " **** Test 10 **** "
+
+TEST mkdir $M0/test-10
+
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-10" "src-"
+src_file=$dstfilename
+TEST touch $M0/test-10/$src_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-10" "dst0-"
+dst0_file=$dstfilename
+TEST touch $M0/test-10/$dst0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-10" "dst-"
+dst_file=$dstfilename
+
+mv $M0/test-10/$dst0_file $M0/test-10/$dst_file
+
+
+# 2. Rename the file
+
+mv $M0/test-10/$src_file $M0/test-10/$dst_file
+
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-1")
+dst_cached_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-10/$dst_file 2>/dev/null
+TEST file_existence_check test-10/$dst_file "$V0-client-1" "$V0-client-0"
+TEST file_does_not_exist test-10/$src_file
+EXPECT "1" file_is_linkto $dst_hash_brick/test-10/$dst_file
+EXPECT "0" file_is_linkto $dst_cached_brick/test-10/$dst_file
+
+
+################### Test 11 ####################################
+
+# src_hash != src_cached
+# dst_hash = dst_cached = src_cached
+
+echo " **** Test 11 **** "
+
+TEST mkdir $M0/test-11
+
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-11" "src0-"
+src0_file=$dstfilename
+TEST touch $M0/test-11/$src0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-11" "src-"
+src_file=$dstfilename
+
+mv $M0/test-11/$src0_file $M0/test-11/$src_file
+
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-11" "dst-"
+dst_file=$dstfilename
+TEST touch $M0/test-11/$dst_file
+
+
+# 2. Rename the file
+
+mv $M0/test-11/$src_file $M0/test-11/$dst_file
+
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-11/$dst_file 2>/dev/null
+TEST file_existence_check test-11/$dst_file "$V0-client-0"
+TEST file_does_not_exist test-11/$src_file
+EXPECT "0" file_is_linkto $dst_hash_brick/test-11/$dst_file
+
+
+################### Test 12 ####################################
+
+# src_hash != src_cached
+# dst_hash = dst_cached = src_hash
+
+echo " **** Test 12 **** "
+
+TEST mkdir $M0/test-12
+
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-12" "src0-"
+src0_file=$dstfilename
+TEST touch $M0/test-12/$src0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-12" "src-"
+src_file=$dstfilename
+
+mv $M0/test-12/$src0_file $M0/test-12/$src_file
+
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-12" "dst-"
+dst_file=$dstfilename
+TEST touch $M0/test-12/$dst_file
+
+
+# 2. Rename the file
+
+mv $M0/test-12/$src_file $M0/test-12/$dst_file
+
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-1")
+dst_cached_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-12/$dst_file 2>/dev/null
+TEST file_existence_check test-12/$dst_file "$V0-client-1" "$V0-client-0"
+TEST file_does_not_exist test-12/$src_file
+EXPECT "1" file_is_linkto $dst_hash_brick/test-12/$dst_file
+EXPECT "0" file_is_linkto $dst_cached_brick/test-12/$dst_file
+
+################### Test 13 ####################################
+
+# src_hash != src_cached
+# dst_hash = dst_cached
+# dst_hash != src_cached
+# dst_hash != src_hash
+
+echo " **** Test 13 **** "
+
+TEST mkdir $M0/test-13
+
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-13" "src0-"
+src0_file=$dstfilename
+TEST touch $M0/test-13/$src0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-13" "src-"
+src_file=$dstfilename
+
+mv $M0/test-13/$src0_file $M0/test-13/$src_file
+
+
+TEST first_filename_with_same_hashsubvol "$V0-client-2" "$M0/test-13" "dst-"
+dst_file=$dstfilename
+TEST touch $M0/test-13/$dst_file
+
+# 2. Rename the file
+
+mv $M0/test-13/$src_file $M0/test-13/$dst_file
+
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-2")
+dst_cached_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-13/$dst_file 2>/dev/null
+TEST file_existence_check test-13/$dst_file "$V0-client-2" "$V0-client-0"
+TEST file_does_not_exist test-13/$src_file
+EXPECT "1" file_is_linkto $dst_hash_brick/test-13/$dst_file
+EXPECT "0" file_is_linkto $dst_cached_brick/test-13/$dst_file
+
+
+################### Test 14 ####################################
+
+# src_hash != src_cached
+# dst_hash = src_hash
+# dst_cached = src_cached
+
+echo " **** Test 14 **** "
+
+TEST mkdir $M0/test-14
+
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-14" "src0-"
+src0_file=$dstfilename
+TEST touch $M0/test-14/$src0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-14" "src-"
+src_file=$dstfilename
+
+mv $M0/test-14/$src0_file $M0/test-14/$src_file
+
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-14" "dst0-"
+dst0_file=$dstfilename
+TEST touch $M0/test-14/$dst0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-14" "dst-"
+dst_file=$dstfilename
+
+mv $M0/test-14/$dst0_file $M0/test-14/$dst_file
+
+
+# 2. Rename the file
+
+mv $M0/test-14/$src_file $M0/test-14/$dst_file
+
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-1")
+dst_cached_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-14/$dst_file 2>/dev/null
+TEST file_existence_check test-14/$dst_file "$V0-client-1" "$V0-client-0"
+TEST file_does_not_exist test-14/$src_file
+EXPECT "1" file_is_linkto $dst_hash_brick/test-14/$dst_file
+EXPECT "0" file_is_linkto $dst_cached_brick/test-14/$dst_file
+
+################### Test 15 ####################################
+
+# src_hash != src_cached
+# dst_hash != src_hash
+# dst_hash != src_cached
+# dst_cached = src_cached
+
+echo " **** Test 15 **** "
+
+TEST mkdir $M0/test-15
+
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-15" "src0-"
+src0_file=$dstfilename
+TEST touch $M0/test-15/$src0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-15" "src-"
+src_file=$dstfilename
+
+mv $M0/test-15/$src0_file $M0/test-15/$src_file
+
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-15" "dst0-"
+dst0_file=$dstfilename
+TEST touch $M0/test-15/$dst0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-2" "$M0/test-15" "dst-"
+dst_file=$dstfilename
+
+mv $M0/test-15/$dst0_file $M0/test-15/$dst_file
+
+
+# 2. Rename the file
+
+mv $M0/test-15/$src_file $M0/test-15/$dst_file
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-2")
+dst_cached_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-15/$dst_file 2>/dev/null
+TEST file_existence_check test-15/$dst_file "$V0-client-2" "$V0-client-0"
+TEST file_does_not_exist test-15/$src_file
+EXPECT "1" file_is_linkto $dst_hash_brick/test-15/$dst_file
+EXPECT "0" file_is_linkto $dst_cached_brick/test-15/$dst_file
+
+
+
+################### Test 16 ####################################
+
+# src_hash != src_cached
+# dst_hash = src_cached
+# dst_cached = src_hash
+
+echo " **** Test 16 **** "
+
+TEST mkdir $M0/test-16
+
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-16" "src0-"
+src0_file=$dstfilename
+TEST touch $M0/test-16/$src0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-16" "src-"
+src_file=$dstfilename
+
+mv $M0/test-16/$src0_file $M0/test-16/$src_file
+
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-16" "dst0-"
+dst0_file=$dstfilename
+TEST touch $M0/test-16/$dst0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-16" "dst-"
+dst_file=$dstfilename
+
+mv $M0/test-16/$dst0_file $M0/test-16/$dst_file
+
+
+# 2. Rename the file
+
+mv $M0/test-16/$src_file $M0/test-16/$dst_file
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-16/$dst_file 2>/dev/null
+TEST file_existence_check test-16/$dst_file "$V0-client-0"
+TEST file_does_not_exist test-16/$src_file
+EXPECT "0" file_is_linkto $dst_hash_brick/test-16/$dst_file
+
+
+################### Test 17 ####################################
+
+# src_hash != src_cached
+# dst_hash != dst_cached
+# dst_hash != src_hash != src_cached
+# dst_cached = src_hash
+
+
+echo " **** Test 17 **** "
+
+TEST mkdir $M0/test-17
+
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-17" "src0-"
+src0_file=$dstfilename
+TEST touch $M0/test-17/$src0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-17" "src-"
+src_file=$dstfilename
+
+mv $M0/test-17/$src0_file $M0/test-17/$src_file
+
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-17" "dst0-"
+dst0_file=$dstfilename
+TEST touch $M0/test-17/$dst0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-2" "$M0/test-17" "dst-"
+dst_file=$dstfilename
+
+mv $M0/test-17/$dst0_file $M0/test-17/$dst_file
+
+
+# 2. Rename the file
+
+mv $M0/test-17/$src_file $M0/test-17/$dst_file
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-2")
+dst_cached_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-17/$dst_file 2>/dev/null
+TEST file_existence_check test-17/$dst_file "$V0-client-2" "$V0-client-0"
+TEST file_does_not_exist test-17/$src_file
+EXPECT "1" file_is_linkto $dst_hash_brick/test-17/$dst_file
+EXPECT "0" file_is_linkto $dst_cached_brick/test-17/$dst_file
+
+
+################### Test 18 ####################################
+
+# src_hash != src_cached
+# dst_hash != dst_cached
+# dst_hash != src_hash != src_cached != dst_cached
+
+
+echo " **** Test 18 **** "
+
+TEST mkdir $M0/test-18
+
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-18" "src0-"
+src0_file=$dstfilename
+TEST touch $M0/test-18/$src0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-18" "src-"
+src_file=$dstfilename
+
+mv $M0/test-18/$src0_file $M0/test-18/$src_file
+
+
+TEST first_filename_with_same_hashsubvol "$V0-client-2" "$M0/test-18" "dst0-"
+dst0_file=$dstfilename
+TEST touch $M0/test-18/$dst0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-3" "$M0/test-18" "dst-"
+dst_file=$dstfilename
+
+mv $M0/test-18/$dst0_file $M0/test-18/$dst_file
+
+
+# 2. Rename the file
+
+mv $M0/test-18/$src_file $M0/test-18/$dst_file
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-3")
+dst_cached_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-18/$dst_file 2>/dev/null
+TEST file_existence_check test-18/$dst_file "$V0-client-3" "$V0-client-0"
+TEST file_does_not_exist test-18/$src_file
+EXPECT "1" file_is_linkto $dst_hash_brick/test-18/$dst_file
+EXPECT "0" file_is_linkto $dst_cached_brick/test-18/$dst_file
+
+
+# Cleanup
+cleanup
+
diff --git a/tests/basic/distribute/spare_file_rebalance.t b/tests/basic/distribute/spare_file_rebalance.t
new file mode 100644
index 00000000000..061c02f7392
--- /dev/null
+++ b/tests/basic/distribute/spare_file_rebalance.t
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../dht.rc
+
+# Initialize
+#------------------------------------------------------------
+cleanup;
+
+# Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+# Create a volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+
+# Verify volume creation
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+# Start volume and verify successful start
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+#------------------------------------------------------------
+
+# Test case - Create sparse files on MP and verify
+# file info after rebalance
+#------------------------------------------------------------
+
+# Create some sparse files and get their size
+TEST cd $M0;
+dd if=/dev/urandom of=sparse_file bs=10k count=1 seek=2M
+cp --sparse=always sparse_file sparse_file_3;
+
+# Add a 3rd brick
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3;
+
+# Trigger rebalance
+TEST $CLI volume rebalance $V0 start force;
+EXPECT_WITHIN $REBALANCE_TIMEOUT "0" rebalance_completed;
+
+# Compare original and rebalanced files
+TEST cd $B0/${V0}2
+TEST cmp sparse_file $B0/${V0}3/sparse_file_3
+EXPECT_WITHIN 30 "";
+
+cleanup;
diff --git a/tests/basic/ec/ec-1468261.t b/tests/basic/ec/ec-1468261.t
index 902fbb7d2f1..77d704cf880 100644
--- a/tests/basic/ec/ec-1468261.t
+++ b/tests/basic/ec/ec-1468261.t
@@ -34,11 +34,12 @@ EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}3/te
EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}4/test_dir
EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}5/test_dir
-#Touch a file and kill two bricks
-TEST touch $M0/test_dir/new_file
+#Kill two bricks and touch a file
TEST kill_brick $V0 $H0 $B0/${V0}0
TEST kill_brick $V0 $H0 $B0/${V0}1
EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
+TEST touch $M0/test_dir/new_file
+sleep 2
#Dirty should be set on up bricks
EXPECT_WITHIN $IO_WAIT_TIMEOUT "^00000000000000010000000000000001$" get_hex_xattr trusted.ec.dirty $B0/${V0}2/test_dir
@@ -58,15 +59,13 @@ EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
TEST glusterfs -s $H0 --volfile-id $V0 $M0;
#Create a tar file
-TEST mkdir $M0/test_dir
-for i in {1..3000};do
-dd if=/dev/urandom of=$M0/test_dir/file-$i bs=1k count=10;
-done
-tar -cf $M0/test_dir.tar $M0/test_dir/ 2>/dev/null
-rm -rf $M0/test_dir/
+TEST mkdir /tmp/test_dir
+seq 1 3000 | xargs -n 1 -P 20 -I {} dd if=/dev/urandom of=/tmp/test_dir/file-{} bs=10K count=1
+tar -cf /tmp/test_dir.tar /tmp/test_dir/ 2>/dev/null
+rm -rf /tmp/test_dir/
#Untar the tar file
-tar -C $M0 -xf $M0/test_dir.tar 2>/dev/null&
+tar -C $M0 -xf /tmp/test_dir.tar 2>/dev/null&
#Kill 1st and 2nd brick
TEST kill_brick $V0 $H0 $B0/${V0}0
@@ -75,6 +74,7 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
#Stop untaring
TEST kill %1
+rm -f /tmp/test_dir.tar
#Bring up the down bricks
TEST $CLI volume start $V0 force
diff --git a/tests/basic/ec/ec-badfd.c b/tests/basic/ec/ec-badfd.c
new file mode 100644
index 00000000000..8be23c10eaf
--- /dev/null
+++ b/tests/basic/ec/ec-badfd.c
@@ -0,0 +1,124 @@
+#include <stdio.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+int
+fill_iov(struct iovec *iov, char fillchar, int count)
+{
+ int ret = -1;
+
+ iov->iov_base = malloc(count + 1);
+ if (iov->iov_base == NULL) {
+ return ret;
+ } else {
+ iov->iov_len = count;
+ ret = 0;
+ }
+ memset(iov->iov_base, fillchar, count);
+ memset(iov->iov_base + count, '\0', 1);
+
+ return ret;
+}
+
+int
+write_sync(glfs_t *fs, glfs_fd_t *glfd, int char_count)
+{
+ ssize_t ret = -1;
+ int flags = O_RDWR;
+ struct iovec iov = {0};
+
+ ret = fill_iov(&iov, 'a', char_count);
+ if (ret) {
+ fprintf(stderr, "failed to create iov");
+ goto out;
+ }
+
+ ret = glfs_pwritev(glfd, &iov, 1, 0, flags);
+out:
+ if (ret < 0) {
+ fprintf(stderr, "glfs_pwritev failed, %d", errno);
+ }
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ glfs_fd_t *fd = NULL;
+ int ret = 1;
+ char volume_cmd[4096] = {0};
+
+ if (argc != 4) {
+ fprintf(stderr, "Syntax: %s <host> <volname> <file>\n", argv[0]);
+ return 1;
+ }
+
+ fs = glfs_new(argv[2]);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", argv[1], 24007);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_volfile_server: returned %d\n", ret);
+ goto out;
+ }
+ ret = glfs_set_logging(fs, "/tmp/ec-badfd.log", 7);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_logging: returned %d\n", ret);
+ goto out;
+ }
+ ret = glfs_init(fs);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_init: returned %d\n", ret);
+ goto out;
+ }
+
+ fd = glfs_open(fs, argv[3], O_RDWR);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_open: returned NULL\n");
+ goto out;
+ }
+
+ ret = write_sync(fs, fd, 16);
+ if (ret < 0) {
+ fprintf(stderr, "write_sync failed\n");
+ }
+
+ snprintf(volume_cmd, sizeof(volume_cmd),
+ "gluster --mode=script volume stop %s", argv[2]);
+ /*Stop the volume so that update-size-version fails*/
+ system(volume_cmd);
+ sleep(8); /* 3 seconds more than eager-lock-timeout*/
+ snprintf(volume_cmd, sizeof(volume_cmd),
+ "gluster --mode=script volume start %s", argv[2]);
+ system(volume_cmd);
+ sleep(8); /*wait for bricks to come up*/
+ ret = glfs_fsync(fd, NULL, NULL);
+ if (ret == 0) {
+ fprintf(stderr, "fsync succeeded on a BADFD\n");
+ exit(1);
+ }
+
+ ret = glfs_close(fd);
+ if (ret == 0) {
+ fprintf(stderr, "flush succeeded on a BADFD\n");
+ exit(1);
+ }
+ ret = 0;
+
+out:
+ unlink("/tmp/ec-badfd.log");
+ glfs_fini(fs);
+
+ return ret;
+}
diff --git a/tests/basic/ec/ec-badfd.t b/tests/basic/ec/ec-badfd.t
new file mode 100755
index 00000000000..56feb47f115
--- /dev/null
+++ b/tests/basic/ec/ec-badfd.t
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{1..6}
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 disperse.eager-lock-timeout 5
+
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+TEST touch $M0/file
+
+TEST build_tester $(dirname $0)/ec-badfd.c -lgfapi -Wall -O2
+TEST $(dirname $0)/ec-badfd $H0 $V0 /file
+cleanup_tester $(dirname ${0})/ec-badfd
+
+cleanup;
diff --git a/tests/basic/ec/ec-cpu-extensions.t b/tests/basic/ec/ec-cpu-extensions.t
index a599a316925..c9af27ea234 100644
--- a/tests/basic/ec/ec-cpu-extensions.t
+++ b/tests/basic/ec/ec-cpu-extensions.t
@@ -1,6 +1,6 @@
#!/bin/bash
-DISPERSE=6
+DISPERSE=18
REDUNDANCY=2
. $(dirname $0)/../../include.rc
@@ -39,6 +39,7 @@ TEST glusterd
TEST pidof glusterd
TEST $CLI volume create $V0 redundancy $REDUNDANCY $H0:$B0/${V0}{1..$DISPERSE}
TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume set $V0 disperse.read-policy round-robin
EXPECT 'Created' volinfo_field $V0 'Status'
TEST $CLI volume start $V0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status'
diff --git a/tests/basic/ec/ec-dirty-flags.t b/tests/basic/ec/ec-dirty-flags.t
new file mode 100644
index 00000000000..68e66103f08
--- /dev/null
+++ b/tests/basic/ec/ec-dirty-flags.t
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This checks if the fop keeps the dirty flags settings correctly after
+# finishing the fop.
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2}
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+cd $M0
+for i in {1..1000}; do dd if=/dev/zero of=file-${i} bs=512k count=2; done
+cd -
+EXPECT "^0$" get_pending_heal_count $V0
+
+cleanup
diff --git a/tests/basic/ec/ec-fix-openfd.t b/tests/basic/ec/ec-fix-openfd.t
index c32f9332137..04fdd802c62 100644
--- a/tests/basic/ec/ec-fix-openfd.t
+++ b/tests/basic/ec/ec-fix-openfd.t
@@ -37,6 +37,8 @@ TEST fd_open $fd 'rw' "$M0/test_file"
TEST $CLI volume start $V0 force
EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+sleep 1
+
#Test the fd count
EXPECT "0" get_fd_count $V0 $H0 $B0/${V0}0 test_file
EXPECT "1" get_fd_count $V0 $H0 $B0/${V0}1 test_file
diff --git a/tests/basic/ec/ec-quorum-count.t b/tests/basic/ec/ec-quorum-count.t
new file mode 100644
index 00000000000..9310ebbb8f2
--- /dev/null
+++ b/tests/basic/ec/ec-quorum-count.t
@@ -0,0 +1,167 @@
+ #!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../ec.rc
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume create $V1 $H0:$B0/${V1}{0..5}
+TEST $CLI volume set $V0 disperse.eager-lock-timeout 5
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume set $V0 disperse.background-heals 0
+TEST $CLI volume set $V0 disperse.heal-wait-qlength 0
+
+#Should fail on non-disperse volume
+TEST ! $CLI volume set $V1 disperse.quorum-count 5
+
+#Should succeed on a valid range
+TEST ! $CLI volume set $V0 disperse.quorum-count 0
+TEST ! $CLI volume set $V0 disperse.quorum-count -0
+TEST ! $CLI volume set $V0 disperse.quorum-count abc
+TEST ! $CLI volume set $V0 disperse.quorum-count 10abc
+TEST ! $CLI volume set $V0 disperse.quorum-count 1
+TEST ! $CLI volume set $V0 disperse.quorum-count 2
+TEST ! $CLI volume set $V0 disperse.quorum-count 3
+TEST $CLI volume set $V0 disperse.quorum-count 4
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+#Test that the option is reflected in the mount
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^4$" ec_option_value $V0 $M0 0 quorum-count
+TEST $CLI volume reset $V0 disperse.quorum-count
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^0$" ec_option_value $V0 $M0 0 quorum-count
+TEST $CLI volume set $V0 disperse.quorum-count 6
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^6$" ec_option_value $V0 $M0 0 quorum-count
+
+TEST touch $M0/a
+TEST touch $M0/data
+TEST setfattr -n trusted.def -v def $M0/a
+TEST touch $M0/src
+TEST touch $M0/del-me
+TEST mkdir $M0/dir1
+TEST dd if=/dev/zero of=$M0/read-file bs=1M count=1 oflag=direct
+TEST dd if=/dev/zero of=$M0/del-file bs=1M count=1 oflag=direct
+TEST gf_rm_file_and_gfid_link $B0/${V0}0 del-file
+#modify operations should fail as the file is not in quorum
+TEST ! dd if=/dev/zero of=$M0/del-file bs=1M count=1 oflag=direct
+TEST kill_brick $V0 $H0 $B0/${V0}0
+#Read should succeed even when quorum-count is not met
+TEST dd if=$M0/read-file of=/dev/null iflag=direct
+TEST ! touch $M0/a2
+TEST ! mkdir $M0/dir2
+TEST ! mknod $M0/b2 b 4 5
+TEST ! ln -s $M0/a $M0/symlink
+TEST ! ln $M0/a $M0/link
+TEST ! mv $M0/src $M0/dst
+TEST ! rm -f $M0/del-me
+TEST ! rmdir $M0/dir1
+TEST ! dd if=/dev/zero of=$M0/a bs=1M count=1 conv=notrunc
+TEST ! dd if=/dev/zero of=$M0/data bs=1M count=1 conv=notrunc
+TEST ! truncate -s 0 $M0/a
+TEST ! setfattr -n trusted.abc -v abc $M0/a
+TEST ! setfattr -x trusted.def $M0/a
+TEST ! chmod +x $M0/a
+TEST ! fallocate -l 2m -n $M0/a
+TEST ! fallocate -p -l 512k $M0/a
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}
+
+# reset the option and check whether the default redundancy count is
+# accepted or not.
+TEST $CLI volume reset $V0 disperse.quorum-count
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^0$" ec_option_value $V0 $M0 0 quorum-count
+TEST touch $M0/a1
+TEST touch $M0/data1
+TEST setfattr -n trusted.def -v def $M0/a1
+TEST touch $M0/src1
+TEST touch $M0/del-me1
+TEST mkdir $M0/dir11
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST touch $M0/a21
+TEST mkdir $M0/dir21
+TEST mknod $M0/b21 b 4 5
+TEST ln -s $M0/a1 $M0/symlink1
+TEST ln $M0/a1 $M0/link1
+TEST mv $M0/src1 $M0/dst1
+TEST rm -f $M0/del-me1
+TEST rmdir $M0/dir11
+TEST dd if=/dev/zero of=$M0/a1 bs=1M count=1 conv=notrunc
+TEST dd if=/dev/zero of=$M0/data1 bs=1M count=1 conv=notrunc
+TEST truncate -s 0 $M0/a1
+TEST setfattr -n trusted.abc -v abc $M0/a1
+TEST setfattr -x trusted.def $M0/a1
+TEST chmod +x $M0/a1
+TEST fallocate -l 2m -n $M0/a1
+TEST fallocate -p -l 512k $M0/a1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+TEST touch $M0/a2
+TEST touch $M0/data2
+TEST setfattr -n trusted.def -v def $M0/a1
+TEST touch $M0/src2
+TEST touch $M0/del-me2
+TEST mkdir $M0/dir12
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST ! touch $M0/a22
+TEST ! mkdir $M0/dir22
+TEST ! mknod $M0/b22 b 4 5
+TEST ! ln -s $M0/a2 $M0/symlink2
+TEST ! ln $M0/a2 $M0/link2
+TEST ! mv $M0/src2 $M0/dst2
+TEST ! rm -f $M0/del-me2
+TEST ! rmdir $M0/dir12
+TEST ! dd if=/dev/zero of=$M0/a2 bs=1M count=1 conv=notrunc
+TEST ! dd if=/dev/zero of=$M0/data2 bs=1M count=1 conv=notrunc
+TEST ! truncate -s 0 $M0/a2
+TEST ! setfattr -n trusted.abc -v abc $M0/a2
+TEST ! setfattr -x trusted.def $M0/a2
+TEST ! chmod +x $M0/a2
+TEST ! fallocate -l 2m -n $M0/a2
+TEST ! fallocate -p -l 512k $M0/a2
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}
+
+# Set quorum-count to 5 and kill 1 brick and the fops should pass
+TEST $CLI volume set $V0 disperse.quorum-count 5
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^5$" ec_option_value $V0 $M0 0 quorum-count
+TEST touch $M0/a3
+TEST touch $M0/data3
+TEST setfattr -n trusted.def -v def $M0/a3
+TEST touch $M0/src3
+TEST touch $M0/del-me3
+TEST mkdir $M0/dir13
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST touch $M0/a31
+TEST mkdir $M0/dir31
+TEST mknod $M0/b31 b 4 5
+TEST ln -s $M0/a3 $M0/symlink3
+TEST ln $M0/a3 $M0/link3
+TEST mv $M0/src3 $M0/dst3
+TEST rm -f $M0/del-me3
+TEST rmdir $M0/dir13
+TEST dd if=/dev/zero of=$M0/a3 bs=1M count=1 conv=notrunc
+TEST dd if=/dev/zero of=$M0/data3 bs=1M count=1 conv=notrunc
+TEST truncate -s 0 $M0/a3
+TEST setfattr -n trusted.abc -v abc $M0/a3
+TEST setfattr -x trusted.def $M0/a3
+TEST chmod +x $M0/a3
+TEST fallocate -l 2m -n $M0/a3
+TEST fallocate -p -l 512k $M0/a3
+TEST dd if=/dev/urandom of=$M0/heal-file bs=1M count=1 oflag=direct
+cksum_before_heal="$(md5sum $M0/heal-file | awk '{print $1}')"
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}
+TEST kill_brick $V0 $H0 $B0/${V0}4
+TEST kill_brick $V0 $H0 $B0/${V0}5
+cksum_after_heal=$(dd if=$M0/heal-file iflag=direct | md5sum | awk '{print $1}')
+TEST [[ $cksum_before_heal == $cksum_after_heal ]]
+cleanup;
diff --git a/tests/basic/ec/ec-read-mask.t b/tests/basic/ec/ec-read-mask.t
new file mode 100644
index 00000000000..ddb556f2973
--- /dev/null
+++ b/tests/basic/ec/ec-read-mask.t
@@ -0,0 +1,114 @@
+ #!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../ec.rc
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+
+#Empty read-mask should fail
+TEST ! $GFS --xlator-option=*.ec-read-mask="" -s $H0 --volfile-id $V0 $M0
+
+#Less than 4 number of bricks should fail
+TEST ! $GFS --xlator-option="*.ec-read-mask=0" -s $H0 --volfile-id $V0 $M0
+TEST ! $GFS --xlator-option="*.ec-read-mask=0:1" -s $H0 --volfile-id $V0 $M0
+TEST ! $GFS --xlator-option=*.ec-read-mask="0:1:2" -s $H0 --volfile-id $V0 $M0
+
+#ids greater than 5 should fail
+TEST ! $GFS --xlator-option="*.ec-read-mask=0:1:2:6" -s $H0 --volfile-id $V0 $M0
+
+#ids less than 0 should fail
+TEST ! $GFS --xlator-option="*.ec-read-mask=0:-1:2:5" -s $H0 --volfile-id $V0 $M0
+
+#read-mask with non-alphabet or comma should fail
+TEST ! $GFS --xlator-option="*.ec-read-mask=0:1:2:5:abc" -s $H0 --volfile-id $V0 $M0
+TEST ! $GFS --xlator-option="*.ec-read-mask=0:1:2:5a" -s $H0 --volfile-id $V0 $M0
+
+#mount with at least 4 read-mask-ids and all of them valid should pass
+TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5:4:3" -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT "^111111$" ec_option_value $V0 $M0 0 read-mask
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask
+
+TEST dd if=/dev/urandom of=$M0/a bs=1M count=1
+md5=$(md5sum $M0/a | awk '{print $1}')
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+#Read on the file should fail if any of the read-mask is down when number of
+#ids is data-count
+TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST ! dd if=$M0/a of=/dev/null
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume start $V0 force
+
+TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST ! dd if=$M0/a of=/dev/null
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume start $V0 force
+
+TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST ! dd if=$M0/a of=/dev/null
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume start $V0 force
+
+TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask
+TEST kill_brick $V0 $H0 $B0/${V0}5
+TEST ! dd if=$M0/a of=/dev/null
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume start $V0 force
+
+#Read on file should succeed when non-read-mask bricks are down
+TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask
+TEST kill_brick $V0 $H0 $B0/${V0}3
+EXPECT "^$md5$" echo $(dd if=$M0/a | md5sum | awk '{print $1}')
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume start $V0 force
+
+TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask
+TEST kill_brick $V0 $H0 $B0/${V0}4
+EXPECT "^$md5$" echo $(dd if=$M0/a | md5sum | awk '{print $1}')
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume start $V0 force
+
+TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask
+TEST kill_brick $V0 $H0 $B0/${V0}3
+TEST kill_brick $V0 $H0 $B0/${V0}4
+EXPECT "^$md5$" echo $(dd if=$M0/a | md5sum | awk '{print $1}')
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume start $V0 force
+
+#Deliberately corrupt chunks 3: 4 and check that reads still give correct data
+TEST dd if=/dev/zero of=$B0/${V0}3/a bs=256k count=1
+TEST dd if=/dev/zero of=$B0/${V0}4/a bs=256k count=1
+TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask
+EXPECT "^$md5$" echo $(dd if=$M0/a | md5sum | awk '{print $1}')
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+cleanup;
diff --git a/tests/basic/ec/ec-reset-brick.t b/tests/basic/ec/ec-reset-brick.t
new file mode 100644
index 00000000000..f1a625df4ff
--- /dev/null
+++ b/tests/basic/ec/ec-reset-brick.t
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+function num_entries {
+ ls -l $1 | wc -l
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+mkdir $M0/dir
+touch $M0/dir/{1..10}
+
+mkdir $M0/dir/dir1
+touch $M0/dir/dir1/{1..10}
+
+#kill brick process
+TEST $CLI volume reset-brick $V0 $H0:$B0/${V0}5 start
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "5" ec_child_up_count $V0 0
+
+#reset-brick by removing all the data and create dir again
+rm -rf $B0/${V0}5
+mkdir $B0/${V0}5
+
+#start brick process and heal by commiting reset-brick
+TEST $CLI volume reset-brick $V0 $H0:$B0/${V0}5 $H0:$B0/${V0}5 commit force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count_shd $V0 0
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}
+
+EXPECT "^12$" num_entries $B0/${V0}5/dir
+EXPECT "^11$" num_entries $B0/${V0}5/dir/dir1
+
+ec_version=$(get_hex_xattr trusted.ec.version $B0/${V0}0)
+EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}1
+EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}2
+EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}3
+EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}4
+EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}5
+
+cleanup;
diff --git a/tests/basic/ec/ec-root-heal.t b/tests/basic/ec/ec-root-heal.t
index a133885ef1d..11ea7cdf9d4 100644
--- a/tests/basic/ec/ec-root-heal.t
+++ b/tests/basic/ec/ec-root-heal.t
@@ -22,7 +22,8 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count_shd $V0 0
# active heal
TEST $CLI volume heal $V0 full
#ls -l gives "Total" line so number of lines will be 1 more
-EXPECT_WITHIN $HEAL_TIMEOUT "^11$" num_entries $B0/${V0}6
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}
+EXPECT "^11$" num_entries $B0/${V0}6
ec_version=$(get_hex_xattr trusted.ec.version $B0/${V0}0)
EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}1
EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}2
diff --git a/tests/basic/ec/ec-seek.t b/tests/basic/ec/ec-seek.t
index 6a0060870c8..5a7d31b9f8f 100644
--- a/tests/basic/ec/ec-seek.t
+++ b/tests/basic/ec/ec-seek.t
@@ -6,7 +6,7 @@
cleanup
SEEK=$(dirname $0)/seek
-build_tester $(dirname $0)/seek.c -o ${SEEK}
+build_tester $(dirname $0)/../seek.c -o ${SEEK}
TEST glusterd
TEST pidof glusterd
@@ -51,6 +51,7 @@ EXPECT "^$((${BSIZE} * 5 + 512))$" ${SEEK} scan ${M0}/test hole $((${BSIZE} * 5
EXPECT "^ENXIO$" ${SEEK} scan ${M0}/test hole $((${BSIZE} * 5 + 512))
EXPECT "^ENXIO$" ${SEEK} scan ${M0}/test hole $((${BSIZE} * 6))
+rm -f ${SEEK}
cleanup
# Centos6 regression slaves seem to not support SEEK_DATA/SEEK_HOLE
diff --git a/tests/basic/ec/ec-stripe.t b/tests/basic/ec/ec-stripe.t
index 1e940eba81b..98b92294feb 100644
--- a/tests/basic/ec/ec-stripe.t
+++ b/tests/basic/ec/ec-stripe.t
@@ -202,7 +202,7 @@ TEST truncate -s 0 $B0/test_file
TEST truncate -s 0 $M0/test_file
TEST dd if=$B0/misc_file of=$B0/test_file bs=1022 count=5 oflag=seek_bytes,sync seek=400 conv=notrunc
TEST dd if=$B0/misc_file of=$M0/test_file bs=1022 count=5 oflag=seek_bytes,sync seek=400 conv=notrunc
-check_statedump_md5sum 4 5
+check_statedump_md5sum 4 4
clean_file_unmount
### 14 - Truncate to invalidate all but one the stripe in cache ####
diff --git a/tests/basic/ec/gfapi-ec-open-truncate.c b/tests/basic/ec/gfapi-ec-open-truncate.c
new file mode 100644
index 00000000000..fb16807003a
--- /dev/null
+++ b/tests/basic/ec/gfapi-ec-open-truncate.c
@@ -0,0 +1,171 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define LOG_ERR(msg) \
+ do { \
+ fprintf(stderr, "%s : Error (%s)\n", msg, strerror(errno)); \
+ } while (0)
+
+int
+fill_iov(struct iovec *iov, char fillchar, int count)
+{
+ int ret = -1;
+
+ iov->iov_base = calloc(count + 1, sizeof(fillchar));
+ if (iov->iov_base == NULL) {
+ return ret;
+ } else {
+ iov->iov_len = count;
+ ret = 0;
+ }
+ memset(iov->iov_base, fillchar, count);
+ memset(iov->iov_base + count, '\0', 1);
+
+ return ret;
+}
+
+glfs_t *
+init_glfs(const char *hostname, const char *volname, const char *logfile)
+{
+ int ret = -1;
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ LOG_ERR("glfs_new failed");
+ return NULL;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_volfile_server failed");
+ goto out;
+ }
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_logging failed");
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ LOG_ERR("glfs_init failed");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret) {
+ glfs_fini(fs);
+ fs = NULL;
+ }
+
+ return fs;
+}
+
+int
+main(int argc, char *argv[])
+{
+ char *hostname = NULL;
+ char *volname = NULL;
+ char *logfile = NULL;
+ glfs_t *fs = NULL;
+ glfs_fd_t *glfd = NULL;
+ int ret = 0;
+ int i = 0;
+ int count = 200;
+ struct iovec iov = {0};
+ int flags = O_RDWR;
+ int bricksup = 0;
+ int fdopen = 0;
+
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ fs = init_glfs(hostname, volname, logfile);
+ if (fs == NULL) {
+ LOG_ERR("init_glfs failed");
+ return -1;
+ }
+
+ /* Brick is down and we are opening a file to trigger fd heal. */
+ /* Bypass Write-behind */
+ glfd = glfs_open(fs, "a", O_WRONLY | O_TRUNC | O_SYNC);
+ if (glfd == NULL) {
+ LOG_ERR("glfs_open_truncate failed");
+ exit(1);
+ }
+ system("gluster --mode=script volume start patchy force");
+ /*CHILD_UP_TIMEOUT is 20 seconds*/
+ for (i = 0; i < 20; i++) {
+ ret = system(
+ "[ $(gluster --mode=script volume status patchy | "
+ "grep \" Y \" | awk '{print $(NF-1)}' | wc -l) == 3 ]");
+ if (WIFEXITED(ret) && WEXITSTATUS(ret)) {
+ printf("Ret value of system: %d\n, ifexited: %d, exitstatus: %d",
+ ret, WIFEXITED(ret), WEXITSTATUS(ret));
+ sleep(1);
+ continue;
+ }
+ printf("Number of loops: %d\n", i);
+ bricksup = 1;
+ break;
+ }
+ if (!bricksup) {
+ system("gluster --mode=script volume status patchy");
+ LOG_ERR("Bricks didn't come up\n");
+ exit(1);
+ }
+
+ /*Not sure how to check that the child-up reached EC, so sleep 3 for now*/
+ sleep(3);
+ ret = fill_iov(&iov, 'a', 200);
+ if (ret) {
+ LOG_ERR("failed to create iov");
+ exit(1);
+ }
+
+ /*write will trigger re-open*/
+ ret = glfs_pwritev(glfd, &iov, 1, 0, flags);
+ if (ret < 0) {
+ LOG_ERR("glfs_test_function failed");
+ exit(1);
+ }
+ /*Check reopen happened by checking for open-fds on the brick*/
+ for (i = 0; i < 20; i++) {
+ ret = system(
+ "[ $(for i in $(pgrep glusterfsd); do ls -l /proc/$i/fd | grep "
+ "\"[.]glusterfs\" | grep -v \".glusterfs/[0-9a-f][0-9a-f]\" | grep "
+ "-v health_check; done | wc -l) == 3 ]");
+ if (WIFEXITED(ret) && WEXITSTATUS(ret)) {
+ printf("Ret value of system: %d\n, ifexited: %d, exitstatus: %d",
+ ret, WIFEXITED(ret), WEXITSTATUS(ret));
+ sleep(1);
+ continue;
+ }
+ fdopen = 1;
+ break;
+ }
+
+ if (!fdopen) {
+ LOG_ERR("fd reopen didn't succeed");
+ exit(1);
+ }
+
+ return 0;
+}
diff --git a/tests/basic/ec/gfapi-ec-open-truncate.t b/tests/basic/ec/gfapi-ec-open-truncate.t
new file mode 100644
index 00000000000..e22562c6ea3
--- /dev/null
+++ b/tests/basic/ec/gfapi-ec-open-truncate.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#This .t tests the functionality of open-fd-heal when opened with O_TRUNC.
+#If re-open is not done with O_TRUNC then the test will pass.
+
+cleanup
+
+TEST glusterd
+
+TEST $CLI volume create $V0 disperse 3 ${H0}:$B0/brick{1,2,3}
+EXPECT 'Created' volinfo_field $V0 'Status'
+#Disable heals to prevent any chance of heals masking the problem
+TEST $CLI volume set $V0 disperse.background-heals 0
+TEST $CLI volume set $V0 disperse.heal-wait-qlength 0
+TEST $CLI volume set $V0 performance.write-behind off
+
+#We need truncate fop to go through before pre-op completes for the write-fop
+#which triggers open-fd heal. Otherwise truncate won't be allowed on 'bad' brick
+TEST $CLI volume set $V0 delay-gen posix
+TEST $CLI volume set $V0 delay-gen.enable fxattrop
+TEST $CLI volume set $V0 delay-gen.delay-percentage 100
+TEST $CLI volume set $V0 delay-gen.delay-duration 1000000
+
+TEST $CLI volume heal $V0 disable
+
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+EXPECT 'Started' volinfo_field $V0 'Status'
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+TEST touch $M0/a
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST kill_brick $V0 $H0 $B0/brick1
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/gfapi-ec-open-truncate.c -lgfapi
+
+TEST $CLI volume profile $V0 info clear
+TEST ./$(dirname $0)/gfapi-ec-open-truncate ${H0} $V0 $logdir/gfapi-ec-open-truncate.log
+
+EXPECT "^2$" echo $($CLI volume profile $V0 info incremental | grep -i truncate | wc -l)
+cleanup_tester $(dirname $0)/gfapi-ec-open-truncate
+
+cleanup
diff --git a/tests/basic/ec/nfs.t b/tests/basic/ec/nfs.t
index f0bdff93d5f..3f51a640ef7 100755
--- a/tests/basic/ec/nfs.t
+++ b/tests/basic/ec/nfs.t
@@ -3,6 +3,8 @@
. $(dirname $0)/../../include.rc
. $(dirname $0)/../../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup
TEST glusterd
diff --git a/tests/basic/ec/self-heal-read-write-fail.t b/tests/basic/ec/self-heal-read-write-fail.t
new file mode 100644
index 00000000000..0ba591b5bb2
--- /dev/null
+++ b/tests/basic/ec/self-heal-read-write-fail.t
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#This test verifies that self-heal fails when read/write fails as part of heal
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+TEST touch $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}0
+echo abc >> $M0/a
+
+# Umount the volume to force all pending writes to reach the bricks
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+#Load error-gen and fail read fop and test that heal fails
+TEST $CLI volume stop $V0 #Stop volume so that error-gen can be loaded
+TEST $CLI volume set $V0 debug.error-gen posix
+TEST $CLI volume set $V0 debug.error-fops read
+TEST $CLI volume set $V0 debug.error-number EBADF
+TEST $CLI volume set $V0 debug.error-failure 100
+
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
+TEST ! getfattr -n trusted.ec.heal $M0/a
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
+
+#fail write fop and test that heal fails
+TEST $CLI volume stop $V0
+TEST $CLI volume set $V0 debug.error-fops write
+
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
+TEST ! getfattr -n trusted.ec.heal $M0/a
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
+
+TEST $CLI volume stop $V0 #Stop volume so that error-gen can be disabled
+TEST $CLI volume reset $V0 debug.error-gen
+TEST $CLI volume reset $V0 debug.error-fops
+TEST $CLI volume reset $V0 debug.error-number
+TEST $CLI volume reset $V0 debug.error-failure
+
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
+TEST getfattr -n trusted.ec.heal $M0/a
+EXPECT "^0$" get_pending_heal_count $V0
+
+#Test that heal worked as expected by forcing read from brick0
+#remount to make sure data is not served from any cache
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST kill_brick $V0 $H0 $B0/${V0}2
+EXPECT "abc" cat $M0/a
+
+cleanup
diff --git a/tests/basic/ec/self-heal.t b/tests/basic/ec/self-heal.t
index d217559db1a..6329bb60248 100644
--- a/tests/basic/ec/self-heal.t
+++ b/tests/basic/ec/self-heal.t
@@ -131,6 +131,8 @@ TEST $CLI volume create $V0 redundancy 2 $H0:$B0/${V0}{0..5}
TEST $CLI volume set $V0 client-log-level DEBUG
#Write-behind has a bug where lookup can race over write which leads to size mismatch on the mount after a 'cp'
TEST $CLI volume set $V0 performance.write-behind off
+#md-cache can return stale stat due to default timeout being 1 sec
+TEST $CLI volume set $V0 performance.stat-prefetch off
EXPECT "Created" volinfo_field $V0 'Status'
TEST $CLI volume start $V0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Started" volinfo_field $V0 'Status'
diff --git a/tests/basic/fencing/afr-lock-heal-advanced.c b/tests/basic/fencing/afr-lock-heal-advanced.c
new file mode 100644
index 00000000000..e202ccd5b29
--- /dev/null
+++ b/tests/basic/fencing/afr-lock-heal-advanced.c
@@ -0,0 +1,227 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <signal.h>
+#include <unistd.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define GF_ENFORCE_MANDATORY_LOCK "trusted.glusterfs.enforce-mandatory-lock"
+
+FILE *logfile_fp;
+
+#define LOG_ERR(func, err) \
+ do { \
+ if (!logfile_fp) { \
+ fprintf(stderr, "%\n%d %s : returned error (%s)\n", __LINE__, \
+ func, strerror(err)); \
+ fflush(stderr); \
+ } else { \
+ fprintf(logfile_fp, "\n%d %s : returned error (%s)\n", __LINE__, \
+ func, strerror(err)); \
+ fflush(logfile_fp); \
+ } \
+ } while (0)
+
+glfs_t *
+setup_client(char *hostname, char *volname, char *log_file)
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(logfile_fp, "\nglfs_new: returned NULL (%s)\n",
+ strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ fprintf(logfile_fp, "\nglfs_set_volfile_server failed ret:%d (%s)\n",
+ ret, strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_logging(fs, log_file, 7);
+ if (ret < 0) {
+ fprintf(logfile_fp, "\nglfs_set_logging failed with ret: %d (%s)\n",
+ ret, strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ fprintf(logfile_fp, "\nglfs_init failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+out:
+ return fs;
+error:
+ return NULL;
+}
+
+glfs_fd_t *
+open_file(glfs_t *fs, char *fname)
+{
+ glfs_fd_t *fd = NULL;
+
+ fd = glfs_creat(fs, fname, O_CREAT, 0644);
+ if (!fd) {
+ LOG_ERR("glfs_creat", errno);
+ goto out;
+ }
+out:
+ return fd;
+}
+
+int
+acquire_mandatory_lock(glfs_t *fs, glfs_fd_t *fd)
+{
+ struct flock lock;
+ int ret = 0;
+
+ /* initialize lock */
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 100;
+
+ ret = glfs_fsetxattr(fd, GF_ENFORCE_MANDATORY_LOCK, "set", 8, 0);
+ if (ret < 0) {
+ LOG_ERR("glfs_fsetxattr", errno);
+ ret = -1;
+ goto out;
+ }
+
+ /* take a write mandatory lock */
+ ret = glfs_file_lock(fd, F_SETLKW, &lock, GLFS_LK_MANDATORY);
+ if (ret) {
+ LOG_ERR("glfs_file_lock", errno);
+ ret = -1;
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+int
+perform_test(glfs_t *fs, char *file1, char *file2)
+{
+ int ret = 0;
+ glfs_fd_t *fd1 = NULL;
+ glfs_fd_t *fd2 = NULL;
+ char *buf = "0123456789";
+
+ fd1 = open_file(fs, file1);
+ if (!fd1) {
+ ret = -1;
+ goto out;
+ }
+ fd2 = open_file(fs, file2);
+ if (!fd2) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Kill one brick from the .t.*/
+ pause();
+
+ ret = acquire_mandatory_lock(fs, fd1);
+ if (ret) {
+ goto out;
+ }
+ ret = acquire_mandatory_lock(fs, fd2);
+ if (ret) {
+ goto out;
+ }
+
+ /* Bring the brick up and let the locks heal. */
+ pause();
+ /*At this point, the .t would have killed and brought back 2 bricks, marking
+ * the fd bad.*/
+
+ ret = glfs_write(fd1, buf, 10, 0);
+ if (ret > 0) {
+ /* Write is supposed to fail with EBADFD*/
+ LOG_ERR("glfs_write", ret);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (fd1)
+ glfs_close(fd1);
+ if (fd2)
+ glfs_close(fd2);
+ return ret;
+}
+
+static void
+sigusr1_handler(int signo)
+{
+ /*Signal caught. Just continue with the execution.*/
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+ char *volname = NULL;
+ char log_file[100];
+ char *hostname = NULL;
+ char *fname1 = NULL;
+ char *fname2 = NULL;
+
+ if (argc != 7) {
+ fprintf(stderr,
+ "Expect following args %s <host> <volname> <file1> <file2> "
+ "<log file "
+ "location> <log_file_suffix>\n",
+ argv[0]);
+ return -1;
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ fname1 = argv[3];
+ fname2 = argv[4];
+
+ /*Use SIGUSR1 and pause()as a means of hitting break-points this program
+ *when signalled from the .t test case.*/
+ if (signal(SIGUSR1, sigusr1_handler) == SIG_ERR) {
+ LOG_ERR("SIGUSR1 handler error", errno);
+ exit(EXIT_FAILURE);
+ }
+
+ sprintf(log_file, "%s/%s.%s.%s", argv[5], "lock-heal.c", argv[6], "log");
+ logfile_fp = fopen(log_file, "w");
+ if (!logfile_fp) {
+ fprintf(stderr, "\nfailed to open %s\n", log_file);
+ fflush(stderr);
+ return -1;
+ }
+
+ sprintf(log_file, "%s/%s.%s.%s", argv[5], "glfs-client", argv[6], "log");
+ fs = setup_client(hostname, volname, log_file);
+ if (!fs) {
+ LOG_ERR("setup_client", errno);
+ return -1;
+ }
+
+ ret = perform_test(fs, fname1, fname2);
+
+error:
+ if (fs) {
+ /*glfs_fini(fs)*/; // glfs fini path is racy and crashes the program
+ }
+
+ fclose(logfile_fp);
+
+ return ret;
+}
diff --git a/tests/basic/fencing/afr-lock-heal-advanced.t b/tests/basic/fencing/afr-lock-heal-advanced.t
new file mode 100644
index 00000000000..8a5b5989b5e
--- /dev/null
+++ b/tests/basic/fencing/afr-lock-heal-advanced.t
@@ -0,0 +1,115 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+PROCESS_UP_TIMEOUT=90
+
+function is_gfapi_program_alive()
+{
+ pid=$1
+ ps -p $pid
+ if [ $? -eq 0 ]
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+function get_active_lock_count {
+ brick=$1
+ i1=$2
+ i2=$3
+ pattern="ACTIVE.*client-${brick: -1}"
+
+ sdump=$(generate_brick_statedump $V0 $H0 $brick)
+ lock_count1="$(egrep "$i1" $sdump -A3| egrep "$pattern"|uniq|wc -l)"
+ lock_count2="$(egrep "$i2" $sdump -A3| egrep "$pattern"|uniq|wc -l)"
+ echo "$((lock_count1+lock_count2))"
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 locks.mandatory-locking forced
+TEST $CLI volume set $V0 enforce-mandatory-lock on
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+TEST build_tester $(dirname $0)/afr-lock-heal-advanced.c -lgfapi -ggdb
+
+#------------------------------------------------------------------------------
+# Use more than 1 fd from same client so that list_for_each_* loops are executed more than once.
+$(dirname $0)/afr-lock-heal-advanced $H0 $V0 "/FILE1" "/FILE2" $logdir C1&
+client_pid=$!
+TEST [ $client_pid ]
+
+TEST sleep 5 # By now, the client would have opened an fd on FILE1 and FILE2 and waiting for a SIGUSR1.
+EXPECT "Y" is_gfapi_program_alive $client_pid
+
+gfid_str1=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/FILE1))
+inode1="FILE1|gfid:$gfid_str1"
+gfid_str2=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/FILE2))
+inode2="FILE2|gfid:$gfid_str2"
+
+# Kill brick-3 and let client-1 take lock on both files.
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST kill -SIGUSR1 $client_pid
+# If program is still alive, glfs_file_lock() was a success.
+EXPECT "Y" is_gfapi_program_alive $client_pid
+
+# Check lock is present on brick-1 and brick-2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" get_active_lock_count $B0/${V0}0 $inode1 $inode2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" get_active_lock_count $B0/${V0}1 $inode1 $inode2
+
+# Restart brick-3 and check that the lock has healed on it.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+TEST sleep 10 #Needed for client to re-open fd? Otherwise client_pre_lk_v2() fails with EBADFD for remote-fd.
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" get_active_lock_count $B0/${V0}2 $inode1 $inode2
+
+#------------------------------------------------------------------------------
+# Kill same brick before heal completes the first time and check it completes the second time.
+TEST $CLI volume set $V0 delay-gen locks
+TEST $CLI volume set $V0 delay-gen.delay-duration 5000000
+TEST $CLI volume set $V0 delay-gen.delay-percentage 100
+TEST $CLI volume set $V0 delay-gen.enable finodelk
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST $CLI volume reset $V0 delay-gen
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" get_active_lock_count $B0/${V0}0 $inode1 $inode2
+
+#------------------------------------------------------------------------------
+# Kill 2 bricks and bring it back. The fds must be marked bad.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+
+# TODO: `gluster v statedump $V0 client localhost:$client_pid` is not working,
+# so sleep for 20 seconds for the client to connect to connect to the bricks.
+TEST sleep $CHILD_UP_TIMEOUT
+
+# Try to write to FILE1 from the .c; it must fail.
+TEST kill -SIGUSR1 $client_pid
+wait $client_pid
+ret=$?
+TEST [ $ret == 0 ]
+
+cleanup_tester $(dirname $0)/afr-lock-heal-advanced
+cleanup;
diff --git a/tests/basic/fencing/afr-lock-heal-basic.c b/tests/basic/fencing/afr-lock-heal-basic.c
new file mode 100644
index 00000000000..768c9e57181
--- /dev/null
+++ b/tests/basic/fencing/afr-lock-heal-basic.c
@@ -0,0 +1,182 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <signal.h>
+#include <unistd.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define GF_ENFORCE_MANDATORY_LOCK "trusted.glusterfs.enforce-mandatory-lock"
+
+FILE *logfile_fp;
+
+#define LOG_ERR(func, err) \
+ do { \
+ if (!logfile_fp) { \
+ fprintf(stderr, "%\n%d %s : returned error (%s)\n", __LINE__, \
+ func, strerror(err)); \
+ fflush(stderr); \
+ } else { \
+ fprintf(logfile_fp, "\n%d %s : returned error (%s)\n", __LINE__, \
+ func, strerror(err)); \
+ fflush(logfile_fp); \
+ } \
+ } while (0)
+
+glfs_t *
+setup_client(char *hostname, char *volname, char *log_file)
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(logfile_fp, "\nglfs_new: returned NULL (%s)\n",
+ strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ fprintf(logfile_fp, "\nglfs_set_volfile_server failed ret:%d (%s)\n",
+ ret, strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_logging(fs, log_file, 7);
+ if (ret < 0) {
+ fprintf(logfile_fp, "\nglfs_set_logging failed with ret: %d (%s)\n",
+ ret, strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ fprintf(logfile_fp, "\nglfs_init failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+out:
+ return fs;
+error:
+ return NULL;
+}
+
+int
+acquire_mandatory_lock(glfs_t *fs, char *fname)
+{
+ struct flock lock;
+ int ret = 0;
+ glfs_fd_t *fd = NULL;
+
+ fd = glfs_creat(fs, fname, O_CREAT, 0644);
+ if (!fd) {
+ if (errno != EEXIST) {
+ LOG_ERR("glfs_creat", errno);
+ ret = -1;
+ goto out;
+ }
+ fd = glfs_open(fs, fname, O_RDWR | O_NONBLOCK);
+ if (!fd) {
+ LOG_ERR("glfs_open", errno);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ /* initialize lock */
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 100;
+
+ ret = glfs_fsetxattr(fd, GF_ENFORCE_MANDATORY_LOCK, "set", 8, 0);
+ if (ret < 0) {
+ LOG_ERR("glfs_fsetxattr", errno);
+ ret = -1;
+ goto out;
+ }
+
+ pause();
+
+ /* take a write mandatory lock */
+ ret = glfs_file_lock(fd, F_SETLKW, &lock, GLFS_LK_MANDATORY);
+ if (ret) {
+ LOG_ERR("glfs_file_lock", errno);
+ goto out;
+ }
+
+ pause();
+
+out:
+ if (fd) {
+ glfs_close(fd);
+ }
+
+ return ret;
+}
+
+static void
+sigusr1_handler(int signo)
+{
+ /*Signal caught. Just continue with the execution.*/
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+ char *volname = NULL;
+ char log_file[100];
+ char *hostname = NULL;
+ char *fname = NULL;
+
+ if (argc != 6) {
+ fprintf(stderr,
+ "Expect following args %s <host> <volname> <file> <log file "
+ "location> <log_file_suffix>\n",
+ argv[0]);
+ return -1;
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ fname = argv[3];
+
+ /*Use SIGUSR1 and pause()as a means of hitting break-points this program
+ *when signalled from the .t test case.*/
+ if (signal(SIGUSR1, sigusr1_handler) == SIG_ERR) {
+ LOG_ERR("SIGUSR1 handler error", errno);
+ exit(EXIT_FAILURE);
+ }
+
+ sprintf(log_file, "%s/%s.%s.%s", argv[4], "lock-heal-basic.c", argv[5],
+ "log");
+ logfile_fp = fopen(log_file, "w");
+ if (!logfile_fp) {
+ fprintf(stderr, "\nfailed to open %s\n", log_file);
+ fflush(stderr);
+ return -1;
+ }
+
+ sprintf(log_file, "%s/%s.%s.%s", argv[4], "glfs-client", argv[5], "log");
+ fs = setup_client(hostname, volname, log_file);
+ if (!fs) {
+ LOG_ERR("setup_client", errno);
+ return -1;
+ }
+
+ ret = acquire_mandatory_lock(fs, fname);
+
+error:
+ if (fs) {
+ /*glfs_fini(fs)*/; // glfs fini path is racy and crashes the program
+ }
+
+ fclose(logfile_fp);
+
+ return ret;
+}
diff --git a/tests/basic/fencing/afr-lock-heal-basic.t b/tests/basic/fencing/afr-lock-heal-basic.t
new file mode 100644
index 00000000000..69131af085d
--- /dev/null
+++ b/tests/basic/fencing/afr-lock-heal-basic.t
@@ -0,0 +1,102 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+function is_gfapi_program_alive()
+{
+ pid=$1
+ ps -p $pid
+ if [ $? -eq 0 ]
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 locks.mandatory-locking forced
+TEST $CLI volume set $V0 enforce-mandatory-lock on
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+TEST build_tester $(dirname $0)/afr-lock-heal-basic.c -lgfapi -ggdb
+
+$(dirname $0)/afr-lock-heal-basic $H0 $V0 "/FILE" $logdir C1&
+client1_pid=$!
+TEST [ $client1_pid ]
+
+$(dirname $0)/afr-lock-heal-basic $H0 $V0 "/FILE" $logdir C2&
+client2_pid=$!
+TEST [ $client2_pid ]
+
+TEST sleep 5 # By now, the 2 clients would have opened an fd on FILE and waiting for a SIGUSR1.
+EXPECT "Y" is_gfapi_program_alive $client1_pid
+EXPECT "Y" is_gfapi_program_alive $client2_pid
+
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/FILE))
+inode="FILE|gfid:$gfid_str"
+
+# Kill brick-3 and let client-1 take lock on the file.
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST kill -SIGUSR1 $client1_pid
+# If program is still alive, glfs_file_lock() was a success.
+EXPECT "Y" is_gfapi_program_alive $client1_pid
+
+# Check lock is present on brick-1 and brick-2
+b1_sdump=$(generate_brick_statedump $V0 $H0 $B0/${V0}0)
+c1_lock_on_b1="$(egrep "$inode" $b1_sdump -A3| egrep 'ACTIVE.*client-0'| uniq| awk '{print $1,$2,$3,S4,$5,$6,$7,$8}'|tr -d '(,), ,')"
+b2_sdump=$(generate_brick_statedump $V0 $H0 $B0/${V0}1)
+c1_lock_on_b2="$(egrep "$inode" $b2_sdump -A3| egrep 'ACTIVE.*client-1'| uniq| awk '{print $1,$2,$3,S4,$5,$6,$7,$8}'|tr -d '(,), ,')"
+TEST [ "$c1_lock_on_b1" == "$c1_lock_on_b2" ]
+
+# Restart brick-3 and check that the lock has healed on it.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+TEST sleep 10 #Needed for client to re-open fd? Otherwise client_pre_lk_v2() fails with EBADFD for remote-fd. Also wait for lock heal.
+
+b3_sdump=$(generate_brick_statedump $V0 $H0 $B0/${V0}2)
+c1_lock_on_b3="$(egrep "$inode" $b3_sdump -A3| egrep 'ACTIVE.*client-2'| uniq| awk '{print $1,$2,$3,S4,$5,$6,$7,$8}'|tr -d '(,), ,')"
+TEST [ "$c1_lock_on_b1" == "$c1_lock_on_b3" ]
+
+# Kill brick-1 and let client-2 preempt the lock on bricks 2 and 3.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill -SIGUSR1 $client2_pid
+# If program is still alive, glfs_file_lock() was a success.
+EXPECT "Y" is_gfapi_program_alive $client2_pid
+
+# Restart brick-1 and let lock healing complete.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+TEST sleep 10 #Needed for client to re-open fd? Otherwise client_pre_lk_v2() fails with EBADFD for remote-fd. Also wait for lock heal.
+
+# Check that all bricks now have locks from client 2 only.
+b1_sdump=$(generate_brick_statedump $V0 $H0 $B0/${V0}0)
+c2_lock_on_b1="$(egrep "$inode" $b1_sdump -A3| egrep 'ACTIVE.*client-0'| uniq| awk '{print $1,$2,$3,S4,$5,$6,$7,$8}'|tr -d '(,), ,')"
+b2_sdump=$(generate_brick_statedump $V0 $H0 $B0/${V0}1)
+c2_lock_on_b2="$(egrep "$inode" $b2_sdump -A3| egrep 'ACTIVE.*client-1'| uniq| awk '{print $1,$2,$3,S4,$5,$6,$7,$8}'|tr -d '(,), ,')"
+b3_sdump=$(generate_brick_statedump $V0 $H0 $B0/${V0}2)
+c2_lock_on_b3="$(egrep "$inode" $b3_sdump -A3| egrep 'ACTIVE.*client-2'| uniq| awk '{print $1,$2,$3,S4,$5,$6,$7,$8}'|tr -d '(,), ,')"
+TEST [ "$c2_lock_on_b1" == "$c2_lock_on_b2" ]
+TEST [ "$c2_lock_on_b1" == "$c2_lock_on_b3" ]
+TEST [ "$c2_lock_on_b1" != "$c1_lock_on_b1" ]
+
+#Let the client programs run and exit.
+TEST kill -SIGUSR1 $client1_pid
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" is_gfapi_program_alive $client1_pid
+TEST kill -SIGUSR1 $client2_pid
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" is_gfapi_program_alive $client2_pid
+
+cleanup_tester $(dirname $0)/afr-lock-heal-basic
+cleanup;
diff --git a/tests/basic/fencing/fence-basic.c b/tests/basic/fencing/fence-basic.c
new file mode 100644
index 00000000000..4aa452e19b0
--- /dev/null
+++ b/tests/basic/fencing/fence-basic.c
@@ -0,0 +1,229 @@
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define NO_INIT 1
+#define GF_ENFORCE_MANDATORY_LOCK "trusted.glusterfs.enforce-mandatory-lock"
+
+FILE *fp;
+char *buf = "0123456789";
+
+#define LOG_ERR(func, err) \
+ do { \
+ if (!fp) { \
+ fprintf(stderr, "%\n%d %s : returned error (%s)\n", __LINE__, \
+ func, strerror(err)); \
+ fflush(stderr); \
+ } else { \
+ fprintf(fp, "\n%d %s : returned error (%s)\n", __LINE__, func, \
+ strerror(err)); \
+ fflush(fp); \
+ } \
+ } while (0)
+
+glfs_t *
+setup_new_client(char *hostname, char *volname, char *log_file, int flag)
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(fp, "\nglfs_new: returned NULL (%s)\n", strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ fprintf(fp, "\nglfs_set_volfile_server failed ret:%d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_logging(fs, log_file, 7);
+ if (ret < 0) {
+ fprintf(fp, "\nglfs_set_logging failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ if (flag == NO_INIT)
+ goto out;
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ fprintf(fp, "\nglfs_init failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+out:
+ return fs;
+error:
+ return NULL;
+}
+
+/* test plan
+ *
+ * - take mandatory lock from client 1
+ * - preempt mandatory lock from client 2
+ * - write from client 1 which should fail
+ */
+
+int
+test(glfs_t *fs1, glfs_t *fs2, char *fname)
+{
+ struct flock lock;
+ int ret = 0;
+ glfs_fd_t *fd1, *fd2 = NULL;
+
+ fd1 = glfs_creat(fs1, fname, O_RDWR, 0777);
+ if (ret) {
+ LOG_ERR("glfs_creat", errno);
+ ret = -1;
+ goto out;
+ }
+
+ fd2 = glfs_open(fs2, fname, O_RDWR | O_NONBLOCK);
+ if (ret) {
+ LOG_ERR("glfs_open", errno);
+ ret = -1;
+ goto out;
+ }
+
+ /* initialize lock */
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 100;
+
+ ret = glfs_fsetxattr(fd1, GF_ENFORCE_MANDATORY_LOCK, "set", 8, 0);
+ if (ret < 0) {
+ LOG_ERR("glfs_fsetxattr", errno);
+ ret = -1;
+ goto out;
+ }
+
+ /* take a write mandatory lock */
+ ret = glfs_file_lock(fd1, F_SETLKW, &lock, GLFS_LK_MANDATORY);
+ if (ret) {
+ LOG_ERR("glfs_file_lock", errno);
+ goto out;
+ }
+
+ ret = glfs_write(fd1, buf, 10, 0);
+ if (ret != 10) {
+ LOG_ERR("glfs_write", errno);
+ ret = -1;
+ goto out;
+ }
+
+ /* write should fail */
+ ret = glfs_write(fd2, buf, 10, 0);
+ if (ret != -1) {
+ LOG_ERR("glfs_write", errno);
+ ret = -1;
+ goto out;
+ }
+
+ /* preempt mandatory lock from client 1*/
+ ret = glfs_file_lock(fd2, F_SETLKW, &lock, GLFS_LK_MANDATORY);
+ if (ret) {
+ LOG_ERR("glfs_file_lock", errno);
+ goto out;
+ }
+
+ /* write should succeed from client 2 */
+ ret = glfs_write(fd2, buf, 10, 0);
+ if (ret == -1) {
+ LOG_ERR("glfs_write", errno);
+ goto out;
+ }
+
+ /* write should fail from client 1 */
+ ret = glfs_write(fd1, buf, 10, 0);
+ if (ret == 10) {
+ LOG_ERR("glfs_write", errno);
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if (fd1) {
+ glfs_close(fd1);
+ }
+
+ if (fd2) {
+ glfs_close(fd2);
+ }
+
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ glfs_t *fs1 = NULL;
+ glfs_t *fs2 = NULL;
+ char *volname = NULL;
+ char log_file[100];
+ char *hostname = NULL;
+ char *fname = "/file";
+ glfs_fd_t *fd1 = NULL;
+ glfs_fd_t *fd2 = NULL;
+
+ if (argc != 4) {
+ fprintf(
+ stderr,
+ "Expect following args %s <hostname> <Vol> <log file location>\n",
+ argv[0]);
+ return -1;
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+
+ sprintf(log_file, "%s/%s", argv[3], "fence-basic.log");
+ fp = fopen(log_file, "w");
+ if (!fp) {
+ fprintf(stderr, "\nfailed to open %s\n", log_file);
+ fflush(stderr);
+ return -1;
+ }
+
+ sprintf(log_file, "%s/%s", argv[3], "glfs-client-1.log");
+ fs1 = setup_new_client(hostname, volname, log_file, 0);
+ if (!fs1) {
+ LOG_ERR("setup_new_client", errno);
+ return -1;
+ }
+
+ sprintf(log_file, "%s/%s", argv[3], "glfs-client-2.log");
+ fs2 = setup_new_client(hostname, volname, log_file, 0);
+ if (!fs2) {
+ LOG_ERR("setup_new_client", errno);
+ ret = -1;
+ goto error;
+ }
+
+ ret = test(fs1, fs2, fname);
+
+error:
+ if (fs1) {
+ glfs_fini(fs1);
+ }
+
+ if (fs2) {
+ glfs_fini(fs2);
+ }
+
+ fclose(fp);
+
+ return ret;
+}
diff --git a/tests/basic/fencing/fence-basic.t b/tests/basic/fencing/fence-basic.t
new file mode 100755
index 00000000000..30f379e7b20
--- /dev/null
+++ b/tests/basic/fencing/fence-basic.t
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $CLI volume set $V0 diagnostics.client-log-flush-timeout 30
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 locks.mandatory-locking forced
+TEST $CLI volume set $V0 enforce-mandatory-lock on
+
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/fence-basic.c -lgfapi -ggdb
+TEST $(dirname $0)/fence-basic $H0 $V0 $logdir
+
+cleanup_tester $(dirname $0)/fence-basic
+
+cleanup; \ No newline at end of file
diff --git a/tests/basic/fencing/fencing-crash-conistency.t b/tests/basic/fencing/fencing-crash-conistency.t
new file mode 100644
index 00000000000..0c69411e90c
--- /dev/null
+++ b/tests/basic/fencing/fencing-crash-conistency.t
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+# with lock enforcement flag write should fail with out lock
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST $CLI volume set $V0 performance.write-behind off
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+TEST touch $M0/file
+
+#write should pass
+TEST "echo "test" > $M0/file"
+TEST "truncate -s 0 $M0/file"
+
+#enable mandatory locking
+TEST $CLI volume set $V0 locks.mandatory-locking forced
+TEST $CLI volume set $V0 enforce-mandatory-lock on
+
+#write should pass
+TEST "echo "test" >> $M0/file"
+TEST "truncate -s 0 $M0/file"
+
+#enforce lock on the file
+TEST setfattr -n trusted.glusterfs.enforce-mandatory-lock -v 1 $M0/file
+
+#write should fail
+TEST ! "echo "test" >> $M0/file"
+TEST ! "truncate -s 0 $M0/file"
+
+#remove lock enforcement flag
+TEST setfattr -x trusted.glusterfs.enforce-mandatory-lock $M0/file
+
+#write should pass
+TEST "echo "test" >> $M0/file"
+TEST "truncate -s 0 $M0/file"
+
+#enforce lock on the file
+TEST setfattr -n trusted.glusterfs.enforce-mandatory-lock -v 1 $M0/file
+#kill brick
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+TEST $CLI volume start $V0 force
+
+# wait one second for the brick to come online
+sleep 2
+#write should fail (lock xlator gets lock enforcement info from disk)
+TEST ! "echo "test" >> $M0/file"
+TEST ! "truncate -s 0 $M0/file"
+
+cleanup; \ No newline at end of file
diff --git a/tests/basic/fencing/test-fence-option.t b/tests/basic/fencing/test-fence-option.t
new file mode 100644
index 00000000000..115cbe7dbdf
--- /dev/null
+++ b/tests/basic/fencing/test-fence-option.t
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+# with lock enforcement flag write should fail with out lock
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+TEST touch $M0/file
+
+#setfattr for mandatory-enforcement will fail
+TEST ! setfattr -n trusted.glusterfs.enforce-mandatory-lock -v 1 $M0/file
+
+#enable mandatory locking
+TEST $CLI volume set $V0 locks.mandatory-locking forced
+
+#setfattr will fail
+TEST ! setfattr -n trusted.glusterfs.enforce-mandatory-lock -v 1 $M0/file
+
+#set lock-enforcement option
+TEST $CLI volume set $V0 enforce-mandatory-lock on
+
+#setfattr should succeed
+TEST setfattr -n trusted.glusterfs.enforce-mandatory-lock -v 1 $M0/file
+
+cleanup; \ No newline at end of file
diff --git a/tests/basic/first-test.t b/tests/basic/first-test.t
deleted file mode 100755
index 535b269e6b3..00000000000
--- a/tests/basic/first-test.t
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/bash
-
-. $(dirname $0)/../include.rc
-
-cat << EOF
-This test should run first for http://review.gluster.org/#/c/13439/ and should
-be removed once that patch has been merged.
-EOF
-
-TEST true
diff --git a/tests/basic/fops-sanity.c b/tests/basic/fops-sanity.c
index aff72d89ca1..ef00aa0f088 100644
--- a/tests/basic/fops-sanity.c
+++ b/tests/basic/fops-sanity.c
@@ -26,6 +26,7 @@
#include <errno.h>
#include <string.h>
#include <dirent.h>
+#include <sys/sysmacros.h>
#ifndef linux
#include <sys/socket.h>
diff --git a/tests/basic/fuse/active-io-graph-switch.t b/tests/basic/fuse/active-io-graph-switch.t
new file mode 100644
index 00000000000..6ec3e1fcbfa
--- /dev/null
+++ b/tests/basic/fuse/active-io-graph-switch.t
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+TESTS_EXPECTED_IN_LOOP=12
+
+function perform_io_on_mount {
+ local m="$1"
+ local f="$2"
+ local lockfile="$3"
+ while [ -f "$m/$lockfile" ];
+ do
+ dd if=/dev/zero of=$m/$f bs=1M count=1
+ done
+}
+
+function perform_graph_switch {
+ for i in {1..3}
+ do
+ TEST_IN_LOOP $CLI volume set $V0 performance.stat-prefetch off
+ sleep 3
+ TEST_IN_LOOP $CLI volume set $V0 performance.stat-prefetch on
+ sleep 3
+ done
+}
+
+function count_files {
+ ls $M0 | wc -l
+}
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 flush-behind off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST touch $M0/lock
+for i in {1..100}; do perform_io_on_mount $M0 $i lock & done
+EXPECT_WITHIN 5 "101" count_files
+
+perform_graph_switch
+TEST rm -f $M0/lock
+wait
+EXPECT "100" count_files
+TEST rm -f $M0/{1..100}
+EXPECT "0" count_files
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+#Repeat the tests with reader-thread-count
+TEST $GFS --reader-thread-count=10 --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST touch $M0/lock
+for i in {1..100}; do perform_io_on_mount $M0 $i lock & done
+EXPECT_WITHIN 5 "101" count_files
+
+perform_graph_switch
+TEST rm -f $M0/lock
+wait
+EXPECT "100" count_files
+TEST rm -f $M0/{1..100}
+EXPECT "0" count_files
+
+cleanup
diff --git a/tests/basic/gfapi/bug-1507896.c b/tests/basic/gfapi/bug-1507896.c
new file mode 100644
index 00000000000..1cc20849c2b
--- /dev/null
+++ b/tests/basic/gfapi/bug-1507896.c
@@ -0,0 +1,49 @@
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define VALIDATE_AND_GOTO_LABEL_ON_ERROR(func, ret, label) \
+ do { \
+ if (ret < 0) { \
+ fprintf(stderr, "%s : returned error %d (%s)\n", func, ret, \
+ strerror(errno)); \
+ goto label; \
+ } \
+ } while (0)
+
+int
+main(int argc, char *argv[])
+{
+ int ret = -1;
+ glfs_t *fs = NULL;
+ char *volname = NULL;
+ char *logfile = NULL;
+ char *hostname = NULL;
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ fs = glfs_new(volname);
+ if (!fs)
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_new(fs)", ret, out);
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_set_volfile_server(fs)", ret, out);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_set_logging(fs)", ret, out);
+
+ ret = glfs_init(fs);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_init(fs)", ret, out);
+
+out:
+ if (fs) {
+ ret = glfs_fini(fs);
+ if (ret)
+ fprintf(stderr, "glfs_fini(fs) returned %d\n", ret);
+ }
+ return ret;
+}
diff --git a/tests/basic/gfapi/bug-1507896.t b/tests/basic/gfapi/bug-1507896.t
new file mode 100644
index 00000000000..4764e650232
--- /dev/null
+++ b/tests/basic/gfapi/bug-1507896.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/bug-1507896.c -lgfapi
+
+TEST ./$(dirname $0)/bug-1507896 $H0 $V0 $logdir/bug-1507896.log
+
+#volume name precedding with '/'
+TEST ! ./$(dirname $0)/bug-1507896 $H0 /$V0 $logdir/bug-1507896.log
+
+#volume name passed with any special characters
+TEST ! ./$(dirname $0)/bug-1507896 $H0 test@_$V0 $logdir/bug-1507896.log
+
+cleanup_tester $(dirname $0)/bug-1507896
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/basic/gfapi/gfapi-async-calls-test.c b/tests/basic/gfapi/gfapi-async-calls-test.c
index 5a291c3c76b..55835b14709 100644
--- a/tests/basic/gfapi/gfapi-async-calls-test.c
+++ b/tests/basic/gfapi/gfapi-async-calls-test.c
@@ -17,6 +17,17 @@
int cbk_complete = 0;
int cbk_ret_val = -1;
+void
+cbk_check()
+{
+ while (cbk_complete != 1) {
+ sleep(1);
+ }
+ if (cbk_ret_val < 0) {
+ fprintf(stderr, "cbk_ret_val is -ve\n");
+ }
+}
+
int
fill_iov(struct iovec *iov, char fillchar, int count)
{
@@ -76,25 +87,23 @@ out:
}
void
-write_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
- struct stat *poststat, void *cookie)
+pwritev_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
+ struct stat *poststat, void *cookie)
{
if (ret < 0) {
- LOG_ERR("glfs_write failed");
+ LOG_ERR("glfs_pwritev failed");
}
cbk_ret_val = ret;
cbk_complete = 1;
}
int
-write_async(glfs_t *fs, glfs_fd_t *glfd, int char_count)
+pwritev_async(glfs_t *fs, glfs_fd_t *glfd, int char_count)
{
ssize_t ret = -1;
int flags = O_RDWR;
- const char *buff = "This is from my prog\n";
struct iovec iov = {0};
void *write_cookie = NULL;
- void *read_cookie = NULL;
ret = fill_iov(&iov, 'a', char_count);
if (ret) {
@@ -103,7 +112,7 @@ write_async(glfs_t *fs, glfs_fd_t *glfd, int char_count)
}
write_cookie = strdup("write_cookie");
- ret = glfs_pwritev_async(glfd, &iov, 1, 0, flags, write_async_cbk,
+ ret = glfs_pwritev_async(glfd, &iov, 1, 0, flags, pwritev_async_cbk,
&write_cookie);
out:
if (ret < 0) {
@@ -112,6 +121,252 @@ out:
return ret;
}
+void
+pwrite_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
+ struct stat *poststat, void *cookie)
+{
+ if (ret < 0) {
+ LOG_ERR("glfs_pwrite_cbk failed");
+ }
+ cbk_ret_val = ret;
+ cbk_complete = 1;
+}
+
+int
+pwrite_async(glfs_fd_t *glfd)
+{
+ ssize_t ret = -1;
+ int flags = O_RDWR;
+ char buf1[10];
+ char *buf2 = "ten bytes!";
+ void *write_cookie = strdup("write_cookie");
+ ret = glfs_pwrite_async(glfd, buf1, 10, 0, flags, pwrite_async_cbk,
+ &write_cookie);
+
+ if (ret < 0) {
+ LOG_ERR("glfs_pwrite_async failed");
+ }
+ return ret;
+}
+
+void
+writev_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
+ struct stat *poststat, void *cookie)
+{
+ if (ret < 0) {
+ LOG_ERR("glfs_writev_cbk failed");
+ }
+ cbk_ret_val = ret;
+ cbk_complete = 1;
+}
+
+int
+writev_async(glfs_t *fs, glfs_fd_t *glfd, int char_count)
+{
+ ssize_t ret = -1;
+ int flags = O_RDWR;
+ struct iovec iov = {0};
+ void *write_cookie = NULL;
+
+ ret = fill_iov(&iov, 'a', char_count);
+ if (ret) {
+ LOG_ERR("failed to create iov");
+ goto out;
+ }
+
+ write_cookie = strdup("write_cookie");
+ ret = glfs_writev_async(glfd, &iov, 1, flags, writev_async_cbk,
+ &write_cookie);
+out:
+ if (ret < 0) {
+ LOG_ERR("glfs_writev_async failed");
+ }
+ return ret;
+}
+
+void
+write_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
+ struct stat *poststat, void *cookie)
+{
+ if (ret < 0) {
+ LOG_ERR("glfs_write_cbk failed");
+ }
+ cbk_ret_val = ret;
+ cbk_complete = 1;
+}
+
+int
+write_async(glfs_fd_t *glfd)
+{
+ ssize_t ret = -1;
+ int flags = O_RDWR;
+ char buf1[10];
+ char *buf2 = "ten bytes!";
+ void *write_cookie = strdup("write_cookie");
+ ret = glfs_write_async(glfd, buf1, 10, flags, write_async_cbk,
+ &write_cookie);
+
+ if (ret < 0) {
+ LOG_ERR("glfs_write_async failed");
+ }
+ return ret;
+}
+
+void
+preadv_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
+ struct stat *poststat, void *cookie)
+{
+ if (ret < 0) {
+ LOG_ERR("glfs_preadv_cbk failed");
+ }
+ cbk_ret_val = ret;
+ cbk_complete = 1;
+}
+
+int
+preadv_async(glfs_t *fs, glfs_fd_t *glfd, int char_count)
+{
+ ssize_t ret = -1;
+ int flags = O_RDWR;
+ struct iovec iov = {0};
+ void *read_cookie = NULL;
+
+ ret = fill_iov(&iov, 'a', char_count);
+ if (ret) {
+ LOG_ERR("failed to create iov");
+ goto out;
+ }
+
+ read_cookie = strdup("preadv_cookie");
+ ret = glfs_preadv_async(glfd, &iov, 1, 0, flags, preadv_async_cbk,
+ &read_cookie);
+out:
+ if (ret < 0) {
+ LOG_ERR("glfs_preadv async failed");
+ }
+ return ret;
+}
+
+void
+pread_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
+ struct stat *poststat, void *cookie)
+{
+ if (ret < 0) {
+ LOG_ERR("glfs_pread_cbk failed");
+ }
+ cbk_ret_val = ret;
+ cbk_complete = 1;
+}
+
+int
+pread_async(glfs_fd_t *glfd)
+{
+ ssize_t ret = -1;
+ int flags = O_RDWR;
+ char buf1[10];
+ void *read_cookie = strdup("read_cookie");
+ ret = glfs_pread_async(glfd, buf1, 10, 0, flags, pread_async_cbk,
+ &read_cookie);
+ if (ret < 0) {
+ LOG_ERR("glfs_pread_async failed");
+ }
+
+ return ret;
+}
+
+void
+readv_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
+ struct stat *poststat, void *cookie)
+{
+ if (ret < 0) {
+ LOG_ERR("glfs_readv_cbk failed");
+ }
+ cbk_ret_val = ret;
+ cbk_complete = 1;
+}
+
+int
+readv_async(glfs_t *fs, glfs_fd_t *glfd, int char_count)
+{
+ ssize_t ret = -1;
+ int flags = O_RDWR;
+ struct iovec iov = {0};
+ void *read_cookie = NULL;
+
+ ret = fill_iov(&iov, 'a', char_count);
+ if (ret) {
+ LOG_ERR("failed to create iov");
+ goto out;
+ }
+
+ read_cookie = strdup("read_cookie");
+ ret = glfs_readv_async(glfd, &iov, 1, flags, readv_async_cbk, &read_cookie);
+out:
+ if (ret < 0) {
+ LOG_ERR("glfs_readv_async failed");
+ }
+ return ret;
+}
+
+void
+read_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
+ struct stat *poststat, void *cookie)
+{
+ if (ret < 0) {
+ LOG_ERR("glfs_read_cbk failed");
+ }
+ cbk_ret_val = ret;
+ cbk_complete = 1;
+}
+
+int
+read_async(glfs_fd_t *glfd)
+{
+ ssize_t ret = -1;
+ int flags = O_RDWR;
+ char buf1[10];
+ void *read_cookie = strdup("read_cookie");
+ ret = glfs_read_async(glfd, buf1, 10, flags, read_async_cbk, &read_cookie);
+
+ if (ret < 0) {
+ LOG_ERR("glfs_read_async failed");
+ }
+ return ret;
+}
+
+void
+fsync_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
+ struct stat *poststat, void *cookie)
+{
+ if (ret < 0) {
+ LOG_ERR("glfs_fsync_async_cbk failed");
+ }
+ cbk_ret_val = ret;
+ cbk_complete = 1;
+}
+
+void
+fdatasync_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
+ struct stat *poststat, void *cookie)
+{
+ if (ret < 0) {
+ LOG_ERR("glfs_fdatasync_async_cbk failed");
+ }
+ cbk_ret_val = ret;
+ cbk_complete = 1;
+}
+
+void
+ftruncate_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
+ struct stat *poststat, void *cookie)
+{
+ if (ret < 0) {
+ LOG_ERR("glfs_ftruncate_async_cbk failed");
+ }
+ cbk_ret_val = ret;
+ cbk_complete = 1;
+}
+
int
main(int argc, char *argv[])
{
@@ -124,6 +379,7 @@ main(int argc, char *argv[])
int flags = (O_RDWR | O_CREAT);
glfs_fd_t *glfd = NULL;
int count = 200;
+ void *data = strdup("Sample_text");
if (argc != 4) {
fprintf(stderr, "Invalid argument\n");
@@ -146,14 +402,85 @@ main(int argc, char *argv[])
exit(1);
}
- ret = write_async(fs, glfd, count);
+ ret = pwritev_async(fs, glfd, count);
if (ret) {
- LOG_ERR("glfs_test_function failed");
+ LOG_ERR("glfs_pwritev_async_test failed");
exit(1);
}
+ cbk_check();
- while (cbk_complete != 1) {
- sleep(1);
+ ret = writev_async(fs, glfd, count);
+ if (ret) {
+ LOG_ERR("glfs_writev_async_test failed");
+ exit(1);
+ }
+ cbk_check();
+
+ ret = write_async(glfd);
+ if (ret) {
+ LOG_ERR("glfs_write_async_test failed");
+ exit(1);
+ }
+ cbk_check();
+
+ ret = preadv_async(fs, glfd, count);
+ if (ret) {
+ LOG_ERR("glfs_preadv_async_test failed");
+ exit(1);
+ }
+ cbk_check();
+
+ ret = pread_async(glfd);
+ if (ret) {
+ LOG_ERR("glfs_pread_async_test failed");
+ exit(1);
+ }
+ cbk_check();
+
+ ret = readv_async(fs, glfd, count);
+ if (ret) {
+ LOG_ERR("glfs_readv_async_test failed");
+ exit(1);
+ }
+ cbk_check();
+
+ ret = read_async(glfd);
+ if (ret) {
+ LOG_ERR("glfs_read_async_test failed");
+ exit(1);
+ }
+ cbk_check();
+
+ ret = glfs_fsync(glfd, NULL, NULL);
+ if (ret < 0) {
+ LOG_ERR("glfs_fsync failed");
+ exit(1);
+ }
+
+ ret = glfs_fdatasync(glfd, NULL, NULL);
+ if (ret < 0) {
+ LOG_ERR("glfs_fdatasync failed");
+ exit(1);
+ }
+
+ ret = glfs_fsync_async(glfd, fsync_async_cbk, data);
+ if (ret < 0) {
+ LOG_ERR("glfs_fsync_async failed");
+ exit(1);
+ }
+ cbk_check();
+
+ ret = glfs_fdatasync_async(glfd, fdatasync_async_cbk, data);
+ if (ret < 0) {
+ LOG_ERR("glfs_fdatasync_async failed");
+ exit(1);
+ }
+ cbk_check();
+
+ ret = glfs_ftruncate_async(glfd, 4, ftruncate_async_cbk, data);
+ if (ret < 0) {
+ LOG_ERR("glfs_ftruncate_async failed");
+ exit(1);
}
ret = glfs_close(glfd);
@@ -161,12 +488,7 @@ main(int argc, char *argv[])
LOG_ERR("glfs close failed");
}
- /*
- * skipping fini
- */
+ ret = glfs_fini(fs);
- if (cbk_ret_val == count)
- return 0;
- else
- return -1;
+ return ret;
}
diff --git a/tests/basic/gfapi/gfapi-copy-file-range.t b/tests/basic/gfapi/gfapi-copy-file-range.t
new file mode 100644
index 00000000000..a56d3a58e07
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-copy-file-range.t
@@ -0,0 +1,82 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+mkfs.xfs 2>&1 | grep reflink
+if [ $? -ne 0 ]; then
+ SKIP_TESTS
+ exit
+fi
+
+
+TEST glusterd
+
+TEST truncate -s 2G $B0/xfs_image
+# for now, a xfs filesystem with reflink support is created.
+# In future, better to make changes in MKFS_LOOP so that,
+# once can create a xfs filesystem with reflink enabled in
+# generic and simple way, instead of doing below steps each
+# time.
+TEST mkfs.xfs -f -i size=512 -m reflink=1 $B0/xfs_image;
+
+TEST mkdir $B0/bricks
+TEST mount -t xfs -o loop $B0/xfs_image $B0/bricks
+
+# Just a single brick volume. More test cases need to be
+# added in future for distribute, replicate,
+# distributed replicate and distributed replicated sharded
+# volumes.
+TEST $CLI volume create $V0 $H0:$B0/bricks/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST dd if=/dev/urandom of=$M0/file bs=1M count=555;
+
+# check for the existence of the created file
+TEST stat $M0/file;
+
+# grab the size of the file
+SRC_SIZE=$(stat -c %s $M0/file);
+
+logdir=`gluster --print-logdir`
+
+# TODO:
+# For now, do not call copy-file-range utility. This is because,
+# the regression machines are centos-7 based which does not have
+# copy_file_range API available. So, instead of this testcase
+# causing regression failures, for now, this is just a dummy test
+# case. Uncomment the below tests (until volume stop) when there
+# is support for copy_file_range in the regression machines.
+#
+
+TEST build_tester $(dirname $0)/glfs-copy-file-range.c -lgfapi
+
+TEST ./$(dirname $0)/glfs-copy-file-range $H0 $V0 $logdir/gfapi-copy-file-range.log /file /new
+
+# check whether the destination file is created or not
+TEST stat $M0/new
+
+# check the size of the destination file
+DST_SIZE=$(stat -c %s $M0/new);
+
+# The sizes of the source and destination should be same.
+# Atleast it ensures that, copy_file_range API is working
+# as expected. Whether the actual cloning happened via reflink
+# or a read/write happened is different matter.
+TEST [ $SRC_SIZE == $DST_SIZE ];
+
+cleanup_tester $(dirname $0)/glfs-copy-file-range
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+UMOUNT_LOOP $B0/bricks;
+
+cleanup;
diff --git a/tests/basic/gfapi/gfapi-graph-switch-open-fd.t b/tests/basic/gfapi/gfapi-graph-switch-open-fd.t
new file mode 100644
index 00000000000..2e666be7ec7
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-graph-switch-open-fd.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 replica 3 ${H0}:$B0/brick{0..2};
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST touch $M0/sync
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/gfapi-keep-writing.c -lgfapi
+
+
+#Launch a program to keep doing writes on an fd
+./$(dirname $0)/gfapi-keep-writing ${H0} $V0 $logdir/gfapi-async-calls-test.log sync &
+p=$!
+sleep 1 #Let some writes go through
+#Check if graph switch will lead to any pending markers for ever
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+
+
+TEST rm -f $M0/sync #Make sure the glfd is closed
+TEST wait #Wait for background process to die
+#Goal is to check if there is permanent FOOL changelog
+sleep 5
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/brick0/glfs_test.txt trusted.afr.dirty
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/brick1/glfs_test.txt trusted.afr.dirty
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/brick2/glfs_test.txt trusted.afr.dirty
+
+cleanup_tester $(dirname $0)/gfapi-async-calls-test
+
+cleanup;
diff --git a/tests/basic/gfapi/gfapi-keep-writing.c b/tests/basic/gfapi/gfapi-keep-writing.c
new file mode 100644
index 00000000000..91b59cea02b
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-keep-writing.c
@@ -0,0 +1,129 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define LOG_ERR(msg) \
+ do { \
+ fprintf(stderr, "%s : Error (%s)\n", msg, strerror(errno)); \
+ } while (0)
+
+glfs_t *
+init_glfs(const char *hostname, const char *volname, const char *logfile)
+{
+ int ret = -1;
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ LOG_ERR("glfs_new failed");
+ return NULL;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_volfile_server failed");
+ goto out;
+ }
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_logging failed");
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ LOG_ERR("glfs_init failed");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret) {
+ glfs_fini(fs);
+ fs = NULL;
+ }
+
+ return fs;
+}
+
+int
+glfs_test_function(const char *hostname, const char *volname,
+ const char *logfile, const char *syncfile)
+{
+ int ret = -1;
+ int flags = O_CREAT | O_RDWR;
+ glfs_t *fs = NULL;
+ glfs_fd_t *glfd = NULL;
+ const char *buff = "This is from my prog\n";
+ const char *filename = "glfs_test.txt";
+ struct stat buf = {0};
+
+ fs = init_glfs(hostname, volname, logfile);
+ if (fs == NULL) {
+ LOG_ERR("init_glfs failed");
+ return -1;
+ }
+
+ glfd = glfs_creat(fs, filename, flags, 0644);
+ if (glfd == NULL) {
+ LOG_ERR("glfs_creat failed");
+ goto out;
+ }
+
+ while (glfs_stat(fs, syncfile, &buf) == 0) {
+ ret = glfs_write(glfd, buff, strlen(buff), flags);
+ if (ret < 0) {
+ LOG_ERR("glfs_write failed");
+ goto out;
+ }
+ }
+
+ ret = glfs_close(glfd);
+ if (ret < 0) {
+ LOG_ERR("glfs_write failed");
+ goto out;
+ }
+
+out:
+ ret = glfs_fini(fs);
+ if (ret) {
+ LOG_ERR("glfs_fini failed");
+ }
+
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ char *hostname = NULL;
+ char *volname = NULL;
+ char *logfile = NULL;
+ char *syncfile = NULL;
+
+ if (argc != 5) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+ syncfile = argv[4];
+
+ ret = glfs_test_function(hostname, volname, logfile, syncfile);
+ if (ret) {
+ LOG_ERR("glfs_test_function failed");
+ }
+
+ return ret;
+}
diff --git a/tests/basic/gfapi/gfapi-ssl-load-volfile-test.c b/tests/basic/gfapi/gfapi-ssl-load-volfile-test.c
new file mode 100644
index 00000000000..7beb8dd1fe4
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-ssl-load-volfile-test.c
@@ -0,0 +1,127 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define LOG_ERR(msg) \
+ do { \
+ fprintf(stderr, "%s : Error (%s)\n", msg, strerror(errno)); \
+ } while (0)
+
+glfs_t *
+init_glfs(const char *hostname, const char *volname, const char *volfile,
+ const char *logfile)
+{
+ int ret = -1;
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ LOG_ERR("glfs_new failed");
+ return NULL;
+ }
+
+ ret = glfs_set_volfile(fs, volfile);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_volfile failed");
+ goto out;
+ }
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_logging failed");
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ LOG_ERR("glfs_init failed");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret) {
+ glfs_fini(fs);
+ fs = NULL;
+ }
+
+ return fs;
+}
+
+int
+glfs_test_function(const char *hostname, const char *volname,
+ const char *volfile, const char *logfile)
+{
+ int ret = -1;
+ int flags = O_CREAT | O_RDWR;
+ glfs_t *fs = NULL;
+ glfs_fd_t *glfd = NULL;
+ const char *buff = "This is from my prog\n";
+ const char *filename = "glfs_test.txt";
+
+ fs = init_glfs(hostname, volname, volfile, logfile);
+ if (fs == NULL) {
+ LOG_ERR("init_glfs failed");
+ return -1;
+ }
+
+ glfd = glfs_creat(fs, filename, flags, 0644);
+ if (glfd == NULL) {
+ LOG_ERR("glfs_creat failed");
+ goto out;
+ }
+
+ ret = glfs_write(glfd, buff, strlen(buff), flags);
+ if (ret < 0) {
+ LOG_ERR("glfs_write failed");
+ goto out;
+ }
+
+ ret = glfs_close(glfd);
+ if (ret < 0) {
+ LOG_ERR("glfs_write failed");
+ goto out;
+ }
+
+out:
+ ret = glfs_fini(fs);
+ if (ret) {
+ LOG_ERR("glfs_fini failed");
+ }
+
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ char *hostname = NULL;
+ char *volname = NULL;
+ char *volfile = NULL;
+ char *logfile = NULL;
+
+ if (argc != 5) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ volfile = argv[3];
+ logfile = argv[4];
+
+ ret = glfs_test_function(hostname, volname, volfile, logfile);
+ if (ret) {
+ LOG_ERR("glfs_test_function failed");
+ }
+
+ return ret;
+}
diff --git a/tests/basic/gfapi/gfapi-ssl-load-volfile-test.t b/tests/basic/gfapi/gfapi-ssl-load-volfile-test.t
new file mode 100755
index 00000000000..8e94df9d321
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-ssl-load-volfile-test.t
@@ -0,0 +1,76 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../traps.rc
+. $(dirname $0)/../../ssl.rc
+
+cleanup;
+
+sed -e "s,@@HOSTNAME@@,${H0},g" -e "s,@@BRICKPATH@@,${B0}/brick1,g" \
+ -e "s,@@SSL@@,off,g" \
+ $(dirname ${0})/protocol-client-ssl.vol.in \
+ > $(dirname ${0})/protocol-client-ssl.vol
+
+TEST create_self_signed_certs
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/gfapi-ssl-load-volfile-test.c -lgfapi
+
+# Run test without I/O or management encryption
+TEST $(dirname $0)/gfapi-ssl-load-volfile-test $H0 $V0 \
+ $(dirname ${0})/protocol-client-ssl.vol \
+ $logdir/gfapi-ssl-load-volfile-test.log
+
+# Enable management encryption
+touch $GLUSTERD_WORKDIR/secure-access
+
+killall_gluster
+
+TEST glusterd
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+
+# Run test with management encryption (No I/O encryption)
+TEST $(dirname $0)/gfapi-ssl-load-volfile-test $H0 $V0 \
+ $(dirname ${0})/protocol-client-ssl.vol \
+ $logdir/gfapi-ssl-load-volfile-test.log
+
+# Enable I/O encryption
+TEST $CLI volume set $V0 server.ssl on
+
+killall_gluster
+
+sed -e "s,@@HOSTNAME@@,${H0},g" -e "s,@@BRICKPATH@@,${B0}/brick1,g" \
+ -e "s,@@SSL@@,on,g" \
+ $(dirname ${0})/protocol-client-ssl.vol.in \
+ > $(dirname ${0})/protocol-client-ssl.vol
+
+TEST glusterd
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+
+# Run test without I/O or management encryption
+TEST $(dirname $0)/gfapi-ssl-load-volfile-test $H0 $V0 \
+ $(dirname ${0})/protocol-client-ssl.vol \
+ $logdir/gfapi-ssl-load-volfile-test.log
+
+cleanup_tester $(dirname $0)/gfapi-ssl-load-volfile-test
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
+
+# NetBSD build scripts are not up to date therefore this test
+# is failing in NetBSD. Therefore skipping the test in NetBSD
+# as of now.
+#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000
diff --git a/tests/basic/gfapi/gfapi-statx-basic.c b/tests/basic/gfapi/gfapi-statx-basic.c
new file mode 100644
index 00000000000..a4943fa0fd1
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-statx-basic.c
@@ -0,0 +1,184 @@
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <glusterfs/api/glfs.h>
+
+#define VALIDATE_AND_GOTO_LABEL_ON_ERROR(func, ret, label) \
+ do { \
+ if (ret < 0) { \
+ fprintf(stderr, "%s : returned error %d (%s)\n", func, ret, \
+ strerror(errno)); \
+ goto label; \
+ } \
+ } while (0)
+
+#define GOTO_LABEL_ON_FALSE(compstr, ret, label) \
+ do { \
+ if (ret == false) { \
+ fprintf(stderr, "%s : comparison failed!\n", compstr); \
+ goto label; \
+ } \
+ } while (0)
+
+#define WRITE_SIZE 513
+#define TRUNC_SIZE 4096
+
+/* Using private function and hence providing a forward declation in sync with
+code in glfs-internal.h */
+int
+glfs_statx(struct glfs *fs, const char *path, unsigned int mask,
+ struct glfs_stat *statxbuf);
+
+int
+main(int argc, char *argv[])
+{
+ int ret = -1;
+ int flags = O_RDWR | O_SYNC;
+ glfs_t *fs = NULL;
+ glfs_fd_t *fd1 = NULL;
+ char *volname = NULL;
+ char *logfile = NULL;
+ const char *filename = "file_tmp";
+ const char buff[WRITE_SIZE];
+ struct stat sb;
+ unsigned int mask;
+ struct glfs_stat statx;
+ bool bret;
+
+ if (argc != 3) {
+ fprintf(stderr, "Invalid argument\n");
+ fprintf(stderr, "Usage: %s <volname> <logfile>\n", argv[0]);
+ return 1;
+ }
+
+ volname = argv[1];
+ logfile = argv[2];
+
+ fs = glfs_new(volname);
+ if (!fs)
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_new", ret, out);
+
+ ret = glfs_set_volfile_server(fs, "tcp", "localhost", 24007);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_set_volfile_server", ret, out);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_set_logging", ret, out);
+
+ ret = glfs_init(fs);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_init", ret, out);
+
+ fd1 = glfs_creat(fs, filename, flags, 0644);
+ if (fd1 == NULL) {
+ ret = -1;
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_creat", ret, out);
+ }
+
+ ret = glfs_truncate(fs, filename, TRUNC_SIZE);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_truncate", ret, out);
+
+ ret = glfs_write(fd1, buff, WRITE_SIZE, flags);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_write", ret, out);
+
+ ret = glfs_fstat(fd1, &sb);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_fstat", ret, out);
+
+ if (sb.st_size != TRUNC_SIZE) {
+ fprintf(stderr, "wrong size %jd should be %jd\n", (intmax_t)sb.st_size,
+ (intmax_t)2048);
+ ret = -1;
+ goto out;
+ }
+
+ glfs_close(fd1);
+ fd1 = NULL;
+
+ /* TEST 1: Invalid mask to statx */
+ mask = 0xfafadbdb;
+ ret = glfs_statx(fs, filename, mask, NULL);
+ if (ret == 0 || ((ret == -1) && (errno != EINVAL))) {
+ fprintf(stderr,
+ "Invalid args passed, but error returned is"
+ " incorrect (ret - %d, errno - %d)\n",
+ ret, errno);
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+
+ /* TEST 2: Call statx and validate fields against prior fstat data */
+ /* NOTE: This fails, as iatt->ia_flags are not carried through the stack,
+ * for example if mdc_to_iatt is invoked to serve cached stat, we will loose
+ * the flags. */
+ mask = GLFS_STAT_ALL;
+ ret = glfs_statx(fs, filename, mask, &statx);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_statx", ret, out);
+
+ if ((statx.glfs_st_mask & GLFS_STAT_BASIC_STATS) != GLFS_STAT_BASIC_STATS) {
+ fprintf(stderr, "Invalid glfs_st_mask, expecting 0x%x got 0x%x\n",
+ GLFS_STAT_ALL, statx.glfs_st_mask);
+ ret = -1;
+ goto out;
+ }
+
+ bret = (sb.st_ino == statx.glfs_st_ino);
+ GOTO_LABEL_ON_FALSE("(sb.st_ino == statx.glfs_st_ino)", bret, out);
+
+ bret = (sb.st_mode == statx.glfs_st_mode);
+ GOTO_LABEL_ON_FALSE("(sb.st_mode == statx.glfs_st_mode)", bret, out);
+
+ bret = (sb.st_nlink == statx.glfs_st_nlink);
+ GOTO_LABEL_ON_FALSE("(sb.st_nlink == statx.glfs_st_nlink)", bret, out);
+
+ bret = (sb.st_uid == statx.glfs_st_uid);
+ GOTO_LABEL_ON_FALSE("(sb.st_uid == statx.glfs_st_uid)", bret, out);
+
+ bret = (sb.st_gid == statx.glfs_st_gid);
+ GOTO_LABEL_ON_FALSE("(sb.st_gid == statx.glfs_st_gid)", bret, out);
+
+ bret = (sb.st_size == statx.glfs_st_size);
+ GOTO_LABEL_ON_FALSE("(sb.st_size == statx.glfs_st_size)", bret, out);
+
+ bret = (sb.st_blksize == statx.glfs_st_blksize);
+ GOTO_LABEL_ON_FALSE("(sb.st_blksize == statx.glfs_st_blksize)", bret, out);
+
+ bret = (sb.st_blocks == statx.glfs_st_blocks);
+ GOTO_LABEL_ON_FALSE("(sb.st_blocks == statx.glfs_st_blocks)", bret, out);
+
+ bret = (!memcmp(&sb.st_atim, &statx.glfs_st_atime,
+ sizeof(struct timespec)));
+ GOTO_LABEL_ON_FALSE("(sb.st_atim == statx.glfs_st_atime)", bret, out);
+
+ bret = (!memcmp(&sb.st_mtim, &statx.glfs_st_mtime,
+ sizeof(struct timespec)));
+ GOTO_LABEL_ON_FALSE("(sb.st_mtim == statx.glfs_st_mtime)", bret, out);
+
+ bret = (!memcmp(&sb.st_ctim, &statx.glfs_st_ctime,
+ sizeof(struct timespec)));
+ GOTO_LABEL_ON_FALSE("(sb.st_ctim == statx.glfs_st_ctime)", bret, out);
+
+ /* TEST 3: Check if partial masks are accepted */
+ mask = GLFS_STAT_TYPE | GLFS_STAT_UID | GLFS_STAT_GID;
+ ret = glfs_statx(fs, filename, mask, &statx);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_statx", ret, out);
+
+ /* We currently still return all stats, as is acceptable based on the API
+ * definition in the header (and in statx as well) */
+ if ((statx.glfs_st_mask & GLFS_STAT_BASIC_STATS) != GLFS_STAT_BASIC_STATS) {
+ fprintf(stderr, "Invalid glfs_st_mask, expecting 0x%x got 0x%x\n",
+ GLFS_STAT_ALL, statx.glfs_st_mask);
+ ret = -1;
+ goto out;
+ }
+out:
+ if (fd1 != NULL)
+ glfs_close(fd1);
+ if (fs) {
+ (void)glfs_fini(fs);
+ }
+
+ return ret;
+}
diff --git a/tests/basic/gfapi/gfapi-statx-basic.t b/tests/basic/gfapi/gfapi-statx-basic.t
new file mode 100755
index 00000000000..d9acbce2f99
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-statx-basic.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 ${H0}:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+# NOTE: Test is passing due to very specific volume configuration
+# Disable md-cache, as it does not save and return ia_flags from iatt
+# This is possibly going to be true of other xlators as well (ec/afr), need to
+# ensure these are fixed, or hack statx to return all basic attrs anyway.
+TEST $CLI volume set $V0 performance.md-cache-timeout 0
+
+logdir=`gluster --print-logdir`
+
+build_tester $(dirname $0)/gfapi-statx-basic.c -lgfapi
+
+TEST ./$(dirname $0)/gfapi-statx-basic $V0 $logdir/gfapi-statx-basic.log
+
+cleanup_tester $(dirname $0)/gfapi-statx-basic
+
+cleanup;
diff --git a/tests/basic/gfapi/glfs-copy-file-range.c b/tests/basic/gfapi/glfs-copy-file-range.c
new file mode 100644
index 00000000000..1c5fd81fc87
--- /dev/null
+++ b/tests/basic/gfapi/glfs-copy-file-range.c
@@ -0,0 +1,180 @@
+/*
+ Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <string.h>
+#include <time.h>
+#include <libgen.h>
+
+static void
+cleanup(glfs_t *fs)
+{
+ if (!fs)
+ return;
+#if 0
+ /* glfs fini path is still racy and crashing the program. Since
+ * this program any way has to die, we are not going to call fini
+ * in the released versions. i.e. final builds. For all
+ * internal testing lets enable this so that glfs_fini code
+ * path becomes stable. */
+ glfs_fini (fs);
+#endif
+}
+
+int
+main(int argc, char **argv)
+{
+ glfs_t *fs = NULL;
+ int ret = -1;
+ char *volname = NULL;
+ char *logfilepath = NULL;
+ char *path_src = NULL;
+ char *path_dst = NULL;
+ glfs_fd_t *glfd_in = NULL;
+ glfs_fd_t *glfd_out = NULL;
+ char *volfile_server = NULL;
+
+ struct stat stbuf = {
+ 0,
+ };
+ struct glfs_stat stat_src = {
+ 0,
+ };
+ struct glfs_stat prestat_dst = {
+ 0,
+ };
+ struct glfs_stat poststat_dst = {
+ 0,
+ };
+ size_t len;
+
+ if (argc < 6) {
+ printf("%s <volume> <log file path> <source> <destination>", argv[0]);
+ ret = -1;
+ goto out;
+ }
+
+ volfile_server = argv[1];
+ volname = argv[2];
+ logfilepath = argv[3];
+ path_src = argv[4];
+ path_dst = argv[5];
+
+ if (path_src[0] != '/') {
+ fprintf(stderr, "source path %s is not absolute", path_src);
+ errno = EINVAL;
+ goto out;
+ }
+
+ if (path_dst[0] != '/') {
+ fprintf(stderr, "destination path %s is not absolute", path_dst);
+ errno = EINVAL;
+ goto out;
+ }
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ ret = -errno;
+ fprintf(stderr, "Not able to initialize volume '%s'", volname);
+ goto out;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", volfile_server, 24007);
+ if (ret < 0) {
+ ret = -errno;
+ fprintf(stderr,
+ "Failed to set the volfile server, "
+ "%s",
+ strerror(errno));
+ goto out;
+ }
+
+ ret = glfs_set_logging(fs, logfilepath, 7);
+ if (ret < 0) {
+ ret = -errno;
+ fprintf(stderr,
+ "Failed to set the log file path, "
+ "%s",
+ strerror(errno));
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ ret = -errno;
+ if (errno == ENOENT) {
+ fprintf(stderr, "Volume %s does not exist", volname);
+ } else {
+ fprintf(stderr,
+ "%s: Not able to fetch "
+ "volfile from glusterd",
+ volname);
+ }
+ goto out;
+ }
+
+ glfd_in = glfs_open(fs, path_src, O_RDONLY | O_NONBLOCK);
+ if (!glfd_in) {
+ ret = -errno;
+ goto out;
+ } else {
+ printf("OPEN_SRC: opening %s is success\n", path_src);
+ }
+
+ glfd_out = glfs_creat(fs, path_dst, O_RDWR, 0644);
+ if (!glfd_out) {
+ fprintf(stderr,
+ "FAILED_DST_OPEN: failed to "
+ "open (create) %s (%s)\n",
+ path_dst, strerror(errno));
+ ret = -errno;
+ goto out;
+ } else {
+ printf("OPEN_DST: opening %s is success\n", path_dst);
+ }
+
+ ret = glfs_fstat(glfd_in, &stbuf);
+ if (ret < 0) {
+ ret = -errno;
+ goto out;
+ } else {
+ printf("FSTAT_SRC: fstat on %s is success\n", path_dst);
+ }
+
+ len = stbuf.st_size;
+
+ do {
+ ret = glfs_copy_file_range(glfd_in, NULL, glfd_out, NULL, len, 0,
+ &stat_src, &prestat_dst, &poststat_dst);
+ if (ret == -1) {
+ fprintf(stderr, "copy_file_range failed with %s\n",
+ strerror(errno));
+ ret = -errno;
+ break;
+ } else {
+ printf("copy_file_range successful\n");
+ len -= ret;
+ }
+ } while (len > 0);
+
+out:
+ if (glfd_in)
+ glfs_close(glfd_in);
+ if (glfd_out)
+ glfs_close(glfd_out);
+
+ cleanup(fs);
+
+ return ret;
+}
diff --git a/tests/basic/gfapi/glfs_h_creat_open.c b/tests/basic/gfapi/glfs_h_creat_open.c
new file mode 100644
index 00000000000..7672561e73f
--- /dev/null
+++ b/tests/basic/gfapi/glfs_h_creat_open.c
@@ -0,0 +1,118 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define LOG_ERR(func, ret) \
+ do { \
+ if (ret != 0) { \
+ fprintf(stderr, "%s : returned error ret(%d), errno(%d)\n", func, \
+ ret, errno); \
+ exit(1); \
+ } else { \
+ fprintf(stderr, "%s : returned %d\n", func, ret); \
+ } \
+ } while (0)
+#define LOG_IF_NO_ERR(func, ret) \
+ do { \
+ if (ret == 0) { \
+ fprintf(stderr, "%s : hasn't returned error %d\n", func, ret); \
+ exit(1); \
+ } else { \
+ fprintf(stderr, "%s : returned %d\n", func, ret); \
+ } \
+ } while (0)
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ int ret = 0;
+ struct glfs_object *root = NULL, *leaf = NULL;
+ glfs_fd_t *fd = NULL;
+ char *filename = "/ro-file";
+ struct stat sb = {
+ 0,
+ };
+ char *logfile = NULL;
+ char *volname = NULL;
+ char *hostname = NULL;
+ char buf[32] = "abcdefghijklmnopqrstuvwxyz012345";
+
+ fprintf(stderr, "Starting glfs_h_creat_open\n");
+
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ LOG_ERR("glfs_set_volfile_server", ret);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ LOG_ERR("glfs_set_logging", ret);
+
+ ret = glfs_init(fs);
+ LOG_ERR("glfs_init", ret);
+
+ sleep(2);
+ root = glfs_h_lookupat(fs, NULL, "/", &sb, 0);
+ if (!root) {
+ ret = -1;
+ LOG_ERR("glfs_h_lookupat root", ret);
+ }
+ leaf = glfs_h_lookupat(fs, root, filename, &sb, 0);
+ if (!leaf) {
+ ret = -1;
+ LOG_IF_NO_ERR("glfs_h_lookupat leaf", ret);
+ }
+
+ leaf = glfs_h_creat_open(fs, root, filename, O_RDONLY, 00444, &sb, &fd);
+ if (!leaf || !fd) {
+ ret = -1;
+ LOG_ERR("glfs_h_creat leaf", ret);
+ }
+ fprintf(stderr, "glfs_h_create_open leaf - %p\n", leaf);
+
+ ret = glfs_write(fd, buf, 32, 0);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_write: error writing to file %s, %s\n", filename,
+ strerror(errno));
+ goto out;
+ }
+
+ ret = glfs_h_getattrs(fs, leaf, &sb);
+ LOG_ERR("glfs_h_getattrs", ret);
+
+ if (sb.st_size != 32) {
+ fprintf(stderr, "glfs_write: post size mismatch\n");
+ goto out;
+ }
+
+ fprintf(stderr, "Successfully opened and written to a read-only file \n");
+out:
+ if (fd)
+ glfs_close(fd);
+
+ ret = glfs_fini(fs);
+ LOG_ERR("glfs_fini", ret);
+
+ fprintf(stderr, "End of libgfapi_fini\n");
+
+ exit(0);
+}
diff --git a/tests/basic/gfapi/glfs_h_creat_open.t b/tests/basic/gfapi/glfs_h_creat_open.t
new file mode 100755
index 00000000000..f24ae7395be
--- /dev/null
+++ b/tests/basic/gfapi/glfs_h_creat_open.t
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/glfs_h_creat_open.c -lgfapi
+
+TEST ./$(dirname $0)/glfs_h_creat_open $H0 $V0 $logdir/glfs.log
+
+cleanup_tester $(dirname $0)/glfs_h_creat_open
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/basic/gfapi/glfsxmp-coverage.c b/tests/basic/gfapi/glfsxmp-coverage.c
new file mode 100644
index 00000000000..51650023efd
--- /dev/null
+++ b/tests/basic/gfapi/glfsxmp-coverage.c
@@ -0,0 +1,1900 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <string.h>
+#include <time.h>
+
+#define TEST_STR_LEN 2048
+
+int
+test_dirops(glfs_t *fs)
+{
+ glfs_fd_t *fd = NULL;
+ char buf[2048];
+ struct dirent *entry = NULL;
+
+ fd = glfs_opendir(fs, "/");
+ if (!fd) {
+ fprintf(stderr, "/: %s\n", strerror(errno));
+ return -1;
+ }
+
+ fprintf(stderr, "Entries:\n");
+ while (glfs_readdir_r(fd, (struct dirent *)buf, &entry), entry) {
+ fprintf(stderr, "%s: %lu\n", entry->d_name, glfs_telldir(fd));
+ }
+
+ /* Should internally call fsyncdir(), hopefully */
+ glfs_fsync(fd, NULL, NULL);
+
+ glfs_closedir(fd);
+ return 0;
+}
+
+int
+test_xattr(glfs_t *fs)
+{
+ char *filename = "/filename2";
+ char *linkfile = "/linkfile";
+ glfs_fd_t *fd = NULL;
+ char buf[512];
+ char *ptr;
+ int ret;
+
+ ret = glfs_setxattr(fs, filename, "user.testkey", "testval", 8, 0);
+ fprintf(stderr, "setxattr(%s): %d (%s)\n", filename, ret, strerror(errno));
+
+ ret = glfs_setxattr(fs, filename, "user.testkey2", "testval", 8, 0);
+ fprintf(stderr, "setxattr(%s): %d (%s)\n", filename, ret, strerror(errno));
+
+ ret = glfs_getxattr(fs, filename, "user.testkey", buf, 512);
+ fprintf(stderr, "getxattr(%s): %d (%s)\n", filename, ret, strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_listxattr(fs, filename, buf, 512);
+ fprintf(stderr, "listxattr(%s): %d (%s)\n", filename, ret, strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_symlink(fs, "filename", linkfile);
+ fprintf(stderr, "symlink(%s %s): %s\n", filename, linkfile,
+ strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_readlink(fs, linkfile, buf, 512);
+ fprintf(stderr, "readlink(%s) : %d (%s)\n", filename, ret, strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_lsetxattr(fs, filename, "user.testkey3", "testval", 8, 0);
+ fprintf(stderr, "lsetxattr(%s) : %d (%s)\n", linkfile, ret,
+ strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_llistxattr(fs, linkfile, buf, 512);
+ fprintf(stderr, "llistxattr(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_lgetxattr(fs, filename, "user.testkey3", buf, 512);
+ fprintf(stderr, "lgetxattr(%s): %d (%s)\n", linkfile, ret, strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ for (ptr = buf; ptr < buf + ret; ptr++) {
+ printf("key=%s\n", ptr);
+ ptr += strlen(ptr);
+ }
+
+ ret = glfs_removexattr(fs, filename, "user.testkey2");
+ fprintf(stderr, "removexattr(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+
+ fd = glfs_open(fs, filename, O_RDWR);
+ fprintf(stderr, "open(%s): (%p) %s\n", filename, fd, strerror(errno));
+
+ ret = glfs_fsetxattr(fd, "user.testkey2", "testval", 8, 0);
+ fprintf(stderr, "fsetxattr(%s): %d (%s)\n", filename, ret, strerror(errno));
+
+ ret = glfs_fgetxattr(fd, "user.testkey2", buf, 512);
+ fprintf(stderr, "fgetxattr(%s): %d (%s)\n", filename, ret, strerror(errno));
+
+ ret = glfs_flistxattr(fd, buf, 512);
+ fprintf(stderr, "flistxattr(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ for (ptr = buf; ptr < buf + ret; ptr++) {
+ printf("key=%s\n", ptr);
+ ptr += strlen(ptr);
+ }
+
+ ret = glfs_fremovexattr(fd, "user.testkey2");
+ fprintf(stderr, "fremovexattr(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+
+ glfs_close(fd);
+
+ return 0;
+}
+
+int
+test_chdir(glfs_t *fs)
+{
+ int ret = -1;
+ char *dir = "/dir";
+ char *topdir = "/topdir";
+ char *linkdir = "/linkdir";
+ char *linkdir2 = "/linkdir2";
+ char *subdir = "./subdir";
+ char *respath = NULL;
+ char pathbuf[4096];
+
+ ret = glfs_mkdir(fs, topdir, 0755);
+ fprintf(stderr, "mkdir(%s): %s\n", topdir, strerror(errno));
+ if (ret)
+ return -1;
+
+ ret = glfs_mkdir(fs, dir, 0755);
+ fprintf(stderr, "mkdir(%s): %s\n", dir, strerror(errno));
+ if (ret)
+ return -1;
+
+ respath = glfs_getcwd(fs, pathbuf, 4096);
+ fprintf(stdout, "getcwd() = %s\n", respath);
+
+ ret = glfs_symlink(fs, "topdir", linkdir);
+ if (ret) {
+ fprintf(stderr, "symlink(%s, %s): %s\n", topdir, linkdir,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_chdir(fs, linkdir);
+ if (ret) {
+ fprintf(stderr, "chdir(%s): %s\n", linkdir, strerror(errno));
+ return -1;
+ }
+
+ respath = glfs_getcwd(fs, pathbuf, 4096);
+ fprintf(stdout, "getcwd() = %s\n", respath);
+
+ respath = glfs_realpath(fs, subdir, pathbuf);
+ if (respath) {
+ fprintf(stderr, "realpath(%s) worked unexpectedly: %s\n", subdir,
+ respath);
+ return -1;
+ }
+
+ ret = glfs_mkdir(fs, subdir, 0755);
+ if (ret) {
+ fprintf(stderr, "mkdir(%s): %s\n", subdir, strerror(errno));
+ return -1;
+ }
+
+ respath = glfs_realpath(fs, subdir, pathbuf);
+ if (!respath) {
+ fprintf(stderr, "realpath(%s): %s\n", subdir, strerror(errno));
+ } else {
+ fprintf(stdout, "realpath(%s) = %s\n", subdir, respath);
+ }
+
+ ret = glfs_chdir(fs, subdir);
+ if (ret) {
+ fprintf(stderr, "chdir(%s): %s\n", subdir, strerror(errno));
+ return -1;
+ }
+
+ respath = glfs_getcwd(fs, pathbuf, 4096);
+ fprintf(stdout, "getcwd() = %s\n", respath);
+
+ respath = glfs_realpath(fs, "/linkdir/subdir", pathbuf);
+ if (!respath) {
+ fprintf(stderr, "realpath(/linkdir/subdir): %s\n", strerror(errno));
+ } else {
+ fprintf(stdout, "realpath(/linkdir/subdir) = %s\n", respath);
+ }
+
+ return 0;
+}
+
+#ifdef DEBUG
+static void
+peek_stat(struct stat *sb)
+{
+ printf("Dumping stat information:\n");
+ printf("File type: ");
+
+ switch (sb->st_mode & S_IFMT) {
+ case S_IFBLK:
+ printf("block device\n");
+ break;
+ case S_IFCHR:
+ printf("character device\n");
+ break;
+ case S_IFDIR:
+ printf("directory\n");
+ break;
+ case S_IFIFO:
+ printf("FIFO/pipe\n");
+ break;
+ case S_IFLNK:
+ printf("symlink\n");
+ break;
+ case S_IFREG:
+ printf("regular file\n");
+ break;
+ case S_IFSOCK:
+ printf("socket\n");
+ break;
+ default:
+ printf("unknown?\n");
+ break;
+ }
+
+ printf("I-node number: %ld\n", (long)sb->st_ino);
+
+ printf("Mode: %lo (octal)\n",
+ (unsigned long)sb->st_mode);
+
+ printf("Link count: %ld\n", (long)sb->st_nlink);
+ printf("Ownership: UID=%ld GID=%ld\n", (long)sb->st_uid,
+ (long)sb->st_gid);
+
+ printf("Preferred I/O block size: %ld bytes\n", (long)sb->st_blksize);
+ printf("File size: %lld bytes\n", (long long)sb->st_size);
+ printf("Blocks allocated: %lld\n", (long long)sb->st_blocks);
+
+ printf("Last status change: %s", ctime(&sb->st_ctime));
+ printf("Last file access: %s", ctime(&sb->st_atime));
+ printf("Last file modification: %s", ctime(&sb->st_mtime));
+
+ return;
+}
+
+static void
+peek_handle(unsigned char *glid)
+{
+ int i;
+
+ for (i = 0; i < GFAPI_HANDLE_LENGTH; i++) {
+ printf(":%02x:", glid[i]);
+ }
+ printf("\n");
+}
+#else /* DEBUG */
+static void
+peek_stat(struct stat *sb)
+{
+ return;
+}
+
+static void
+peek_handle(unsigned char *id)
+{
+ return;
+}
+#endif /* DEBUG */
+
+glfs_t *fs = NULL;
+char *full_parent_name = "/testdir", *parent_name = "testdir";
+
+void
+test_h_unlink(void)
+{
+ char *my_dir = "unlinkdir";
+ char *my_file = "file.txt";
+ char *my_subdir = "dir1";
+ struct glfs_object *parent = NULL, *leaf = NULL, *dir = NULL,
+ *subdir = NULL, *subleaf = NULL;
+ struct stat sb;
+ int ret;
+
+ printf("glfs_h_unlink tests: In Progress\n");
+
+ /* Prepare tests */
+ parent = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dir = glfs_h_mkdir(fs, parent, my_dir, 0755, &sb);
+ if (dir == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, parent, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ leaf = glfs_h_creat(fs, dir, my_file, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, dir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ subdir = glfs_h_mkdir(fs, dir, my_subdir, 0755, &sb);
+ if (subdir == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_subdir, dir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ subleaf = glfs_h_creat(fs, subdir, my_file, O_CREAT, 0644, &sb);
+ if (subleaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, subdir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink non empty directory */
+ ret = glfs_h_unlink(fs, dir, my_subdir);
+ if ((ret && errno != ENOTEMPTY) || (ret == 0)) {
+ fprintf(stderr,
+ "glfs_h_unlink: error unlinking %s: it is non empty: %s\n",
+ my_subdir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink regular file */
+ ret = glfs_h_unlink(fs, subdir, my_file);
+ if (ret) {
+ fprintf(stderr, "glfs_h_unlink: error unlinking %s: from (%p),%s\n",
+ my_file, subdir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink directory */
+ ret = glfs_h_unlink(fs, dir, my_subdir);
+ if (ret) {
+ fprintf(stderr, "glfs_h_unlink: error unlinking %s: from (%p),%s\n",
+ my_subdir, dir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink regular file */
+ ret = glfs_h_unlink(fs, dir, my_file);
+ if (ret) {
+ fprintf(stderr, "glfs_h_unlink: error unlinking %s: from (%p),%s\n",
+ my_file, dir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink non-existent regular file */
+ ret = glfs_h_unlink(fs, dir, my_file);
+ if ((ret && errno != ENOENT) || (ret == 0)) {
+ fprintf(stderr,
+ "glfs_h_unlink: error unlinking non-existent %s: invalid errno "
+ ",%d, %s\n",
+ my_file, ret, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink non-existent directory */
+ ret = glfs_h_unlink(fs, dir, my_subdir);
+ if ((ret && errno != ENOENT) || (ret == 0)) {
+ fprintf(stderr,
+ "glfs_h_unlink: error unlinking non-existent %s: invalid "
+ "errno ,%d, %s\n",
+ my_subdir, ret, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink directory */
+ ret = glfs_h_unlink(fs, parent, my_dir);
+ if (ret) {
+ fprintf(stderr, "glfs_h_unlink: error unlinking %s: from (%p),%s\n",
+ my_dir, dir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ printf("glfs_h_unlink tests: PASSED\n");
+
+out:
+ if (dir)
+ glfs_h_close(dir);
+ if (leaf)
+ glfs_h_close(leaf);
+ if (subdir)
+ glfs_h_close(subdir);
+ if (subleaf)
+ glfs_h_close(subleaf);
+ if (parent)
+ glfs_h_close(parent);
+
+ return;
+}
+
+void
+test_h_getsetattrs(void)
+{
+ char *my_dir = "attrdir";
+ char *my_file = "attrfile.txt";
+ struct glfs_object *parent = NULL, *leaf = NULL, *dir = NULL;
+ struct stat sb, retsb;
+ int ret, valid;
+ struct timespec timestamp;
+
+ printf("glfs_h_getattrs and setattrs tests: In Progress\n");
+
+ /* Prepare tests */
+ parent = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dir = glfs_h_mkdir(fs, parent, my_dir, 0755, &sb);
+ if (dir == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, parent, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_creat(fs, dir, my_file, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, dir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ ret = glfs_h_getattrs(fs, dir, &retsb);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_getattrs: error %s: from (%p),%s\n", my_dir,
+ dir, strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&retsb);
+ /* TODO: Compare stat information */
+
+ retsb.st_mode = 00666;
+ retsb.st_uid = 1000;
+ retsb.st_gid = 1001;
+ ret = clock_gettime(CLOCK_REALTIME, &timestamp);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+ retsb.st_atim = timestamp;
+ retsb.st_mtim = timestamp;
+ valid = GFAPI_SET_ATTR_MODE | GFAPI_SET_ATTR_UID | GFAPI_SET_ATTR_GID |
+ GFAPI_SET_ATTR_ATIME | GFAPI_SET_ATTR_MTIME;
+ peek_stat(&retsb);
+
+ ret = glfs_h_setattrs(fs, dir, &retsb, valid);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_setattrs: error %s: from (%p),%s\n", my_dir,
+ dir, strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ memset(&retsb, 0, sizeof(struct stat));
+ ret = glfs_h_stat(fs, dir, &retsb);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_stat: error %s: from (%p),%s\n", my_dir, dir,
+ strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&retsb);
+
+ printf("glfs_h_getattrs and setattrs tests: PASSED\n");
+out:
+ if (parent)
+ glfs_h_close(parent);
+ if (leaf)
+ glfs_h_close(leaf);
+ if (dir)
+ glfs_h_close(dir);
+
+ return;
+}
+
+void
+test_h_truncate(void)
+{
+ char *my_dir = "truncatedir";
+ char *my_file = "file.txt";
+ struct glfs_object *root = NULL, *parent = NULL, *leaf = NULL;
+ struct stat sb;
+ glfs_fd_t *fd = NULL;
+ char buf[32];
+ off_t offset = 0;
+ int ret = 0;
+
+ printf("glfs_h_truncate tests: In Progress\n");
+
+ /* Prepare tests */
+ root = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (root == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ parent = glfs_h_mkdir(fs, root, my_dir, 0755, &sb);
+ if (parent == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, root, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_creat(fs, parent, my_file, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ fd = glfs_h_open(fs, leaf, O_RDWR);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_h_open: error on open of %s: %s\n", my_file,
+ strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+
+ memcpy(buf, "abcdefghijklmnopqrstuvwxyz012345", 32);
+ ret = glfs_write(fd, buf, 32, 0);
+
+ /* run tests */
+ /* truncate lower */
+ offset = 30;
+ ret = glfs_h_truncate(fs, leaf, offset);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_truncate: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ ret = glfs_h_getattrs(fs, leaf, &sb);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_getattrs: error for %s (%p),%s\n", my_file,
+ leaf, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ if (sb.st_size != offset) {
+ fprintf(stderr, "glfs_h_truncate: post size mismatch\n");
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+
+ /* truncate higher */
+ offset = 32;
+ ret = glfs_h_truncate(fs, leaf, offset);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_truncate: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ ret = glfs_h_getattrs(fs, leaf, &sb);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_getattrs: error for %s (%p),%s\n", my_file,
+ leaf, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ if (sb.st_size != offset) {
+ fprintf(stderr, "glfs_h_truncate: post size mismatch\n");
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+
+ /* truncate equal */
+ offset = 30;
+ ret = glfs_h_truncate(fs, leaf, offset);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_truncate: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ ret = glfs_h_getattrs(fs, leaf, &sb);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_getattrs: error for %s (%p),%s\n", my_file,
+ leaf, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ if (sb.st_size != offset) {
+ fprintf(stderr, "glfs_h_truncate: post size mismatch\n");
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+
+ printf("glfs_h_truncate tests: PASSED\n");
+out:
+ if (fd)
+ glfs_close(fd);
+ if (root)
+ glfs_h_close(root);
+ if (parent)
+ glfs_h_close(parent);
+ if (leaf)
+ glfs_h_close(leaf);
+
+ return;
+}
+
+void
+test_h_links(void)
+{
+ char *my_dir = "linkdir";
+ char *my_file = "file.txt";
+ char *my_symlnk = "slnk.txt";
+ char *my_lnk = "lnk.txt";
+ char *linksrc_dir = "dir1";
+ char *linktgt_dir = "dir2";
+ struct glfs_object *root = NULL, *parent = NULL, *leaf = NULL,
+ *dirsrc = NULL, *dirtgt = NULL, *dleaf = NULL;
+ struct glfs_object *ln1 = NULL;
+ struct stat sb;
+ int ret;
+ char *buf = NULL;
+
+ printf("glfs_h_link(s) tests: In Progress\n");
+
+ /* Prepare tests */
+ root = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (root == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ parent = glfs_h_mkdir(fs, root, my_dir, 0755, &sb);
+ if (parent == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, root, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_creat(fs, parent, my_file, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dirsrc = glfs_h_mkdir(fs, parent, linksrc_dir, 0755, &sb);
+ if (dirsrc == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ linksrc_dir, parent, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dirtgt = glfs_h_mkdir(fs, parent, linktgt_dir, 0755, &sb);
+ if (dirtgt == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ linktgt_dir, parent, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dleaf = glfs_h_creat(fs, dirsrc, my_file, O_CREAT, 0644, &sb);
+ if (dleaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, dirsrc, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* run tests */
+ /* sym link: /testdir/linkdir/file.txt to ./slnk.txt */
+ ln1 = glfs_h_symlink(fs, parent, my_symlnk, "./file.txt", &sb);
+ if (ln1 == NULL) {
+ fprintf(stderr, "glfs_h_symlink: error creating %s: from (%p),%s\n",
+ my_symlnk, parent, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ buf = calloc(1024, sizeof(char));
+ if (buf == NULL) {
+ fprintf(stderr, "Error allocating memory\n");
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+
+ ret = glfs_h_readlink(fs, ln1, buf, 1024);
+ if (ret <= 0) {
+ fprintf(stderr, "glfs_h_readlink: error reading %s: from (%p),%s\n",
+ my_symlnk, ln1, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ if (!(strncmp(buf, my_symlnk, strlen(my_symlnk)))) {
+ fprintf(stderr,
+ "glfs_h_readlink: error mismatch in link name: actual %s: "
+ "retrieved %s\n",
+ my_symlnk, buf);
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+
+ /* link: /testdir/linkdir/file.txt to ./lnk.txt */
+ ret = glfs_h_link(fs, leaf, parent, my_lnk);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_link: error creating %s: from (%p),%s\n",
+ my_lnk, parent, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ /* TODO: Should write content to a file and read from the link */
+
+ /* link: /testdir/linkdir/dir1/file.txt to ../dir2/slnk.txt */
+ ret = glfs_h_link(fs, dleaf, dirtgt, my_lnk);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_link: error creating %s: from (%p),%s\n",
+ my_lnk, dirtgt, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ /* TODO: Should write content to a file and read from the link */
+
+ printf("glfs_h_link(s) tests: PASSED\n");
+
+out:
+ if (root)
+ glfs_h_close(root);
+ if (parent)
+ glfs_h_close(parent);
+ if (leaf)
+ glfs_h_close(leaf);
+ if (dirsrc)
+ glfs_h_close(dirsrc);
+ if (dirtgt)
+ glfs_h_close(dirtgt);
+ if (dleaf)
+ glfs_h_close(dleaf);
+ if (ln1)
+ glfs_h_close(ln1);
+ if (buf)
+ free(buf);
+
+ return;
+}
+
+void
+test_h_rename(void)
+{
+ char *my_dir = "renamedir";
+ char *my_file = "file.txt";
+ char *src_dir = "dir1";
+ char *tgt_dir = "dir2";
+ struct glfs_object *root = NULL, *parent = NULL, *leaf = NULL,
+ *dirsrc = NULL, *dirtgt = NULL, *dleaf = NULL;
+ struct stat sb;
+ int ret;
+
+ printf("glfs_h_rename tests: In Progress\n");
+
+ /* Prepare tests */
+ root = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (root == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ parent = glfs_h_mkdir(fs, root, my_dir, 0755, &sb);
+ if (parent == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, root, strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_creat(fs, parent, my_file, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dirsrc = glfs_h_mkdir(fs, parent, src_dir, 0755, &sb);
+ if (dirsrc == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ src_dir, parent, strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dirtgt = glfs_h_mkdir(fs, parent, tgt_dir, 0755, &sb);
+ if (dirtgt == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ tgt_dir, parent, strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dleaf = glfs_h_creat(fs, dirsrc, my_file, O_CREAT, 0644, &sb);
+ if (dleaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, dirsrc, strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* run tests */
+ /* Rename file.txt -> file1.txt */
+ ret = glfs_h_rename(fs, parent, "file.txt", parent, "file1.txt");
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_rename: error renaming %s to %s (%s)\n",
+ "file.txt", "file1.txt", strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename dir1/file.txt -> file.txt */
+ ret = glfs_h_rename(fs, dirsrc, "file.txt", parent, "file.txt");
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_rename: error renaming %s/%s to %s (%s)\n",
+ src_dir, "file.txt", "file.txt", strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename file1.txt -> file.txt (exists) */
+ ret = glfs_h_rename(fs, parent, "file1.txt", parent, "file.txt");
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_rename: error renaming %s to %s (%s)\n",
+ "file.txt", "file.txt", strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename dir1 -> dir3 */
+ ret = glfs_h_rename(fs, parent, "dir1", parent, "dir3");
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_rename: error renaming %s to %s (%s)\n", "dir1",
+ "dir3", strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename dir2 ->dir3 (exists) */
+ ret = glfs_h_rename(fs, parent, "dir2", parent, "dir3");
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_rename: error renaming %s to %s (%s)\n", "dir2",
+ "dir3", strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename file.txt -> dir3 (fail) */
+ ret = glfs_h_rename(fs, parent, "file.txt", parent, "dir3");
+ if (ret == 0) {
+ fprintf(stderr, "glfs_h_rename: NO error renaming %s to %s (%s)\n",
+ "file.txt", "dir3", strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename dir3 -> file.txt (fail) */
+ ret = glfs_h_rename(fs, parent, "dir3", parent, "file.txt");
+ if (ret == 0) {
+ fprintf(stderr, "glfs_h_rename: NO error renaming %s to %s (%s)\n",
+ "dir3", "file.txt", strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ printf("glfs_h_rename tests: PASSED\n");
+
+out:
+ if (root)
+ glfs_h_close(root);
+ if (parent)
+ glfs_h_close(parent);
+ if (leaf)
+ glfs_h_close(leaf);
+ if (dirsrc)
+ glfs_h_close(dirsrc);
+ if (dirtgt)
+ glfs_h_close(dirtgt);
+ if (dleaf)
+ glfs_h_close(dleaf);
+
+ return;
+}
+
+void
+assimilatetime(struct timespec *ts, struct timespec ts_st,
+ struct timespec ts_ed)
+{
+ if ((ts_ed.tv_nsec - ts_st.tv_nsec) < 0) {
+ ts->tv_sec += ts_ed.tv_sec - ts_st.tv_sec - 1;
+ ts->tv_nsec += 1000000000 + ts_ed.tv_nsec - ts_st.tv_nsec;
+ } else {
+ ts->tv_sec += ts_ed.tv_sec - ts_st.tv_sec;
+ ts->tv_nsec += ts_ed.tv_nsec - ts_st.tv_nsec;
+ }
+
+ if (ts->tv_nsec > 1000000000) {
+ ts->tv_nsec = ts->tv_nsec - 1000000000;
+ ts->tv_sec += 1;
+ }
+
+ return;
+}
+
+#define MAX_FILES_CREATE 10
+#define MAXPATHNAME 512
+void
+test_h_performance(void)
+{
+ char *my_dir = "perftest", *full_dir_path = "/testdir/perftest";
+ char *my_file = "file_", my_file_name[MAXPATHNAME];
+ struct glfs_object *parent = NULL, *leaf = NULL, *dir = NULL;
+ struct stat sb;
+ int ret, i;
+ struct glfs_fd *fd;
+ struct timespec c_ts = {0, 0}, c_ts_st, c_ts_ed;
+ struct timespec o_ts = {0, 0}, o_ts_st, o_ts_ed;
+
+ printf("glfs_h_performance tests: In Progress\n");
+
+ /* Prepare tests */
+ parent = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror(errno));
+ printf("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+
+ dir = glfs_h_mkdir(fs, parent, my_dir, 0755, &sb);
+ if (dir == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, parent, strerror(errno));
+ printf("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* create performance */
+ ret = clock_gettime(CLOCK_REALTIME, &o_ts_st);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ for (i = 0; i < MAX_FILES_CREATE; i++) {
+ sprintf(my_file_name, "%s%d", my_file, i);
+
+ ret = clock_gettime(CLOCK_REALTIME, &c_ts_st);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ leaf = glfs_h_lookupat(fs, dir, my_file_name, &sb, 0);
+ if (leaf != NULL) {
+ fprintf(stderr, "glfs_h_lookup: exists %s\n", my_file_name);
+ printf("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+
+ leaf = glfs_h_creat(fs, dir, my_file_name, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, dir, strerror(errno));
+ printf("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+
+ ret = clock_gettime(CLOCK_REALTIME, &c_ts_ed);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ assimilatetime(&c_ts, c_ts_st, c_ts_ed);
+ glfs_h_close(leaf);
+ leaf = NULL;
+ }
+
+ ret = clock_gettime(CLOCK_REALTIME, &o_ts_ed);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ assimilatetime(&o_ts, o_ts_st, o_ts_ed);
+
+ printf("Creation performance (handle based):\n\t# empty files:%d\n",
+ MAX_FILES_CREATE);
+ printf("\tOverall time:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n", o_ts.tv_sec,
+ o_ts.tv_nsec);
+ printf("\tcreate call time time:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n",
+ c_ts.tv_sec, c_ts.tv_nsec);
+
+ /* create using path */
+ c_ts.tv_sec = o_ts.tv_sec = 0;
+ c_ts.tv_nsec = o_ts.tv_nsec = 0;
+
+ sprintf(my_file_name, "%s1", full_dir_path);
+ ret = glfs_mkdir(fs, my_file_name, 0755);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_mkdir: error creating %s: from (%p),%s\n", my_dir,
+ parent, strerror(errno));
+ printf("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ ret = clock_gettime(CLOCK_REALTIME, &o_ts_st);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ for (i = 0; i < MAX_FILES_CREATE; i++) {
+ sprintf(my_file_name, "%s1/%sn%d", full_dir_path, my_file, i);
+
+ ret = clock_gettime(CLOCK_REALTIME, &c_ts_st);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ ret = glfs_stat(fs, my_file_name, &sb);
+ if (ret == 0) {
+ fprintf(stderr, "glfs_stat: exists %s\n", my_file_name);
+ printf("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+
+ fd = glfs_creat(fs, my_file_name, O_CREAT, 0644);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_creat: error creating %s: from (%p),%s\n",
+ my_file, dir, strerror(errno));
+ printf("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+
+ ret = clock_gettime(CLOCK_REALTIME, &c_ts_ed);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ assimilatetime(&c_ts, c_ts_st, c_ts_ed);
+ glfs_close(fd);
+ }
+
+ ret = clock_gettime(CLOCK_REALTIME, &o_ts_ed);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ assimilatetime(&o_ts, o_ts_st, o_ts_ed);
+
+ printf("Creation performance (path based):\n\t# empty files:%d\n",
+ MAX_FILES_CREATE);
+ printf("\tOverall time:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n", o_ts.tv_sec,
+ o_ts.tv_nsec);
+ printf("\tcreate call time time:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n",
+ c_ts.tv_sec, c_ts.tv_nsec);
+out:
+ return;
+}
+
+int
+test_handleops(int argc, char *argv[])
+{
+ int ret = 0;
+ glfs_fd_t *fd = NULL;
+ struct stat sb = {
+ 0,
+ };
+ struct glfs_object *root = NULL, *parent = NULL, *leaf = NULL, *tmp = NULL;
+ char readbuf[32], writebuf[32];
+ unsigned char leaf_handle[GFAPI_HANDLE_LENGTH];
+
+ char *full_leaf_name = "/testdir/testfile.txt", *leaf_name = "testfile.txt",
+ *relative_leaf_name = "testdir/testfile.txt";
+ char *leaf_name1 = "testfile1.txt";
+ char *full_newparent_name = "/testdir/dir1", *newparent_name = "dir1";
+ char *full_newnod_name = "/testdir/nod1", *newnod_name = "nod1";
+
+ /* Initialize test area */
+ ret = glfs_mkdir(fs, full_parent_name, 0755);
+ if (ret != 0 && errno != EEXIST) {
+ fprintf(stderr, "%s: (%p) %s\n", full_parent_name, fd, strerror(errno));
+ printf("Test initialization failed on volume %s\n", argv[1]);
+ goto out;
+ } else if (ret != 0) {
+ printf("Found test directory %s to be existing\n", full_parent_name);
+ printf("Cleanup test directory and restart tests\n");
+ goto out;
+ }
+
+ fd = glfs_creat(fs, full_leaf_name, O_CREAT, 0644);
+ if (fd == NULL) {
+ fprintf(stderr, "%s: (%p) %s\n", full_leaf_name, fd, strerror(errno));
+ printf("Test initialization failed on volume %s\n", argv[1]);
+ goto out;
+ }
+ glfs_close(fd);
+
+ printf("Initialized the test area, within volume %s\n", argv[1]);
+
+ /* Handle based APIs test area */
+
+ /* glfs_lookupat test */
+ printf("glfs_h_lookupat tests: In Progress\n");
+ /* start at root of the volume */
+ root = glfs_h_lookupat(fs, NULL, "/", &sb, 0);
+ if (root == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n", "/",
+ NULL, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* lookup a parent within root */
+ parent = glfs_h_lookupat(fs, root, parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ parent_name, root, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* lookup a leaf/child within the parent */
+ leaf = glfs_h_lookupat(fs, parent, leaf_name, &sb, 0);
+ if (leaf == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ leaf_name, parent, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* reset */
+ glfs_h_close(root);
+ root = NULL;
+ glfs_h_close(leaf);
+ leaf = NULL;
+ glfs_h_close(parent);
+ parent = NULL;
+
+ /* check absolute paths */
+ root = glfs_h_lookupat(fs, NULL, "/", &sb, 0);
+ if (root == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n", "/",
+ NULL, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ parent = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, root, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_lookupat(fs, NULL, full_leaf_name, &sb, 0);
+ if (leaf == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_leaf_name, parent, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* reset */
+ glfs_h_close(leaf);
+ leaf = NULL;
+
+ /* check multiple component paths */
+ leaf = glfs_h_lookupat(fs, root, relative_leaf_name, &sb, 0);
+ if (leaf == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ relative_leaf_name, parent, strerror(errno));
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* reset */
+ glfs_h_close(root);
+ root = NULL;
+ glfs_h_close(parent);
+ parent = NULL;
+
+ /* check symlinks in path */
+
+ /* TODO: -ve test cases */
+ /* parent invalid
+ * path invalid
+ * path does not exist after some components
+ * no parent, but relative path
+ * parent and full path? -ve?
+ */
+
+ printf("glfs_h_lookupat tests: PASSED\n");
+
+ /* glfs_openat test */
+ printf("glfs_h_open tests: In Progress\n");
+ fd = glfs_h_open(fs, leaf, O_RDWR);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_h_open: error on open of %s: %s\n",
+ full_leaf_name, strerror(errno));
+ printf("glfs_h_open tests: FAILED\n");
+ goto out;
+ }
+
+ /* test read/write based on fd */
+ memcpy(writebuf, "abcdefghijklmnopqrstuvwxyz012345", 32);
+ ret = glfs_write(fd, writebuf, 32, 0);
+
+ glfs_lseek(fd, 10, SEEK_SET);
+
+ ret = glfs_read(fd, readbuf, 32, 0);
+ if (memcmp(readbuf, writebuf, 32)) {
+ printf("Failed to read what I wrote: %s %s\n", readbuf, writebuf);
+ glfs_close(fd);
+ printf("glfs_h_open tests: FAILED\n");
+ goto out;
+ }
+
+ glfs_h_close(leaf);
+ leaf = NULL;
+ glfs_close(fd);
+
+ printf("glfs_h_open tests: PASSED\n");
+
+ /* Create tests */
+ printf("glfs_h_creat tests: In Progress\n");
+ parent = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, root, strerror(errno));
+ printf("glfs_h_creat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_creat(fs, parent, leaf_name1, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error on create of %s: from (%p),%s\n",
+ leaf_name1, parent, strerror(errno));
+ printf("glfs_h_creat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ glfs_h_close(leaf);
+ leaf = NULL;
+
+ leaf = glfs_h_creat(fs, parent, leaf_name1, O_CREAT | O_EXCL, 0644, &sb);
+ if (leaf != NULL || errno != EEXIST) {
+ fprintf(stderr,
+ "glfs_h_creat: existing file, leaf = (%p), errno = %s\n", leaf,
+ strerror(errno));
+ printf("glfs_h_creat tests: FAILED\n");
+ if (leaf != NULL) {
+ glfs_h_close(leaf);
+ leaf = NULL;
+ }
+ }
+
+ tmp = glfs_h_creat(fs, root, parent_name, O_CREAT, 0644, &sb);
+ if (tmp != NULL || !(errno == EISDIR || errno == EINVAL)) {
+ fprintf(stderr, "glfs_h_creat: dir create, tmp = (%p), errno = %s\n",
+ leaf, strerror(errno));
+ printf("glfs_h_creat tests: FAILED\n");
+ if (tmp != NULL) {
+ glfs_h_close(tmp);
+ tmp = NULL;
+ }
+ }
+
+ /* TODO: Other combinations and -ve cases as applicable */
+ printf("glfs_h_creat tests: PASSED\n");
+
+ /* extract handle and create from handle test */
+ printf(
+ "glfs_h_extract_handle and glfs_h_create_from_handle tests: In "
+ "Progress\n");
+ /* TODO: Change the lookup to create below for a GIFD recovery failure,
+ * that needs to be fixed */
+ leaf = glfs_h_lookupat(fs, parent, leaf_name1, &sb, 0);
+ if (leaf == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ leaf_name1, parent, strerror(errno));
+ printf("glfs_h_extract_handle tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ ret = glfs_h_extract_handle(leaf, leaf_handle, GFAPI_HANDLE_LENGTH);
+ if (ret < 0) {
+ fprintf(stderr,
+ "glfs_h_extract_handle: error extracting handle of %s: %s\n",
+ full_leaf_name, strerror(errno));
+ printf("glfs_h_extract_handle tests: FAILED\n");
+ goto out;
+ }
+ peek_handle(leaf_handle);
+
+ glfs_h_close(leaf);
+ leaf = NULL;
+
+ leaf = glfs_h_create_from_handle(fs, leaf_handle, GFAPI_HANDLE_LENGTH, &sb);
+ if (leaf == NULL) {
+ fprintf(
+ stderr,
+ "glfs_h_create_from_handle: error on create of %s: from (%p),%s\n",
+ leaf_name1, leaf_handle, strerror(errno));
+ printf("glfs_h_create_from_handle tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ fd = glfs_h_open(fs, leaf, O_RDWR);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_h_open: error on open of %s: %s\n",
+ full_leaf_name, strerror(errno));
+ printf("glfs_h_create_from_handle tests: FAILED\n");
+ goto out;
+ }
+
+ /* test read/write based on fd */
+ memcpy(writebuf, "abcdefghijklmnopqrstuvwxyz012345", 32);
+ ret = glfs_write(fd, writebuf, 32, 0);
+
+ glfs_lseek(fd, 0, SEEK_SET);
+
+ ret = glfs_read(fd, readbuf, 32, 0);
+ if (memcmp(readbuf, writebuf, 32)) {
+ printf("Failed to read what I wrote: %s %s\n", writebuf, writebuf);
+ printf("glfs_h_create_from_handle tests: FAILED\n");
+ glfs_close(fd);
+ goto out;
+ }
+
+ glfs_close(fd);
+ glfs_h_close(leaf);
+ leaf = NULL;
+ glfs_h_close(parent);
+ parent = NULL;
+
+ printf(
+ "glfs_h_extract_handle and glfs_h_create_from_handle tests: PASSED\n");
+
+ /* Mkdir tests */
+ printf("glfs_h_mkdir tests: In Progress\n");
+
+ ret = glfs_rmdir(fs, full_newparent_name);
+ if (ret && errno != ENOENT) {
+ fprintf(stderr, "glfs_rmdir: Failed for %s: %s\n", full_newparent_name,
+ strerror(errno));
+ printf("glfs_h_mkdir tests: FAILED\n");
+ goto out;
+ }
+
+ parent = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, root, strerror(errno));
+ printf("glfs_h_mkdir tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_mkdir(fs, parent, newparent_name, 0755, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error on mkdir of %s: from (%p),%s\n",
+ newparent_name, parent, strerror(errno));
+ printf("glfs_h_mkdir tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ glfs_h_close(leaf);
+ leaf = NULL;
+
+ leaf = glfs_h_mkdir(fs, parent, newparent_name, 0755, &sb);
+ if (leaf != NULL || errno != EEXIST) {
+ fprintf(stderr,
+ "glfs_h_mkdir: existing directory, leaf = (%p), errno = %s\n",
+ leaf, strerror(errno));
+ printf("glfs_h_mkdir tests: FAILED\n");
+ if (leaf != NULL) {
+ glfs_h_close(leaf);
+ leaf = NULL;
+ }
+ }
+
+ glfs_h_close(parent);
+ parent = NULL;
+
+ printf("glfs_h_mkdir tests: PASSED\n");
+
+ /* Mknod tests */
+ printf("glfs_h_mknod tests: In Progress\n");
+ ret = glfs_unlink(fs, full_newnod_name);
+ if (ret && errno != ENOENT) {
+ fprintf(stderr, "glfs_unlink: Failed for %s: %s\n", full_newnod_name,
+ strerror(errno));
+ printf("glfs_h_mknod tests: FAILED\n");
+ goto out;
+ }
+
+ parent = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, root, strerror(errno));
+ printf("glfs_h_mknod tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_mknod(fs, parent, newnod_name, S_IFIFO, 0, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error on mkdir of %s: from (%p),%s\n",
+ newnod_name, parent, strerror(errno));
+ printf("glfs_h_mknod tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* TODO: create op on a FIFO node hangs, need to check and fix
+ tmp = glfs_h_creat (fs, parent, newnod_name, O_CREAT, 0644, &sb);
+ if (tmp != NULL || errno != EINVAL) {
+ fprintf (stderr, "glfs_h_creat: node create, tmp = (%p), errno =
+ %s\n", tmp, strerror (errno)); printf ("glfs_h_creat/mknod tests:
+ FAILED\n"); if (tmp != NULL) { glfs_h_close(tmp); tmp = NULL;
+ }
+ } */
+
+ glfs_h_close(leaf);
+ leaf = NULL;
+
+ leaf = glfs_h_mknod(fs, parent, newnod_name, 0644, 0, &sb);
+ if (leaf != NULL || errno != EEXIST) {
+ fprintf(stderr,
+ "glfs_h_mknod: existing node, leaf = (%p), errno = %s\n", leaf,
+ strerror(errno));
+ printf("glfs_h_mknod tests: FAILED\n");
+ if (leaf != NULL) {
+ glfs_h_close(leaf);
+ leaf = NULL;
+ }
+ }
+
+ glfs_h_close(parent);
+ parent = NULL;
+
+ printf("glfs_h_mknod tests: PASSED\n");
+
+ /* unlink tests */
+ test_h_unlink();
+
+ /* TODO: opendir tests */
+
+ /* getattr tests */
+ test_h_getsetattrs();
+
+ /* TODO: setattr tests */
+
+ /* truncate tests */
+ test_h_truncate();
+
+ /* link tests */
+ test_h_links();
+
+ /* rename tests */
+ test_h_rename();
+
+ /* performance tests */
+ test_h_performance();
+
+ /* END: New APIs test area */
+
+out:
+ /* Cleanup glfs handles */
+ if (root)
+ glfs_h_close(root);
+ if (parent)
+ glfs_h_close(parent);
+ if (leaf)
+ glfs_h_close(leaf);
+
+ return ret;
+}
+
+int
+test_write_apis(glfs_t *fs)
+{
+ /* Add more content here */
+ /* Some apis we can get are */
+ /*
+ 0. glfs_set_xlator_option()
+
+ Read/Write combinations:
+ . glfs_{p,}readv/{p,}writev
+ . glfs_pread/pwrite
+
+ tests/basic/gfapi/gfapi-async-calls-test.c
+ . glfs_read_async/write_async
+ . glfs_pread_async/pwrite_async
+ . glfs_readv_async/writev_async
+ . glfs_preadv_async/pwritev_async
+
+ . ftruncate/ftruncate_async
+ . fsync/fsync_async
+ . fdatasync/fdatasync_async
+
+ */
+
+ glfs_fd_t *fd = NULL;
+ char *filename = "/filename2";
+ int flags = O_RDWR;
+ char *buf = "some bytes!";
+ char writestr[TEST_STR_LEN];
+ struct iovec iov = {&writestr, TEST_STR_LEN};
+ int ret, i;
+
+ for (i = 0; i < TEST_STR_LEN; i++)
+ writestr[i] = 0x11;
+
+ fd = glfs_open(fs, filename, flags);
+ if (!fd)
+ fprintf(stderr, "open(%s): (%p) %s\n", filename, fd, strerror(errno));
+
+ ret = glfs_writev(fd, &iov, 1, flags);
+ if (ret < 0) {
+ fprintf(stderr, "writev(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ }
+
+ ret = glfs_pwrite(fd, buf, 10, 4, flags, NULL, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "pwrite(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ }
+
+ ret = glfs_pwritev(fd, &iov, 1, 4, flags);
+ if (ret < 0) {
+ fprintf(stderr, "pwritev(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ }
+
+ ret = glfs_fsync(fd, NULL, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "fsync(%s): %d (%s)\n", filename, ret, strerror(errno));
+ }
+
+ glfs_close(fd);
+
+ return 0;
+}
+
+int
+test_metadata_ops(glfs_t *fs, glfs_t *fs2)
+{
+ glfs_fd_t *fd = NULL;
+ glfs_fd_t *fd2 = NULL;
+ struct stat sb = {
+ 0,
+ };
+ struct glfs_stat gsb = {
+ 0,
+ };
+ struct statvfs sfs;
+ char readbuf[32];
+ char writebuf[11] = "helloworld";
+
+ char *filename = "/filename2";
+ int ret;
+
+ ret = glfs_lstat(fs, filename, &sb);
+ fprintf(stderr, "lstat(%s): (%d) %s\n", filename, ret, strerror(errno));
+
+ fd = glfs_creat(fs, filename, O_RDWR, 0644);
+ if (!fd)
+ fprintf(stderr, "creat(%s): (%p) %s\n", filename, fd, strerror(errno));
+
+ fd2 = glfs_open(fs2, filename, O_RDWR);
+ if (!fd2)
+ fprintf(stderr, "open(%s): (%p) %s\n", filename, fd, strerror(errno));
+
+ ret = glfs_lstat(fs, filename, &sb);
+ if (ret)
+ fprintf(stderr, "lstat(%s): (%d) %s\n", filename, ret, strerror(errno));
+
+ ret = glfs_write(fd, writebuf, 11, 0);
+ if (ret < 0) {
+ fprintf(stderr, "writev(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ }
+
+ glfs_fsync(fd, NULL, NULL);
+
+ glfs_lseek(fd2, 5, SEEK_SET);
+
+ ret = glfs_read(fd2, readbuf, 32, 0);
+
+ printf("read %d, %s", ret, readbuf);
+
+ /* get stat */
+ ret = glfs_fstat(fd2, &sb);
+ if (ret)
+ fprintf(stderr, "fstat(%s): %d (%s)\n", filename, ret, strerror(errno));
+
+ ret = glfs_access(fs, filename, R_OK);
+ if (ret)
+ fprintf(stderr, "access(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+
+ ret = glfs_fallocate(fd2, 1024, 1024, 1024);
+ if (ret)
+ fprintf(stderr, "fallocate(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+
+ ret = glfs_discard(fd2, 1024, 512);
+ if (ret)
+ fprintf(stderr, "discard(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+
+ ret = glfs_zerofill(fd2, 2048, 1024);
+ if (ret)
+ fprintf(stderr, "zerofill(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+
+ /* set stat */
+ /* TODO: got some errors, need to fix */
+ ret = glfs_fsetattr(fd2, &gsb);
+
+ glfs_close(fd);
+ glfs_close(fd2);
+
+ filename = "/filename3";
+ ret = glfs_mknod(fs, filename, S_IFIFO, 0);
+ if (ret)
+ fprintf(stderr, "%s: (%d) %s\n", filename, ret, strerror(errno));
+
+ ret = glfs_lstat(fs, filename, &sb);
+ if (ret)
+ fprintf(stderr, "%s: (%d) %s\n", filename, ret, strerror(errno));
+
+ ret = glfs_rename(fs, filename, "/filename4");
+ if (ret)
+ fprintf(stderr, "rename(%s): (%d) %s\n", filename, ret,
+ strerror(errno));
+
+ ret = glfs_unlink(fs, "/filename4");
+ if (ret)
+ fprintf(stderr, "unlink(%s): (%d) %s\n", "/filename4", ret,
+ strerror(errno));
+
+ filename = "/dirname2";
+ ret = glfs_mkdir(fs, filename, 0);
+ if (ret)
+ fprintf(stderr, "%s: (%d) %s\n", filename, ret, strerror(errno));
+
+ ret = glfs_lstat(fs, filename, &sb);
+ if (ret)
+ fprintf(stderr, "lstat(%s): (%d) %s\n", filename, ret, strerror(errno));
+
+ ret = glfs_rmdir(fs, filename);
+ if (ret)
+ fprintf(stderr, "rmdir(%s): (%d) %s\n", filename, ret, strerror(errno));
+}
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs2 = NULL;
+ int ret = 0;
+ glfs_fd_t *fd = NULL;
+ glfs_fd_t *fd2 = NULL;
+ struct stat sb = {
+ 0,
+ };
+ struct glfs_stat gsb = {
+ 0,
+ };
+ struct statvfs sfs;
+ char readbuf[32];
+ char writebuf[32];
+ char volumeid[64];
+
+ char *filename = "/filename2";
+
+ if ((argc < 2) || (argc > 3)) {
+ printf("Usage:\n\t%s <volname> <hostname>\n\t%s <volfile-path>",
+ argv[0], argv[0]);
+ return -1;
+ }
+
+ if (argc == 2) {
+ /* Generally glfs_new() requires volume name as an argument */
+ fs = glfs_new("test-only");
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+ ret = glfs_set_volfile(fs, argv[1]);
+ if (ret)
+ fprintf(stderr, "glfs_set_volfile failed\n");
+ } else {
+ fs = glfs_new(argv[1]);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+ // ret = glfs_set_volfile_server (fs, "unix", "/tmp/gluster.sock", 0);
+ ret = glfs_set_volfile_server(fs, "tcp", argv[2], 24007);
+ if (ret)
+ fprintf(stderr, "glfs_set_volfile_server failed\n");
+ }
+
+ /* Change this to relevant file when running locally */
+ ret = glfs_set_logging(fs, "/dev/stderr", 5);
+ if (ret)
+ fprintf(stderr, "glfs_set_logging failed\n");
+
+ ret = glfs_init(fs);
+ if (ret)
+ fprintf(stderr, "glfs_init: returned %d\n", ret);
+
+ if (ret)
+ goto out;
+
+ /* no major use for getting the volume id in this test, done for coverage */
+ ret = glfs_get_volumeid(fs, volumeid, 64);
+ if (ret) {
+ fprintf(stderr, "glfs_get_volumeid: returned %d\n", ret);
+ }
+
+ sleep(2);
+
+ if (argc == 2) {
+ /* Generally glfs_new() requires volume name as an argument */
+ fs2 = glfs_new("test_only_volume");
+ if (!fs2) {
+ fprintf(stderr, "glfs_new(fs2): returned NULL\n");
+ return 1;
+ }
+ ret = glfs_set_volfile(fs2, argv[1]);
+ if (ret)
+ fprintf(stderr, "glfs_set_volfile failed(fs2)\n");
+ } else {
+ fs2 = glfs_new(argv[1]);
+ if (!fs2) {
+ fprintf(stderr, "glfs_new(fs2): returned NULL\n");
+ return 1;
+ }
+ ret = glfs_set_volfile_server(fs2, "tcp", argv[2], 24007);
+ if (ret)
+ fprintf(stderr, "glfs_set_volfile_server failed(fs2)\n");
+ }
+
+ ret = glfs_set_statedump_path(fs2, "/tmp");
+ if (ret) {
+ fprintf(stderr, "glfs_set_statedump_path: %s\n", strerror(errno));
+ }
+
+ ret = glfs_init(fs2);
+ if (ret)
+ fprintf(stderr, "glfs_init: returned %d\n", ret);
+
+ test_metadata_ops(fs, fs2);
+
+ test_dirops(fs);
+
+ test_xattr(fs);
+
+ test_chdir(fs);
+
+ test_handleops(argc, argv);
+ // done
+
+ /* Test some extra apis */
+ test_write_apis(fs);
+
+ glfs_statvfs(fs, "/", &sfs);
+
+ glfs_unset_volfile_server(fs, "tcp", argv[2], 24007);
+
+ glfs_fini(fs);
+ glfs_fini(fs2);
+
+ ret = 0;
+out:
+ return ret;
+}
diff --git a/tests/basic/gfapi/glfsxmp.t b/tests/basic/gfapi/glfsxmp.t
new file mode 100644
index 00000000000..b3e6645c0f5
--- /dev/null
+++ b/tests/basic/gfapi/glfsxmp.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/brick{0,1,2}
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+$CLI system getspec $V0 > fubar.vol
+
+TEST cp $(dirname $0)/glfsxmp-coverage.c ./glfsxmp.c
+TEST build_tester ./glfsxmp.c -lgfapi
+TEST ./glfsxmp $V0 $H0
+
+TEST ./glfsxmp fubar.vol
+
+TEST cleanup_tester ./glfsxmp
+TEST rm ./glfsxmp.c
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/basic/gfapi/protocol-client-ssl.vol.in b/tests/basic/gfapi/protocol-client-ssl.vol.in
new file mode 100644
index 00000000000..cdc0c9d0671
--- /dev/null
+++ b/tests/basic/gfapi/protocol-client-ssl.vol.in
@@ -0,0 +1,15 @@
+#
+# This .vol file expects that there is
+#
+# 1. GlusterD listening on @@HOSTNAME@@
+# 2. a volume that provides a brick on @@BRICKPATH@@
+# 3. the volume with the brick has been started
+#
+volume test
+ type protocol/client
+ option remote-host @@HOSTNAME@@
+ option remote-subvolume @@BRICKPATH@@
+ option transport-type socket
+ option transport.socket.ssl-enabled @@SSL@@
+end-volume
+
diff --git a/tests/basic/global-threading.t b/tests/basic/global-threading.t
new file mode 100644
index 00000000000..f7d34044b09
--- /dev/null
+++ b/tests/basic/global-threading.t
@@ -0,0 +1,104 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+# Test if the given process has a number of threads of a given type between
+# min and max.
+function check_threads() {
+ local pid="${1}"
+ local pattern="${2}"
+ local min="${3}"
+ local max="${4-}"
+ local count
+
+ count="$(ps hH -o comm ${pid} | grep "${pattern}" | wc -l)"
+ if [[ ${min} -gt ${count} ]]; then
+ return 1
+ fi
+ if [[ ! -z "${max}" && ${max} -lt ${count} ]]; then
+ return 1
+ fi
+
+ return 0
+}
+
+cleanup
+
+TEST glusterd
+
+# Glusterd shouldn't use any thread
+TEST check_threads $(get_glusterd_pid) glfs_tpw 0 0
+TEST check_threads $(get_glusterd_pid) glfs_iotwr 0 0
+
+TEST pkill -9 glusterd
+
+TEST glusterd --global-threading
+
+# Glusterd shouldn't use global threads, even if enabled
+TEST check_threads $(get_glusterd_pid) glfs_tpw 0 0
+TEST check_threads $(get_glusterd_pid) glfs_iotwr 0 0
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/b{0,1}
+
+# Normal configuration using io-threads on bricks
+TEST $CLI volume set $V0 config.global-threading off
+TEST $CLI volume set $V0 performance.iot-pass-through off
+TEST $CLI volume set $V0 performance.client-io-threads off
+TEST $CLI volume start $V0
+
+# There shouldn't be global threads
+TEST check_threads $(get_brick_pid $V0 $H0 $B0/b0) glfs_tpw 0 0
+TEST check_threads $(get_brick_pid $V0 $H0 $B0/b1) glfs_tpw 0 0
+
+# There should be at least 1 io-thread
+TEST check_threads $(get_brick_pid $V0 $H0 $B0/b0) glfs_iotwr 1
+TEST check_threads $(get_brick_pid $V0 $H0 $B0/b1) glfs_iotwr 1
+
+# Self-heal should be using global threads
+TEST check_threads $(get_shd_process_pid) glfs_tpw 1
+TEST check_threads $(get_shd_process_pid) glfs_iotwr 0 0
+
+TEST $CLI volume stop $V0
+
+# Configuration with global threads on bricks
+TEST $CLI volume set $V0 config.global-threading on
+TEST $CLI volume set $V0 performance.iot-pass-through on
+TEST $CLI volume start $V0
+
+# There should be at least 1 global thread
+TEST check_threads $(get_brick_pid $V0 $H0 $B0/b0) glfs_tpw 1
+TEST check_threads $(get_brick_pid $V0 $H0 $B0/b1) glfs_tpw 1
+
+# There shouldn't be any io-thread worker threads
+TEST check_threads $(get_brick_pid $V0 $H0 $B0/b0) glfs_iotwr 0 0
+TEST check_threads $(get_brick_pid $V0 $H0 $B0/b1) glfs_iotwr 0 0
+
+# Normal configuration using io-threads on clients
+TEST $CLI volume set $V0 performance.iot-pass-through off
+TEST $CLI volume set $V0 performance.client-io-threads on
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+# There shouldn't be global threads
+TEST check_threads $(get_mount_process_pid $V0 $M0) glfs_tpw 0 0
+
+# There should be at least 1 io-thread
+TEST check_threads $(get_mount_process_pid $V0 $M0) glfs_iotwr 1
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# Configuration with global threads on clients
+TEST $CLI volume set $V0 performance.client-io-threads off
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --global-threading $M0
+
+# There should be at least 1 global thread
+TEST check_threads $(get_mount_process_pid $V0 $M0) glfs_tpw 1
+
+# There shouldn't be io-threads
+TEST check_threads $(get_mount_process_pid $V0 $M0) glfs_iotwr 0 0
+
+# Some basic volume access checks with global-threading enabled everywhere
+TEST mkdir ${M0}/dir
+TEST dd if=/dev/zero of=${M0}/dir/file bs=128k count=8
+
+cleanup
diff --git a/tests/basic/glusterd-restart-shd-mux.t b/tests/basic/glusterd-restart-shd-mux.t
new file mode 100644
index 00000000000..46d0dac2fce
--- /dev/null
+++ b/tests/basic/glusterd-restart-shd-mux.t
@@ -0,0 +1,96 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TESTS_EXPECTED_IN_LOOP=20
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume start $V0
+
+for i in $(seq 1 3); do
+ TEST $CLI volume create ${V0}_afr$i replica 3 $H0:$B0/${V0}_afr${i}{0,1,2,3,4,5}
+ TEST $CLI volume start ${V0}_afr$i
+ TEST $CLI volume create ${V0}_ec$i disperse 6 redundancy 2 $H0:$B0/${V0}_ec${i}{0,1,2,3,4,5}
+ TEST $CLI volume start ${V0}_ec$i
+done
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+
+#Stop the glusterd
+TEST pkill glusterd
+#Only stopping glusterd, so there will be one shd
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" shd_count
+TEST glusterd
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+#Check the thread count become to number of volumes*number of ec subvolume (3*6=18)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "ec_shd_index_healer"
+#Check the thread count become to number of volumes*number of afr subvolume (4*6=24)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^24$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+shd_pid=$(get_shd_mux_pid $V0)
+for i in $(seq 1 3); do
+ afr_path="/var/run/gluster/shd/${V0}_afr$i/${V0}_afr$i-shd.pid"
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" cat $afr_path
+ ec_path="/var/run/gluster/shd/${V0}_ec$i/${V0}_ec${i}-shd.pid"
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" cat $ec_path
+done
+
+#Reboot a node scenario
+TEST pkill gluster
+#Only stopped glusterd, so there will be one shd
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count
+
+TEST glusterd
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+
+#Check the thread count become to number of volumes*number of ec subvolume (3*6=18)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "ec_shd_index_healer"
+#Check the thread count become to number of volumes*number of afr subvolume (4*6=24)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^24$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+shd_pid=$(get_shd_mux_pid $V0)
+for i in $(seq 1 3); do
+ afr_path="/var/run/gluster/shd/${V0}_afr$i/${V0}_afr$i-shd.pid"
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" cat $afr_path
+ ec_path="/var/run/gluster/shd/${V0}_ec$i/${V0}_ec${i}-shd.pid"
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" cat $ec_path
+done
+
+for i in $(seq 1 3); do
+ TEST $CLI volume stop ${V0}_afr$i
+ TEST $CLI volume stop ${V0}_ec$i
+done
+
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}3
+
+TEST touch $M0/foo{1..100}
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^204$" get_pending_heal_count $V0
+
+TEST $CLI volume start ${V0} force
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+TEST rm -rf $M0/*
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+
+TEST $CLI volume stop ${V0}
+TEST $CLI volume delete ${V0}
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^0$" shd_count
+
+cleanup
diff --git a/tests/basic/glusterd/arbiter-volume.t b/tests/basic/glusterd/arbiter-volume.t
deleted file mode 100644
index e9edf046905..00000000000
--- a/tests/basic/glusterd/arbiter-volume.t
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/bash
-
-. $(dirname $0)/../../include.rc
-. $(dirname $0)/../../volume.rc
-
-# This command tests the volume create command validation for arbiter volumes.
-
-cleanup;
-TEST glusterd
-TEST pidof glusterd
-
-TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/b1 $H0:$B0/b2 $H0:$B0/b3
-EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks"
-
-TEST $CLI volume delete $V0
-TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/b{4..9}
-EXPECT "2 x \(2 \+ 1\) = 6" volinfo_field $V0 "Number of Bricks"
-
-TEST $CLI volume delete $V0
-
-TEST rm -rf $B0/b{1..3}
-TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/b1 $H0:$B0/b2 $H0:$B0/b3
-EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks"
-TEST killall -15 glusterd
-TEST glusterd
-TEST pidof glusterd
-EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks"
-
-#cleanup
diff --git a/tests/basic/glusterd/check-cloudsync-ancestry.t b/tests/basic/glusterd/check-cloudsync-ancestry.t
new file mode 100644
index 00000000000..ff6ffee8db7
--- /dev/null
+++ b/tests/basic/glusterd/check-cloudsync-ancestry.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# When shard and cloudsync xlators enabled on a volume, shard xlator
+# should be an ancestor of cloudsync. This testcase is to check this condition.
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/b1 $H0:$B0/b2 $H0:$B0/b3
+
+volfile=$(gluster system:: getwd)"/vols/$V0/trusted-$V0.tcp-fuse.vol"
+
+#Test that both shard and cloudsync are not loaded
+EXPECT "N" volgen_volume_exists $volfile $V0-shard features shard
+EXPECT "N" volgen_volume_exists $volfile $V0-cloudsync features cloudsync
+
+#Enable shard and cloudsync in that order and check if volfile is correct
+TEST $CLI volume set $V0 shard on
+TEST $CLI volume set $V0 cloudsync on
+
+#Test that both shard and cloudsync are loaded
+EXPECT "Y" volgen_volume_exists $volfile $V0-shard features shard
+EXPECT "Y" volgen_volume_exists $volfile $V0-cloudsync features cloudsync
+
+EXPECT "Y" volgen_check_ancestry $volfile features shard features cloudsync
+
+#Disable shard and cloudsync
+TEST $CLI volume set $V0 shard off
+TEST $CLI volume set $V0 cloudsync off
+
+#Test that both shard and cloudsync are not loaded
+EXPECT "N" volgen_volume_exists $volfile $V0-shard features shard
+EXPECT "N" volgen_volume_exists $volfile $V0-cloudsync features cloudsync
+
+#Enable cloudsync and shard in that order and check if volfile is correct
+TEST $CLI volume set $V0 cloudsync on
+TEST $CLI volume set $V0 shard on
+
+#Test that both shard and cloudsync are loaded
+EXPECT "Y" volgen_volume_exists $volfile $V0-shard features shard
+EXPECT "Y" volgen_volume_exists $volfile $V0-cloudsync features cloudsync
+
+EXPECT "Y" volgen_check_ancestry $volfile features shard features cloudsync
+
+cleanup;
diff --git a/tests/basic/glusterd/disperse-create.t b/tests/basic/glusterd/disperse-create.t
index 384c675c882..db8a621d48e 100644
--- a/tests/basic/glusterd/disperse-create.t
+++ b/tests/basic/glusterd/disperse-create.t
@@ -20,6 +20,10 @@ TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/b7 $H0:$B0/b8 $H0:$B
EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks"
TEST $CLI volume delete $V0
+TEST $CLI volume create $V0 disperse-data 2 $H0:$B0/b10 $H0:$B0/b11 $H0:$B0/b12
+EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks"
+
+TEST $CLI volume delete $V0
TEST $CLI volume create $V0 redundancy 1 $H0:$B0/b10 $H0:$B0/b11 $H0:$B0/b12
EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks"
diff --git a/tests/basic/glusterd/heald.t b/tests/basic/glusterd/heald.t
index ca112ad0b75..7dae3c3f0fb 100644
--- a/tests/basic/glusterd/heald.t
+++ b/tests/basic/glusterd/heald.t
@@ -7,11 +7,16 @@
# Covers enable/disable at the moment. Will be enhanced later to include
# the other commands as well.
+function is_pid_running {
+ local pid=$1
+ num=`ps auxww | grep glustershd | grep $pid | grep -v grep | wc -l`
+ echo $num
+}
+
cleanup;
TEST glusterd
TEST pidof glusterd
-volfile=$(gluster system:: getwd)"/glustershd/glustershd-server.vol"
#Commands should fail when volume doesn't exist
TEST ! $CLI volume heal non-existent-volume enable
TEST ! $CLI volume heal non-existent-volume disable
@@ -20,51 +25,55 @@ TEST ! $CLI volume heal non-existent-volume disable
# volumes
TEST $CLI volume create dist $H0:$B0/dist
TEST $CLI volume start dist
-TEST "[ -z $(get_shd_process_pid)]"
+TEST "[ -z $(get_shd_process_pid dist)]"
TEST ! $CLI volume heal dist enable
TEST ! $CLI volume heal dist disable
# Commands should work on replicate/disperse volume.
TEST $CLI volume create r2 replica 2 $H0:$B0/r2_0 $H0:$B0/r2_1
-TEST "[ -z $(get_shd_process_pid)]"
+TEST "[ -z $(get_shd_process_pid r2)]"
TEST $CLI volume start r2
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid r2
TEST $CLI volume heal r2 enable
EXPECT "enable" volume_option r2 "cluster.self-heal-daemon"
-EXPECT "enable" volgen_volume_option $volfile r2-replicate-0 cluster replicate self-heal-daemon
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
+volfiler2=$(gluster system:: getwd)"/vols/r2/r2-shd.vol"
+EXPECT "enable" volgen_volume_option $volfiler2 r2-replicate-0 cluster replicate self-heal-daemon
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid r2
+pid=$( get_shd_process_pid r2 )
TEST $CLI volume heal r2 disable
EXPECT "disable" volume_option r2 "cluster.self-heal-daemon"
-EXPECT "disable" volgen_volume_option $volfile r2-replicate-0 cluster replicate self-heal-daemon
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
+EXPECT "disable" volgen_volume_option $volfiler2 r2-replicate-0 cluster replicate self-heal-daemon
+EXPECT "1" is_pid_running $pid
# Commands should work on disperse volume.
TEST $CLI volume create ec2 disperse 3 redundancy 1 $H0:$B0/ec2_0 $H0:$B0/ec2_1 $H0:$B0/ec2_2
TEST $CLI volume start ec2
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ec2
TEST $CLI volume heal ec2 enable
EXPECT "enable" volume_option ec2 "cluster.disperse-self-heal-daemon"
-EXPECT "enable" volgen_volume_option $volfile ec2-disperse-0 cluster disperse self-heal-daemon
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
+volfileec2=$(gluster system:: getwd)"/vols/ec2/ec2-shd.vol"
+EXPECT "enable" volgen_volume_option $volfileec2 ec2-disperse-0 cluster disperse self-heal-daemon
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ec2
+pid=$(get_shd_process_pid ec2)
TEST $CLI volume heal ec2 disable
EXPECT "disable" volume_option ec2 "cluster.disperse-self-heal-daemon"
-EXPECT "disable" volgen_volume_option $volfile ec2-disperse-0 cluster disperse self-heal-daemon
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
+EXPECT "disable" volgen_volume_option $volfileec2 ec2-disperse-0 cluster disperse self-heal-daemon
+EXPECT "1" is_pid_running $pid
#Check that shd graph is rewritten correctly on volume stop/start
-EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse
-EXPECT "Y" volgen_volume_exists $volfile r2-replicate-0 cluster replicate
+EXPECT "Y" volgen_volume_exists $volfileec2 ec2-disperse-0 cluster disperse
+
+EXPECT "Y" volgen_volume_exists $volfiler2 r2-replicate-0 cluster replicate
TEST $CLI volume stop r2
-EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse
-EXPECT "N" volgen_volume_exists $volfile r2-replicate-0 cluster replicate
+EXPECT "Y" volgen_volume_exists $volfileec2 ec2-disperse-0 cluster disperse
TEST $CLI volume stop ec2
# When both the volumes are stopped glustershd volfile is not modified just the
# process is stopped
-TEST "[ -z $(get_shd_process_pid) ]"
+TEST "[ -z $(get_shd_process_pid dist) ]"
+TEST "[ -z $(get_shd_process_pid ec2) ]"
TEST $CLI volume start r2
-EXPECT "N" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse
-EXPECT "Y" volgen_volume_exists $volfile r2-replicate-0 cluster replicate
+EXPECT "Y" volgen_volume_exists $volfiler2 r2-replicate-0 cluster replicate
TEST $CLI volume set r2 self-heal-daemon on
TEST $CLI volume set r2 cluster.self-heal-daemon off
diff --git a/tests/basic/glusterd/thin-arbiter-volume-probe.t b/tests/basic/glusterd/thin-arbiter-volume-probe.t
new file mode 100644
index 00000000000..acc6943806d
--- /dev/null
+++ b/tests/basic/glusterd/thin-arbiter-volume-probe.t
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+
+#This tests if the thin-arbiter-count is transferred to the other peer.
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+cleanup;
+
+TEST launch_cluster 2;
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers
+
+kill_glusterd 2
+$CLI_1 volume create $V0 replica 2 thin-arbiter 1 $H0:$B0/b{1..3}
+TEST $glusterd_2
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers
+EXPECT "1 x 2 = 2" volinfo_field_1 $V0 "Number of Bricks"
+EXPECT "1 x 2 = 2" volinfo_field_2 $V0 "Number of Bricks"
+
+cleanup;
diff --git a/tests/basic/glusterd/thin-arbiter-volume.t b/tests/basic/glusterd/thin-arbiter-volume.t
new file mode 100644
index 00000000000..4e813890a45
--- /dev/null
+++ b/tests/basic/glusterd/thin-arbiter-volume.t
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../ volume.rc
+. $(dirname $0)/../../thin-arbiter.rc
+
+#This command tests the volume create command validation for thin-arbiter volumes.
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 thin-arbiter 1 $H0:$B0/b1 $H0:$B0/b2 $H0:$B0/b3
+EXPECT "1 x 2 = 2" volinfo_field $V0 "Number of Bricks"
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+TEST touch $M0/a.txt
+TEST ls $B0/b1/a.txt
+TEST ls $B0/b2/a.txt
+TEST ! ls $B0/b3/a.txt
+
+TEST umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+TEST $CLI volume create $V0 replica 2 thin-arbiter 1 $H0:$B0/b{4..8}
+EXPECT "2 x 2 = 4" volinfo_field $V0 "Number of Bricks"
+
+TEST $CLI volume delete $V0
+
+TEST rm -rf $B0/b{1..3}
+
+TEST $CLI volume create $V0 replica 2 thin-arbiter 1 $H0:$B0/b1 $H0:$B0/b2 $H0:$B0/b3
+EXPECT "1 x 2 = 2" volinfo_field $V0 "Number of Bricks"
+
+TEST killall -15 glusterd
+TEST glusterd
+TEST pidof glusterd
+EXPECT "1 x 2 = 2" volinfo_field $V0 "Number of Bricks"
+
+cleanup
+
diff --git a/tests/basic/glusterd/volfile_server_switch.t b/tests/basic/glusterd/volfile_server_switch.t
index 309060919b7..e11cfed509a 100644
--- a/tests/basic/glusterd/volfile_server_switch.t
+++ b/tests/basic/glusterd/volfile_server_switch.t
@@ -34,7 +34,7 @@ TEST glusterfs --volfile-id=/$V0 --volfile-server=$H1 --volfile-server=$H2 --vol
TEST kill_glusterd 1
-TEST $CLI_2 volume set $V0 performance.io-cache off
+TEST $CLI_2 volume set $V0 performance.write-behind off
# make sure by this time directory will be created
# TODO: suggest ideal time to wait
diff --git a/tests/basic/glusterd/volume-brick-count.t b/tests/basic/glusterd/volume-brick-count.t
new file mode 100644
index 00000000000..dc1a5278f4f
--- /dev/null
+++ b/tests/basic/glusterd/volume-brick-count.t
@@ -0,0 +1,61 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function test_volume_config()
+{
+ volname=$1
+ type_string=$2
+ brickCount=$3
+ distCount=$4
+ replicaCount=$5
+ arbiterCount=$6
+ disperseCount=$7
+ redundancyCount=$8
+
+ EXPECT "$type_string" volinfo_field $volname "Number of Bricks"
+ EXPECT "$brickCount" get-xml "volume info $volname" "brickCount"
+ EXPECT "$distCount" get-xml "volume info $volname" "distCount"
+ EXPECT "$replicaCount" get-xml "volume info $volname" "replicaCount"
+ EXPECT "$arbiterCount" get-xml "volume info $volname" "arbiterCount"
+ EXPECT "$disperseCount" get-xml "volume info $volname" "disperseCount"
+ EXPECT "$redundancyCount" get-xml "volume info $volname" "redundancyCount"
+}
+
+# This command tests the volume create command and number of bricks for different volume types.
+cleanup;
+TESTS_EXPECTED_IN_LOOP=56
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create ${V0}_1 replica 3 arbiter 1 $H0:$B0/b1 $H0:$B0/b2 $H0:$B0/b3
+test_volume_config "${V0}_1" "1 x \(2 \+ 1\) = 3" "3" "1" "3" "1" "0" "0"
+
+TEST $CLI volume create ${V0}_2 replica 3 arbiter 1 $H0:$B0/b{4..9}
+test_volume_config "${V0}_2" "2 x \(2 \+ 1\) = 6" "6" "2" "3" "1" "0" "0"
+
+
+TEST $CLI volume create ${V0}_3 replica 3 arbiter 1 $H0:$B0/b{10..12}
+test_volume_config "${V0}_3" "1 x \(2 \+ 1\) = 3" "3" "1" "3" "1" "0" "0"
+TEST killall -15 glusterd
+TEST glusterd
+TEST pidof glusterd
+test_volume_config "${V0}_3" "1 x \(2 \+ 1\) = 3" "3" "1" "3" "1" "0" "0"
+
+TEST $CLI volume create ${V0}_4 replica 3 $H0:$B0/b{13..15}
+test_volume_config "${V0}_4" "1 x 3 = 3" "3" "1" "3" "0" "0" "0"
+
+TEST $CLI volume create ${V0}_5 replica 3 $H0:$B0/b{16..21}
+test_volume_config "${V0}_5" "2 x 3 = 6" "6" "2" "3" "0" "0" "0"
+
+TEST $CLI volume create ${V0}_6 disperse 3 redundancy 1 $H0:$B0/b{22..24}
+test_volume_config "${V0}_6" "1 x \(2 \+ 1\) = 3" "3" "1" "1" "0" "3" "1"
+
+TEST $CLI volume create ${V0}_7 disperse 3 redundancy 1 $H0:$B0/b{25..30}
+test_volume_config "${V0}_7" "2 x \(2 \+ 1\) = 6" "6" "2" "1" "0" "3" "1"
+
+TEST $CLI volume create ${V0}_8 $H0:$B0/b{31..33}
+test_volume_config "${V0}_8" "3" "3" "3" "1" "0" "0" "0"
+
+cleanup
diff --git a/tests/basic/graph-cleanup-brick-down-shd-mux.t b/tests/basic/graph-cleanup-brick-down-shd-mux.t
new file mode 100644
index 00000000000..3c621cdcc26
--- /dev/null
+++ b/tests/basic/graph-cleanup-brick-down-shd-mux.t
@@ -0,0 +1,64 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TESTS_EXPECTED_IN_LOOP=4
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume start $V0
+
+for i in $(seq 1 2); do
+ TEST $CLI volume create ${V0}_afr$i replica 3 $H0:$B0/${V0}_afr${i}{0,1,2,3,4,5}
+ TEST $CLI volume start ${V0}_afr$i
+ TEST $CLI volume create ${V0}_ec$i disperse 6 redundancy 2 $H0:$B0/${V0}_ec${i}{0,1,2,3,4,5}
+ TEST $CLI volume start ${V0}_ec$i
+done
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+#Check the thread count become to number of volumes*number of ec subvolume (2*6=12)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd $V0 "ec_shd_index_healer"
+#Check the thread count become to number of volumes*number of afr subvolume (3*6=18)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+#kill one brick and test cleanup
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST $CLI volume stop $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer"
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer"
+
+#kill an entire subvol and test cleanup
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+#wait for some time to create a race sceanrio
+sleep 1
+TEST $CLI volume stop $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer"
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer"
+
+#kill all bricks and test cleanup
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST kill_brick $V0 $H0 $B0/${V0}3
+TEST kill_brick $V0 $H0 $B0/${V0}4
+TEST kill_brick $V0 $H0 $B0/${V0}5
+#wait for some time to create a race sceanrio
+sleep 2
+
+TEST $CLI volume stop $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer"
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer"
+
+cleanup
diff --git a/tests/basic/logchecks-messages.h b/tests/basic/logchecks-messages.h
index 1ae68646445..bf364848ec7 100644
--- a/tests/basic/logchecks-messages.h
+++ b/tests/basic/logchecks-messages.h
@@ -11,7 +11,7 @@
#ifndef _LOGCHECKS_MESSAGES_H_
#define _LOGCHECKS_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* NOTE: Rules for message additions
* 1) Each instance of a message is _better_ left with a unique message ID, even
diff --git a/tests/basic/logchecks.c b/tests/basic/logchecks.c
index 86891057230..df0be28ace0 100644
--- a/tests/basic/logchecks.c
+++ b/tests/basic/logchecks.c
@@ -11,9 +11,9 @@
#include <stdio.h>
#include <unistd.h>
-#include "glusterfs.h"
-#include "globals.h"
-#include "logging.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/globals.h>
+#include <glusterfs/logging.h>
#include "logchecks-messages.h"
#include "../../libglusterfs/src/logging.h"
diff --git a/tests/basic/metadisp/fsyncdir.c b/tests/basic/metadisp/fsyncdir.c
new file mode 100644
index 00000000000..62b532b9ce4
--- /dev/null
+++ b/tests/basic/metadisp/fsyncdir.c
@@ -0,0 +1,29 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <fcntl.h>
+
+int
+main(int argc, char **argv)
+{
+ int pfd;
+
+ pfd = open(argv[1], O_RDONLY | O_DIRECTORY);
+ if (pfd == (-1)) {
+ perror("open");
+ return EXIT_FAILURE;
+ }
+
+ if (rename(argv[2], argv[3]) == (-1)) {
+ perror("rename");
+ return EXIT_FAILURE;
+ }
+
+ if (fsync(pfd) == (-1)) {
+ perror("fsync");
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/tests/basic/metadisp/ftruncate.c b/tests/basic/metadisp/ftruncate.c
new file mode 100644
index 00000000000..c9185212c31
--- /dev/null
+++ b/tests/basic/metadisp/ftruncate.c
@@ -0,0 +1,34 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <fcntl.h>
+
+int
+main(int argc, char **argv)
+{
+ int pfd;
+
+ pfd = open(argv[1], O_RDWR);
+ if (pfd == (-1)) {
+ perror("open");
+ return EXIT_FAILURE;
+ }
+
+ if (ftruncate(pfd, 0) == (-1)) {
+ perror("ftruncate");
+ return EXIT_FAILURE;
+ }
+
+ if (write(pfd, "hello", 5) == (-1)) {
+ perror("write");
+ return EXIT_FAILURE;
+ }
+
+ if (fsync(pfd) == (-1)) {
+ perror("fsync");
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/tests/basic/metadisp/fxattr.c b/tests/basic/metadisp/fxattr.c
new file mode 100644
index 00000000000..e552057778a
--- /dev/null
+++ b/tests/basic/metadisp/fxattr.c
@@ -0,0 +1,107 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <fcntl.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/xattr.h>
+
+static char MY_XATTR[] = "user.fxtest";
+static char *PROGRAM;
+#define CONSUME(v) \
+ do { \
+ if (!argc) { \
+ fprintf(stderr, "missing argument\n"); \
+ return EXIT_FAILURE; \
+ } \
+ v = argv[0]; \
+ ++argv; \
+ --argc; \
+ } while (0)
+
+static int
+do_get(int argc, char **argv, int fd)
+{
+ char *value;
+ int ret;
+ char buf[1024];
+
+ CONSUME(value);
+
+ ret = fgetxattr(fd, MY_XATTR, buf, sizeof(buf));
+ if (ret == (-1)) {
+ perror("fgetxattr");
+ return EXIT_FAILURE;
+ }
+
+ if (strncmp(buf, value, ret) != 0) {
+ fprintf(stderr, "data mismatch\n");
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
+
+static int
+do_set(int argc, char **argv, int fd)
+{
+ char *value;
+ int ret;
+
+ CONSUME(value);
+
+ ret = fsetxattr(fd, MY_XATTR, value, strlen(value), 0);
+ if (ret == (-1)) {
+ perror("fsetxattr");
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
+
+static int
+do_remove(int argc, char **argv, int fd)
+{
+ int ret;
+
+ ret = fremovexattr(fd, MY_XATTR);
+ if (ret == (-1)) {
+ perror("femovexattr");
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
+
+int
+main(int argc, char **argv)
+{
+ int fd;
+ char *path;
+ char *cmd;
+
+ CONSUME(PROGRAM);
+ CONSUME(path);
+ CONSUME(cmd);
+
+ fd = open(path, O_RDWR);
+ if (fd == (-1)) {
+ perror("open");
+ return EXIT_FAILURE;
+ }
+
+ if (strcmp(cmd, "get") == 0) {
+ return do_get(argc, argv, fd);
+ }
+
+ if (strcmp(cmd, "set") == 0) {
+ return do_set(argc, argv, fd);
+ }
+
+ if (strcmp(cmd, "remove") == 0) {
+ return do_remove(argc, argv, fd);
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/tests/basic/metadisp/gfs-fsetxattr.c b/tests/basic/metadisp/gfs-fsetxattr.c
new file mode 100644
index 00000000000..63578bc528f
--- /dev/null
+++ b/tests/basic/metadisp/gfs-fsetxattr.c
@@ -0,0 +1,141 @@
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+int gfapi = 1;
+
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ int ret = 0;
+ int i = 0;
+ glfs_fd_t *fd = NULL;
+ char *topdir = "topdir", *filename = "file1";
+ char *buf = NULL;
+ char *logfile = NULL;
+ char *hostname = NULL;
+ char *basename = NULL;
+ char *dir1 = NULL, *dir2 = NULL, *filename1 = NULL, *filename2 = NULL;
+ struct stat sb = {
+ 0,
+ };
+
+ if (argc != 5) {
+ fprintf(
+ stderr,
+ "Expect following args %s <hostname> <Vol> <log file> <basename>\n",
+ argv[0]);
+ return -1;
+ }
+
+ hostname = argv[1];
+ logfile = argv[3];
+ basename = argv[4];
+
+ fs = glfs_new(argv[2]);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL (%s)\n", strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_set_volfile_server failed ret:%d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_set_logging failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_init failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = asprintf(&dir1, "%s-dir", basename);
+ if (ret < 0) {
+ fprintf(stderr, "cannot construct filename (%s)", strerror(errno));
+ return ret;
+ }
+
+ ret = glfs_mkdir(fs, dir1, 0755);
+ if (ret < 0) {
+ fprintf(stderr, "mkdir(%s): %s\n", dir1, strerror(errno));
+ return -1;
+ }
+
+ fd = glfs_opendir(fs, dir1);
+ if (!fd) {
+ fprintf(stderr, "/: %s\n", strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_fsetxattr(fd, "user.dirfattr", "fsetxattr", 9, 0);
+ if (ret < 0) {
+ fprintf(stderr, "fsetxattr(%s): %d (%s)\n", dir1, ret, strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_closedir(fd);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_closedir failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = asprintf(&filename1, "%s-file", basename);
+ if (ret < 0) {
+ fprintf(stderr, "cannot construct filename (%s)", strerror(errno));
+ return ret;
+ }
+
+ ret = asprintf(&filename2, "%s-file-renamed", basename);
+ if (ret < 0) {
+ fprintf(stderr, "cannot construct filename (%s)", strerror(errno));
+ return ret;
+ }
+
+ fd = glfs_creat(fs, filename1, O_RDWR, 0644);
+ if (!fd) {
+ fprintf(stderr, "%s: (%p) %s\n", filename1, fd, strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_rename(fs, filename1, filename2);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_rename failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_lstat(fs, filename2, &sb);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_lstat failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_fsetxattr(fd, "user.filefattr", "fsetxattr", 9, 0);
+ if (ret < 0) {
+ fprintf(stderr, "fsetxattr(%s): %d (%s)\n", dir1, ret, strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_close(fd);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_close failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+}
diff --git a/tests/basic/metadisp/metadisp.t b/tests/basic/metadisp/metadisp.t
new file mode 100644
index 00000000000..894ffe07226
--- /dev/null
+++ b/tests/basic/metadisp/metadisp.t
@@ -0,0 +1,316 @@
+#!/usr/bin/env bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+
+# Considering `--enable-metadisp` is an option for `./configure`,
+# which is disabled by default, this test will never pass regression.
+# But to see the value of this test, run below after configuring
+# with above option :
+# `prove -vmfe '/bin/bash' tests/basic/metadisp/metadisp.t`
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST
+
+cleanup;
+
+TEST mkdir -p $B0/b0/{0,1}
+
+TEST setfattr -n trusted.glusterfs.volume-id -v 0xddab9eece7b64a95b07351a1f748f56f ${B0}/b0/0
+TEST setfattr -n trusted.glusterfs.volume-id -v 0xddab9eece7b64a95b07351a1f748f56f ${B0}/b0/1
+
+TEST $GFS --volfile=$(dirname $0)/metadisp.vol --volfile-id=$V0 $M0;
+
+NUM_FILES=40
+TEST touch $M0/{1..${NUM_FILES}}
+
+# each drive should get 40 files
+TEST [ $(dir -1 $B0/b0/0/ | wc -l) -eq $NUM_FILES ]
+TEST [ $(dir -1 $B0/b0/1/ | wc -l) -eq $NUM_FILES ]
+
+# now write some data to a file
+echo "hello" > $M0/3
+filename=$$
+echo "hello" > /tmp/metadisp-write-${filename}
+checksum=$(md5sum /tmp/metadisp-write-${filename} | awk '{print $1}')
+TEST [ "$(md5sum $M0/3 | awk '{print $1}')" == "$checksum" ]
+
+# check that the backend file exists on b1
+gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/b0/*/3))
+TEST [ $(dir -1 $B0/b0/1/$gfid | wc -l) -eq 1 ]
+
+# check that the backend file matches the frontend
+TEST [ "$(md5sum $B0/b0/1/$gfid | awk '{print $1}')" == "$checksum" ]
+
+# delete the file
+TEST rm $M0/3
+
+# ensure the frontend and backend files are cleaned up
+TEST ! -e $M0/3
+TEST ! [ stat $B0/b*/*/$gfid ]
+
+# Test TRUNCATE + WRITE flow
+echo "hello" | tee $M0/4
+echo "goo" | tee $M0/4
+filename=$$
+echo "goo" | tee /tmp/metadisp-truncate-${filename}
+checksum=$(md5sum /tmp/metadisp-truncate-${filename} | awk '{print $1}')
+TEST [ "$(md5sum $M0/4 | awk '{print $1}')" == "$checksum" ]
+
+# Test mkdir + rmdir.
+TEST mkdir $M0/rmdir_me
+nfiles=$(ls -d $B0/b*/*/rmdir_me 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "1" ]
+TEST rmdir $M0/rmdir_me
+nfiles=$(ls -d $B0/b*/*/rmdir_me 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "0" ]
+
+# Test rename.
+TEST touch $M0/rename_me
+nfiles=$(ls $B0/b*/*/rename_me 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "1" ]
+nfiles=$(ls $B0/b*/*/such_rename 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "0" ]
+TEST mv $M0/rename_me $M0/such_rename
+nfiles=$(ls $B0/b*/*/rename_me 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "0" ]
+nfiles=$(ls $B0/b*/*/such_rename 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "1" ]
+
+# Test rename of a file that doesn't exist.
+TEST ! mv $M0/does-not-exist $M0/neither-does-this
+
+
+# cleanup all the other files.
+TEST rm -v $M0/1 $M0/2 $M0/{4..${NUM_FILES}}
+TEST rm $M0/such_rename
+TEST [ $(ls /d/backends/b0/0/ | wc -l) -eq 0 ]
+TEST [ $(ls /d/backends/b0/1/ | wc -l) -eq 0 ]
+
+# Test CREATE flow
+NUM_FILES=40
+TEST touch $M0/{1..${NUM_FILES}}
+TEST [ $(ls /d/backends/b0/0/ | wc -l) -eq $NUM_FILES ]
+TEST [ $(ls /d/backends/b0/1/ | wc -l) -eq $NUM_FILES ]
+
+# Test UNLINK flow
+# No drives should have any files
+TEST rm -v $M0/{1..${NUM_FILES}}
+TEST [ $(ls /d/backends/b0/0/ | wc -l) -eq 0 ]
+TEST [ $(ls /d/backends/b0/1/ | wc -l) -eq 0 ]
+
+# Test CREATE + WRITE + READ flow
+filename=$$
+dd if=/dev/urandom of=/tmp/${filename} bs=1M count=10
+checksum=$(md5sum /tmp/${filename} | awk '{print $1}')
+TEST cp -v /tmp/${filename} $M0/1
+TEST cp -v /tmp/${filename} $M0/2
+TEST cp -v /tmp/${filename} $M0/3
+TEST cp -v /tmp/${filename} $M0/4
+TEST [ "$(md5sum $M0/1 | awk '{print $1}')" == "$checksum" ]
+TEST [ "$(md5sum $M0/2 | awk '{print $1}')" == "$checksum" ]
+TEST [ "$(md5sum $M0/3 | awk '{print $1}')" == "$checksum" ]
+TEST [ "$(md5sum $M0/4 | awk '{print $1}')" == "$checksum" ]
+
+# Test TRUNCATE + WRITE flow
+TEST dd if=/dev/zero of=$M0/1 bs=1M count=20
+
+# Check that readdir stats the files properly and we get the correct sizes
+TEST [ $(find $M0 -size +9M | wc -l) -eq 4 ];
+
+# Test mkdir + rmdir.
+TEST mkdir $M0/rmdir_me
+nfiles=$(ls -d $B0/b*/*/rmdir_me 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "1" ]
+TEST rmdir $M0/rmdir_me
+nfiles=$(ls -d $B0/b*/*/rmdir_me 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "0" ]
+
+# Test rename.
+# Still flaky, so disabled until it can be debugged.
+TEST touch $M0/rename_me
+nfiles=$(ls $B0/b*/*/rename_me 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "1" ]
+nfiles=$(ls $B0/b*/*/such_rename 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "0" ]
+TEST mv $M0/rename_me $M0/such_rename
+nfiles=$(ls $B0/b*/*/rename_me 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "0" ]
+nfiles=$(ls $B0/b*/*/such_rename 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "1" ]
+
+# Test rename of a file that doesn't exist.
+TEST ! mv $M0/does-not-exist $M0/neither-does-this
+
+# Test rename over an existing file.
+ok=yes
+for i in $(seq 0 9); do
+ echo foo > $M0/src$i
+ echo bar > $M0/dst$i
+done
+for i in $(seq 0 9); do
+ mv $M0/src$i $M0/dst$i
+done
+for i in $(seq 0 9); do
+ nfiles=$(cat $B0/b0/*/dst$i | wc -l)
+ if [ "$nfiles" = "2" ]; then
+ echo "COLLISION on dst$i"
+ (ls -l $B0/b0/*/dst$i; cat $B0/b0/*/dst$i) | sed "/^/s// /"
+ ok=no
+ fi
+done
+EXPECT "yes" echo $ok
+
+# Test rename of a directory.
+count_copies () {
+ ls -d $B0/b?/?/$1 2> /dev/null | wc -l
+}
+TEST mkdir $M0/foo_dir
+EXPECT 1 count_copies foo_dir
+EXPECT 0 count_copies bar_dir
+TEST mv $M0/foo_dir $M0/bar_dir
+EXPECT 0 count_copies foo_dir
+EXPECT 1 count_copies bar_dir
+
+for x in $(seq 0 99); do
+ touch $M0/target$x
+ ln -s $M0/target$x $M0/link$x
+done
+on_0=$(ls $B0/b*/0/link* | wc -l)
+on_1=$(ls $B0/b*/1/link* | wc -l)
+TEST [ "$on_0" -eq 100 ]
+TEST [ "$on_1" -eq 0 ]
+TEST [ "$(ls -l $M0/link* | wc -l)" = 100 ]
+
+# Test (hard) link.
+_test_hardlink () {
+ local b
+ local has_src
+ local has_dst
+ local src_inum
+ local dst_inum
+ touch $M0/hardsrc$1
+ ln $M0/hardsrc$1 $M0/harddst$1
+ for b in $B0/b{0}/{0,1}; do
+ [ -f $b/hardsrc$1 ]; has_src=$?
+ [ -f $b/harddst$1 ]; has_dst=$?
+ if [ "$has_src" != "$has_dst" ]; then
+ echo "MISSING $b/hardxxx$1 $has_src $has_dst"
+ return
+ fi
+ if [ "$has_src$has_dst" = "00" ]; then
+ src_inum=$(stat -c '%i' $b/hardsrc$1)
+ dst_inum=$(stat -c '%i' $b/harddst$1)
+ if [ "$dst_inum" != "$src_inum" ]; then
+ echo "MISMATCH $b/hardxx$i $src_inum $dst_inum"
+ return
+ fi
+ fi
+ done
+ echo "OK"
+}
+
+test_hardlink () {
+ local result=$(_test_hardlink $*)
+ # [ "$result" = "OK" ] || echo $result > /dev/tty
+ echo $result
+}
+
+# Do this multiple times to make sure colocation isn't a fluke.
+EXPECT "OK" test_hardlink 0
+EXPECT "OK" test_hardlink 1
+EXPECT "OK" test_hardlink 2
+EXPECT "OK" test_hardlink 3
+EXPECT "OK" test_hardlink 4
+EXPECT "OK" test_hardlink 5
+EXPECT "OK" test_hardlink 6
+EXPECT "OK" test_hardlink 7
+EXPECT "OK" test_hardlink 8
+EXPECT "OK" test_hardlink 9
+
+# Test remove hardlink source. ensure deleting one file
+# doesn't delete the data unless link-count is 1
+TEST mkdir $M0/hardlink
+TEST touch $M0/hardlink/fileA
+echo "data" >> $M0/hardlink/fileA
+checksum=$(md5sum $M0/hardlink/fileA | awk '{print $1}')
+TEST ln $M0/hardlink/fileA $M0/hardlink/fileB
+TEST [ $(dir -1 $M0/hardlink/ | wc -l) -eq 2 ]
+TEST rm $M0/hardlink/fileA
+TEST [ $(dir -1 $M0/hardlink/ | wc -l) -eq 1 ]
+TEST [ "$(md5sum $M0/hardlink/fileB | awk '{print $1}')" == "$checksum" ]
+
+#
+# FIXME: statfs values look ok but the test is bad
+#
+# Test statfs. If we're doing it right, the numbers for the mountpoint should be
+# double those for the brick filesystem times the number of bricks,
+# but unless we're on a completely idle
+# system (which never happens) the numbers can change even while this function
+# runs and that would trip us up. Do a sloppy comparison to deal with that.
+#compare_fields () {
+# val1=$(df $1 | grep / | awk "{print \$$3}")
+# val2=$(df $2 | grep / | awk "{print \$$3}")
+# [ "$val2" -gt "$(((val1/(29/10))*19/10))" -a "$val2" -lt "$(((val1/(31/10))*21/10))" ]
+#}
+
+#brick_df=$(df $B0 | grep /)
+#mount_df=$(df $M0 | grep /)
+#TEST compare_fields $B0 $M0 2 # Total blocks
+#TEST compare_fields $B0 $M0 3 # Used
+#TEST compare_fields $B0 $M0 4 # Available
+
+# Test removexattr.
+#RXATTR_FILE=$(get_file_not_on_disk0 rxtest)
+#TEST setfattr -n user.foo -v bar $M0/$RXATTR_FILE
+#TEST getfattr -n user.foo $B0/b0/1/$RXATTR_FILE
+#TEST setfattr -x user.foo $M0/$RXATTR_FILE
+#TEST ! getfattr -n user.foo $B0/b0/1/$RXATTR_FILE
+
+# Test fsyncdir. We can't really test whether it's doing the right thing,
+# but we can test that it doesn't fail and we can hand-check that it's calling
+# down to all of the disks instead of just one.
+#
+# P.S. There's no fsyncdir test in the rest of Gluster, so who even knows if
+# other translators are handling it correctly?
+
+#FSYNCDIR_EXE=$(dirname $0)/fsyncdir
+#build_tester ${FSYNCDIR_EXE}.c
+#TEST touch $M0/fsyncdir_src
+#TEST $FSYNCDIR_EXE $M0 $M0/fsyncdir_src $M0/fsyncdir_dst
+#TEST rm -f $FSYNCDIR_EXE
+
+# Test fsetxattr, fgetxattr, fremovexattr (in that order).
+FXATTR_FILE=$M0/fxfile1
+TEST touch $FXATTR_FILE
+FXATTR_EXE=$(dirname $0)/fxattr
+build_tester ${FXATTR_EXE}.c
+TEST ! getfattr -n user.fxtest $FXATTR_FILE
+TEST $FXATTR_EXE $FXATTR_FILE set value1
+TEST getfattr -n user.fxtest $FXATTR_FILE
+TEST setfattr -n user.fxtest -v value2 $FXATTR_FILE
+TEST $FXATTR_EXE $FXATTR_FILE get value2
+TEST $FXATTR_EXE $FXATTR_FILE remove
+TEST ! getfattr -n user.fxtest $FXATTR_FILE
+TEST rm -f $FXATTR_EXE
+
+# Test ftruncate
+FTRUNCATE_EXE=$(dirname $0)/ftruncate
+build_tester ${FTRUNCATE_EXE}.c
+FTRUNCATE_FILE=$M0/ftfile1
+TEST dd if=/dev/urandom of=$FTRUNCATE_FILE count=1 bs=1MB
+TEST $FTRUNCATE_EXE $FTRUNCATE_FILE
+#gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/b0/*/ftfile1))
+
+# Test fallocate, discard, zerofill. Actually we don't so much check that these
+# *work* as that they don't throw any errors (especially ENOENT because the
+# file's not on disk zero).
+FALLOC_FILE=fatest1
+TEST touch $M0/$FALLOC_FILE
+TEST fallocate -l $((4096*5)) $M0/$FALLOC_FILE
+TEST fallocate -p -o 4096 -l 4096 $M0/$FALLOC_FILE
+# This actually fails with "operation not supported" on most filesystems, so
+# don't leave it enabled except to test changes.
+#TEST fallocate -z -o $((4096*3)) -l 4096 $M0/$FALLOC_FILE
+
+#cleanup;
diff --git a/tests/basic/metadisp/metadisp.vol b/tests/basic/metadisp/metadisp.vol
new file mode 100644
index 00000000000..58ae2f6f2a8
--- /dev/null
+++ b/tests/basic/metadisp/metadisp.vol
@@ -0,0 +1,14 @@
+volume posix-0
+ type storage/posix
+ option directory /d/backends/b0/0
+end-volume
+
+volume posix-1
+ type storage/posix
+ option directory /d/backends/b0/1
+end-volume
+
+volume metadisp-0
+ type features/metadisp
+ subvolumes posix-0 posix-1
+end-volume
diff --git a/tests/basic/mount-options.disabled b/tests/basic/mount-options.disabled
index 2373e4461ce..a04c8686276 100644
--- a/tests/basic/mount-options.disabled
+++ b/tests/basic/mount-options.disabled
@@ -127,6 +127,9 @@ EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --volfile-server-transport=ib-verbs
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --auto-invalidation=off
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --volfile-server-port=socket
TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --volume-name=$V0
diff --git a/tests/basic/mount.t b/tests/basic/mount.t
index 26feeb9be0e..3a3d7cc9d8d 100755
--- a/tests/basic/mount.t
+++ b/tests/basic/mount.t
@@ -3,8 +3,9 @@
. $(dirname $0)/../include.rc
. $(dirname $0)/../nfs.rc
-cleanup;
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+cleanup;
## Start and create a volume
TEST glusterd
@@ -68,6 +69,9 @@ TEST rm -f $N0/newfile;
TEST ! stat $M0/newfile;
TEST ! stat $M1/newfile;
+# No need to check for status here right now
+$(dirname $0)/rpc-coverage.sh $N0 >/dev/null
+
## Before killing daemon to avoid deadlocks
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
diff --git a/tests/basic/multiple-volume-shd-mux.t b/tests/basic/multiple-volume-shd-mux.t
new file mode 100644
index 00000000000..d7cfbaec85f
--- /dev/null
+++ b/tests/basic/multiple-volume-shd-mux.t
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TESTS_EXPECTED_IN_LOOP=16
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume start $V0
+
+shd_pid=$(get_shd_mux_pid $V0)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+for i in $(seq 1 3); do
+ TEST $CLI volume create ${V0}_afr$i replica 3 $H0:$B0/${V0}_afr${i}{0,1,2,3,4,5}
+ TEST $CLI volume start ${V0}_afr$i
+ TEST $CLI volume create ${V0}_ec$i disperse 6 redundancy 2 $H0:$B0/${V0}_ec${i}{0,1,2,3,4,5}
+ TEST $CLI volume start ${V0}_ec$i
+done
+
+#Check the thread count become to number of volumes*number of ec subvolume (3*6=18)
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^18$" number_healer_threads_shd $V0 "ec_shd_index_healer"
+#Check the thread count become to number of volumes*number of afr subvolume (4*6=24)
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^24$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+#Delete the volumes
+for i in $(seq 1 3); do
+ TEST $CLI volume stop ${V0}_afr$i
+ TEST $CLI volume stop ${V0}_ec$i
+ TEST $CLI volume delete ${V0}_afr$i
+ TEST $CLI volume delete ${V0}_ec$i
+done
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+TEST $CLI volume stop ${V0}
+TEST $CLI volume delete ${V0}
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count
+
+cleanup
diff --git a/tests/basic/namespace.t b/tests/basic/namespace.t
index 25a6b98b440..d1bbe7eea29 100644
--- a/tests/basic/namespace.t
+++ b/tests/basic/namespace.t
@@ -4,6 +4,8 @@
. $(dirname $0)/../volume.rc
. $(dirname $0)/../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
# These hashes are a result of calling SuperFastHash
# on the corresponding folder names.
NAMESPACE_HASH=28153613
diff --git a/tests/basic/nl-cache.t b/tests/basic/nl-cache.t
index 141fb75d5c8..90c778c8a88 100755
--- a/tests/basic/nl-cache.t
+++ b/tests/basic/nl-cache.t
@@ -64,5 +64,35 @@ TEST rm $M0/dir1/file_link
TEST rmdir $M0/dir1/dir2
TEST rmdir $M0/dir1
+#Check mknod
+TEST ! ls -l $M0/dir
+TEST mkdir $M0/dir
+TEST mknod -m 0666 $M0/dir/block b 4 5
+TEST mknod -m 0666 $M0/dir/char c 1 5
+TEST mknod -m 0666 $M0/dir/fifo p
+TEST rm $M0/dir/block
+TEST rm $M0/dir/char
+TEST rm $M0/dir/fifo
+
+#Check getxattr
+TEST touch $M0/file1
+TEST getfattr -d -m. -e hex $M0/file1
+TEST getfattr -n "glusterfs.get_real_filename:file1" $M0;
+TEST getfattr -n "glusterfs.get_real_filename:FILE1" $M0;
+TEST ! getfattr -n "glusterfs.get_real_filename:FILE2" $M0;
+
+#Check statedump
+TEST generate_mount_statedump $V0 $M0
+TEST cleanup_mount_statedump $V0
+
+#Check reconfigure
+TEST $CLI volume reset $V0 nl-cache-timeout
+TEST $CLI volume reset $V0 nl-cache-positive-entry
+TEST $CLI volume reset $V0 nl-cache-limit
+TEST $CLI volume reset $V0 nl-cache-pass-through
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
cleanup;
#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/basic/nufa.t b/tests/basic/nufa.t
index 6221fb16a8f..cb09fc5bbbf 100644
--- a/tests/basic/nufa.t
+++ b/tests/basic/nufa.t
@@ -4,6 +4,8 @@
. $(dirname $0)/../volume.rc
. $(dirname $0)/../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup;
TEST glusterd
diff --git a/tests/basic/open-behind/open-behind.t b/tests/basic/open-behind/open-behind.t
new file mode 100644
index 00000000000..5e865d602e2
--- /dev/null
+++ b/tests/basic/open-behind/open-behind.t
@@ -0,0 +1,183 @@
+#!/bin/bash
+
+WD="$(dirname "${0}")"
+
+. ${WD}/../../include.rc
+. ${WD}/../../volume.rc
+
+function assign() {
+ local _assign_var="${1}"
+ local _assign_value="${2}"
+
+ printf -v "${_assign_var}" "%s" "${_assign_value}"
+}
+
+function pipe_create() {
+ local _pipe_create_var="${1}"
+ local _pipe_create_name
+ local _pipe_create_fd
+
+ _pipe_create_name="$(mktemp -u)"
+ mkfifo "${_pipe_create_name}"
+ exec {_pipe_create_fd}<>"${_pipe_create_name}"
+ rm "${_pipe_create_name}"
+
+ assign "${_pipe_create_var}" "${_pipe_create_fd}"
+}
+
+function pipe_close() {
+ local _pipe_close_fd="${!1}"
+
+ exec {_pipe_close_fd}>&-
+}
+
+function tester_start() {
+ declare -ag tester
+ local tester_in
+ local tester_out
+
+ pipe_create tester_in
+ pipe_create tester_out
+
+ ${WD}/tester <&${tester_in} >&${tester_out} &
+
+ tester=("$!" "${tester_in}" "${tester_out}")
+}
+
+function tester_send() {
+ declare -ag tester
+ local tester_res
+ local tester_extra
+
+ echo "${*}" >&${tester[1]}
+
+ read -t 3 -u ${tester[2]} tester_res tester_extra
+ echo "${tester_res} ${tester_extra}"
+ if [[ "${tester_res}" == "OK" ]]; then
+ return 0
+ fi
+
+ return 1
+}
+
+function tester_stop() {
+ declare -ag tester
+ local tester_res
+
+ tester_send "quit"
+
+ tester_res=0
+ if ! wait ${tester[0]}; then
+ tester_res=$?
+ fi
+
+ unset tester
+
+ return ${tester_res}
+}
+
+function count_open() {
+ local file="$(realpath "${B0}/${V0}/${1}")"
+ local count="0"
+ local inode
+ local ref
+
+ inode="$(stat -c %i "${file}")"
+
+ for fd in /proc/${BRICK_PID}/fd/*; do
+ ref="$(readlink "${fd}")"
+ if [[ "${ref}" == "${B0}/${V0}/"* ]]; then
+ if [[ "$(stat -c %i "${ref}")" == "${inode}" ]]; then
+ count="$((${count} + 1))"
+ fi
+ fi
+ done
+
+ echo "${count}"
+}
+
+cleanup
+
+TEST build_tester ${WD}/tester.c ${WD}/tester-fd.c
+
+TEST glusterd
+TEST pidof glusterd
+TEST ${CLI} volume create ${V0} ${H0}:${B0}/${V0}
+TEST ${CLI} volume set ${V0} flush-behind off
+TEST ${CLI} volume set ${V0} write-behind off
+TEST ${CLI} volume set ${V0} quick-read off
+TEST ${CLI} volume set ${V0} stat-prefetch on
+TEST ${CLI} volume set ${V0} io-cache off
+TEST ${CLI} volume set ${V0} open-behind on
+TEST ${CLI} volume set ${V0} lazy-open off
+TEST ${CLI} volume set ${V0} read-after-open off
+TEST ${CLI} volume start ${V0}
+
+TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
+
+BRICK_PID="$(get_brick_pid ${V0} ${H0} ${B0}/${V0})"
+
+TEST touch "${M0}/test"
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
+
+TEST tester_start
+
+TEST tester_send fd open 0 "${M0}/test"
+EXPECT_WITHIN 5 "1" count_open "/test"
+TEST tester_send fd close 0
+EXPECT_WITHIN 5 "0" count_open "/test"
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST ${CLI} volume set ${V0} lazy-open on
+TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
+
+TEST tester_send fd open 0 "${M0}/test"
+sleep 2
+EXPECT "0" count_open "/test"
+TEST tester_send fd write 0 "test"
+EXPECT "1" count_open "/test"
+TEST tester_send fd close 0
+EXPECT_WITHIN 5 "0" count_open "/test"
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
+
+TEST tester_send fd open 0 "${M0}/test"
+EXPECT "0" count_open "/test"
+EXPECT "test" tester_send fd read 0 64
+# Even though read-after-open is disabled, use-anonymous-fd is also disabled,
+# so reads need to open the file first.
+EXPECT "1" count_open "/test"
+TEST tester_send fd close 0
+EXPECT "0" count_open "/test"
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
+
+TEST tester_send fd open 0 "${M0}/test"
+EXPECT "0" count_open "/test"
+TEST tester_send fd open 1 "${M0}/test"
+EXPECT "2" count_open "/test"
+TEST tester_send fd close 0
+EXPECT_WITHIN 5 "1" count_open "/test"
+TEST tester_send fd close 1
+EXPECT_WITHIN 5 "0" count_open "/test"
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST ${CLI} volume set ${V0} read-after-open on
+TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
+
+TEST tester_send fd open 0 "${M0}/test"
+EXPECT "0" count_open "/test"
+EXPECT "test" tester_send fd read 0 64
+EXPECT "1" count_open "/test"
+TEST tester_send fd close 0
+EXPECT_WITHIN 5 "0" count_open "/test"
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST tester_stop
+
+cleanup
diff --git a/tests/basic/open-behind/tester-fd.c b/tests/basic/open-behind/tester-fd.c
new file mode 100644
index 00000000000..00f02bc5b0a
--- /dev/null
+++ b/tests/basic/open-behind/tester-fd.c
@@ -0,0 +1,99 @@
+/*
+ Copyright (c) 2020 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "tester.h"
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+
+static int32_t
+fd_open(context_t *ctx, command_t *cmd)
+{
+ obj_t *obj;
+ int32_t fd;
+
+ obj = cmd->args[0].obj.ref;
+
+ fd = open(cmd->args[1].str.data, O_RDWR);
+ if (fd < 0) {
+ return error(errno, "open() failed");
+ }
+
+ obj->type = OBJ_TYPE_FD;
+ obj->fd = fd;
+
+ out_ok("%d", fd);
+
+ return 0;
+}
+
+static int32_t
+fd_close(context_t *ctx, command_t *cmd)
+{
+ obj_t *obj;
+
+ obj = cmd->args[0].obj.ref;
+ obj->type = OBJ_TYPE_NONE;
+
+ if (close(obj->fd) != 0) {
+ return error(errno, "close() failed");
+ }
+
+ out_ok();
+
+ return 0;
+}
+
+static int32_t
+fd_write(context_t *ctx, command_t *cmd)
+{
+ ssize_t len, ret;
+
+ len = strlen(cmd->args[1].str.data);
+ ret = write(cmd->args[0].obj.ref->fd, cmd->args[1].str.data, len);
+ if (ret < 0) {
+ return error(errno, "write() failed");
+ }
+
+ out_ok("%zd", ret);
+
+ return 0;
+}
+
+static int32_t
+fd_read(context_t *ctx, command_t *cmd)
+{
+ char data[cmd->args[1].num.value + 1];
+ ssize_t ret;
+
+ ret = read(cmd->args[0].obj.ref->fd, data, cmd->args[1].num.value);
+ if (ret < 0) {
+ return error(errno, "read() failed");
+ }
+
+ data[ret] = 0;
+
+ out_ok("%zd %s", ret, data);
+
+ return 0;
+}
+
+command_t fd_commands[] = {
+ {"open", fd_open, CMD_ARGS(ARG_VAL(OBJ_TYPE_NONE), ARG_STR(1024))},
+ {"close", fd_close, CMD_ARGS(ARG_VAL(OBJ_TYPE_FD))},
+ {"write", fd_write, CMD_ARGS(ARG_VAL(OBJ_TYPE_FD), ARG_STR(1024))},
+ {"read", fd_read, CMD_ARGS(ARG_VAL(OBJ_TYPE_FD), ARG_NUM(0, 1024))},
+ CMD_END};
diff --git a/tests/basic/open-behind/tester.c b/tests/basic/open-behind/tester.c
new file mode 100644
index 00000000000..b2da71c8385
--- /dev/null
+++ b/tests/basic/open-behind/tester.c
@@ -0,0 +1,444 @@
+/*
+ Copyright (c) 2020 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "tester.h"
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+
+static void *
+mem_alloc(size_t size)
+{
+ void *ptr;
+
+ ptr = malloc(size);
+ if (ptr == NULL) {
+ error(ENOMEM, "Failed to allocate memory (%zu bytes)", size);
+ }
+
+ return ptr;
+}
+
+static void
+mem_free(void *ptr)
+{
+ free(ptr);
+}
+
+static bool
+buffer_create(context_t *ctx, size_t size)
+{
+ ctx->buffer.base = mem_alloc(size);
+ if (ctx->buffer.base == NULL) {
+ return false;
+ }
+
+ ctx->buffer.size = size;
+ ctx->buffer.len = 0;
+ ctx->buffer.pos = 0;
+
+ return true;
+}
+
+static void
+buffer_destroy(context_t *ctx)
+{
+ mem_free(ctx->buffer.base);
+ ctx->buffer.size = 0;
+ ctx->buffer.len = 0;
+}
+
+static int32_t
+buffer_get(context_t *ctx)
+{
+ ssize_t len;
+
+ if (ctx->buffer.pos >= ctx->buffer.len) {
+ len = read(0, ctx->buffer.base, ctx->buffer.size);
+ if (len < 0) {
+ return error(errno, "read() failed");
+ }
+ if (len == 0) {
+ return 0;
+ }
+
+ ctx->buffer.len = len;
+ ctx->buffer.pos = 0;
+ }
+
+ return ctx->buffer.base[ctx->buffer.pos++];
+}
+
+static int32_t
+str_skip_spaces(context_t *ctx, int32_t current)
+{
+ while ((current > 0) && (current != '\n') && isspace(current)) {
+ current = buffer_get(ctx);
+ }
+
+ return current;
+}
+
+static int32_t
+str_token(context_t *ctx, char *buffer, uint32_t size, int32_t current)
+{
+ uint32_t len;
+
+ current = str_skip_spaces(ctx, current);
+
+ len = 0;
+ while ((size > 0) && (current > 0) && (current != '\n') &&
+ !isspace(current)) {
+ len++;
+ *buffer++ = current;
+ size--;
+ current = buffer_get(ctx);
+ }
+
+ if (len == 0) {
+ return error(ENODATA, "Expecting a token");
+ }
+
+ if (size == 0) {
+ return error(ENOBUFS, "Token too long");
+ }
+
+ *buffer = 0;
+
+ return current;
+}
+
+static int32_t
+str_number(context_t *ctx, uint64_t min, uint64_t max, uint64_t *value,
+ int32_t current)
+{
+ char text[32], *ptr;
+ uint64_t num;
+
+ current = str_token(ctx, text, sizeof(text), current);
+ if (current > 0) {
+ num = strtoul(text, &ptr, 0);
+ if ((*ptr != 0) || (num < min) || (num > max)) {
+ return error(ERANGE, "Invalid number");
+ }
+ *value = num;
+ }
+
+ return current;
+}
+
+static int32_t
+str_eol(context_t *ctx, int32_t current)
+{
+ current = str_skip_spaces(ctx, current);
+ if (current != '\n') {
+ return error(EINVAL, "Expecting end of command");
+ }
+
+ return current;
+}
+
+static void
+str_skip(context_t *ctx, int32_t current)
+{
+ while ((current > 0) && (current != '\n')) {
+ current = buffer_get(ctx);
+ }
+}
+
+static int32_t
+cmd_parse_obj(context_t *ctx, arg_t *arg, int32_t current)
+{
+ obj_t *obj;
+ uint64_t id;
+
+ current = str_number(ctx, 0, ctx->obj_count, &id, current);
+ if (current <= 0) {
+ return current;
+ }
+
+ obj = &ctx->objs[id];
+ if (obj->type != arg->obj.type) {
+ if (obj->type != OBJ_TYPE_NONE) {
+ return error(EBUSY, "Object is in use");
+ }
+ return error(ENOENT, "Object is not defined");
+ }
+
+ arg->obj.ref = obj;
+
+ return current;
+}
+
+static int32_t
+cmd_parse_num(context_t *ctx, arg_t *arg, int32_t current)
+{
+ return str_number(ctx, arg->num.min, arg->num.max, &arg->num.value,
+ current);
+}
+
+static int32_t
+cmd_parse_str(context_t *ctx, arg_t *arg, int32_t current)
+{
+ return str_token(ctx, arg->str.data, arg->str.size, current);
+}
+
+static int32_t
+cmd_parse_args(context_t *ctx, command_t *cmd, int32_t current)
+{
+ arg_t *arg;
+
+ for (arg = cmd->args; arg->type != ARG_TYPE_NONE; arg++) {
+ switch (arg->type) {
+ case ARG_TYPE_OBJ:
+ current = cmd_parse_obj(ctx, arg, current);
+ break;
+ case ARG_TYPE_NUM:
+ current = cmd_parse_num(ctx, arg, current);
+ break;
+ case ARG_TYPE_STR:
+ current = cmd_parse_str(ctx, arg, current);
+ break;
+ default:
+ return error(EINVAL, "Unknown argument type");
+ }
+ }
+
+ if (current < 0) {
+ return current;
+ }
+
+ current = str_eol(ctx, current);
+ if (current <= 0) {
+ return error(EINVAL, "Syntax error");
+ }
+
+ return cmd->handler(ctx, cmd);
+}
+
+static int32_t
+cmd_parse(context_t *ctx, command_t *cmds)
+{
+ char text[32];
+ command_t *cmd;
+ int32_t current;
+
+ cmd = cmds;
+ do {
+ current = str_token(ctx, text, sizeof(text), buffer_get(ctx));
+ if (current <= 0) {
+ return current;
+ }
+
+ while (cmd->name != NULL) {
+ if (strcmp(cmd->name, text) == 0) {
+ if (cmd->handler != NULL) {
+ return cmd_parse_args(ctx, cmd, current);
+ }
+ cmd = cmd->cmds;
+ break;
+ }
+ cmd++;
+ }
+ } while (cmd->name != NULL);
+
+ str_skip(ctx, current);
+
+ return error(ENOTSUP, "Unknown command");
+}
+
+static void
+cmd_fini(context_t *ctx, command_t *cmds)
+{
+ command_t *cmd;
+ arg_t *arg;
+
+ for (cmd = cmds; cmd->name != NULL; cmd++) {
+ if (cmd->handler == NULL) {
+ cmd_fini(ctx, cmd->cmds);
+ } else {
+ for (arg = cmd->args; arg->type != ARG_TYPE_NONE; arg++) {
+ switch (arg->type) {
+ case ARG_TYPE_STR:
+ mem_free(arg->str.data);
+ arg->str.data = NULL;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ }
+}
+
+static bool
+cmd_init(context_t *ctx, command_t *cmds)
+{
+ command_t *cmd;
+ arg_t *arg;
+
+ for (cmd = cmds; cmd->name != NULL; cmd++) {
+ if (cmd->handler == NULL) {
+ if (!cmd_init(ctx, cmd->cmds)) {
+ return false;
+ }
+ } else {
+ for (arg = cmd->args; arg->type != ARG_TYPE_NONE; arg++) {
+ switch (arg->type) {
+ case ARG_TYPE_STR:
+ arg->str.data = mem_alloc(arg->str.size);
+ if (arg->str.data == NULL) {
+ return false;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+static bool
+objs_create(context_t *ctx, uint32_t count)
+{
+ uint32_t i;
+
+ ctx->objs = mem_alloc(sizeof(obj_t) * count);
+ if (ctx->objs == NULL) {
+ return false;
+ }
+ ctx->obj_count = count;
+
+ for (i = 0; i < count; i++) {
+ ctx->objs[i].type = OBJ_TYPE_NONE;
+ }
+
+ return true;
+}
+
+static int32_t
+objs_destroy(context_t *ctx)
+{
+ uint32_t i;
+ int32_t err;
+
+ err = 0;
+ for (i = 0; i < ctx->obj_count; i++) {
+ if (ctx->objs[i].type != OBJ_TYPE_NONE) {
+ err = error(ENOTEMPTY, "Objects not destroyed");
+ break;
+ }
+ }
+
+ mem_free(ctx->objs);
+ ctx->objs = NULL;
+ ctx->obj_count = 0;
+
+ return err;
+}
+
+static context_t *
+init(size_t size, uint32_t objs, command_t *cmds)
+{
+ context_t *ctx;
+
+ ctx = mem_alloc(sizeof(context_t));
+ if (ctx == NULL) {
+ goto failed;
+ }
+
+ if (!buffer_create(ctx, size)) {
+ goto failed_ctx;
+ }
+
+ if (!objs_create(ctx, objs)) {
+ goto failed_buffer;
+ }
+
+ if (!cmd_init(ctx, cmds)) {
+ goto failed_objs;
+ }
+
+ ctx->active = true;
+
+ return ctx;
+
+failed_objs:
+ cmd_fini(ctx, cmds);
+ objs_destroy(ctx);
+failed_buffer:
+ buffer_destroy(ctx);
+failed_ctx:
+ mem_free(ctx);
+failed:
+ return NULL;
+}
+
+static int32_t
+fini(context_t *ctx, command_t *cmds)
+{
+ int32_t ret;
+
+ cmd_fini(ctx, cmds);
+ buffer_destroy(ctx);
+
+ ret = objs_destroy(ctx);
+
+ ctx->active = false;
+
+ return ret;
+}
+
+static int32_t
+exec_quit(context_t *ctx, command_t *cmd)
+{
+ ctx->active = false;
+
+ return 0;
+}
+
+static command_t commands[] = {{"fd", NULL, CMD_SUB(fd_commands)},
+ {"quit", exec_quit, CMD_ARGS()},
+ CMD_END};
+
+int32_t
+main(int32_t argc, char *argv[])
+{
+ context_t *ctx;
+ int32_t res;
+
+ ctx = init(1024, 16, commands);
+ if (ctx == NULL) {
+ return 1;
+ }
+
+ do {
+ res = cmd_parse(ctx, commands);
+ if (res < 0) {
+ out_err(-res);
+ }
+ } while (ctx->active);
+
+ res = fini(ctx, commands);
+ if (res >= 0) {
+ out_ok();
+ return 0;
+ }
+
+ out_err(-res);
+
+ return 1;
+}
diff --git a/tests/basic/open-behind/tester.h b/tests/basic/open-behind/tester.h
new file mode 100644
index 00000000000..64e940c78fc
--- /dev/null
+++ b/tests/basic/open-behind/tester.h
@@ -0,0 +1,145 @@
+/*
+ Copyright (c) 2020 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __TESTER_H__
+#define __TESTER_H__
+
+#include <stdio.h>
+#include <inttypes.h>
+#include <stdbool.h>
+
+enum _obj_type;
+typedef enum _obj_type obj_type_t;
+
+enum _arg_type;
+typedef enum _arg_type arg_type_t;
+
+struct _buffer;
+typedef struct _buffer buffer_t;
+
+struct _obj;
+typedef struct _obj obj_t;
+
+struct _context;
+typedef struct _context context_t;
+
+struct _arg;
+typedef struct _arg arg_t;
+
+struct _command;
+typedef struct _command command_t;
+
+enum _obj_type { OBJ_TYPE_NONE, OBJ_TYPE_FD };
+
+enum _arg_type { ARG_TYPE_NONE, ARG_TYPE_OBJ, ARG_TYPE_NUM, ARG_TYPE_STR };
+
+struct _buffer {
+ char *base;
+ uint32_t size;
+ uint32_t len;
+ uint32_t pos;
+};
+
+struct _obj {
+ obj_type_t type;
+ union {
+ int32_t fd;
+ };
+};
+
+struct _context {
+ obj_t *objs;
+ buffer_t buffer;
+ uint32_t obj_count;
+ bool active;
+};
+
+struct _arg {
+ arg_type_t type;
+ union {
+ struct {
+ obj_type_t type;
+ obj_t *ref;
+ } obj;
+ struct {
+ uint64_t value;
+ uint64_t min;
+ uint64_t max;
+ } num;
+ struct {
+ uint32_t size;
+ char *data;
+ } str;
+ };
+};
+
+struct _command {
+ const char *name;
+ int32_t (*handler)(context_t *ctx, command_t *cmd);
+ union {
+ arg_t *args;
+ command_t *cmds;
+ };
+};
+
+#define msg(_stream, _fmt, _args...) \
+ do { \
+ fprintf(_stream, _fmt "\n", ##_args); \
+ fflush(_stream); \
+ } while (0)
+
+#define msg_out(_fmt, _args...) msg(stdout, _fmt, ##_args)
+#define msg_err(_err, _fmt, _args...) \
+ ({ \
+ int32_t __msg_err = (_err); \
+ msg(stderr, "[%4u:%-15s] " _fmt, __LINE__, __FUNCTION__, __msg_err, \
+ ##_args); \
+ -__msg_err; \
+ })
+
+#define error(_err, _fmt, _args...) msg_err(_err, "E(%4d) " _fmt, ##_args)
+#define warn(_err, _fmt, _args...) msg_err(_err, "W(%4d) " _fmt, ##_args)
+#define info(_err, _fmt, _args...) msg_err(_err, "I(%4d) " _fmt, ##_args)
+
+#define out_ok(_args...) msg_out("OK " _args)
+#define out_err(_err) msg_out("ERR %d", _err)
+
+#define ARG_END \
+ { \
+ ARG_TYPE_NONE \
+ }
+
+#define CMD_ARGS1(_x, _args...) \
+ .args = (arg_t[]) { _args }
+#define CMD_ARGS(_args...) CMD_ARGS1(, ##_args, ARG_END)
+
+#define CMD_SUB(_cmds) .cmds = _cmds
+
+#define CMD_END \
+ { \
+ NULL, NULL, CMD_SUB(NULL) \
+ }
+
+#define ARG_VAL(_type) \
+ { \
+ ARG_TYPE_OBJ, .obj = {.type = _type } \
+ }
+#define ARG_NUM(_min, _max) \
+ { \
+ ARG_TYPE_NUM, .num = {.min = _min, .max = _max } \
+ }
+#define ARG_STR(_size) \
+ { \
+ ARG_TYPE_STR, .str = {.size = _size } \
+ }
+
+extern command_t fd_commands[];
+
+#endif /* __TESTER_H__ */ \ No newline at end of file
diff --git a/tests/basic/playground/template-xlator-sanity.t b/tests/basic/playground/template-xlator-sanity.t
index c3090dae5a8..1c665502bfe 100755
--- a/tests/basic/playground/template-xlator-sanity.t
+++ b/tests/basic/playground/template-xlator-sanity.t
@@ -15,6 +15,7 @@ end-volume
volume template
type playground/template
subvolumes posix
+ option dummy 13
end-volume
EOF
@@ -22,6 +23,21 @@ TEST glusterfs -f $B0/template.vol $M0
TEST $(dirname $0)/../rpc-coverage.sh --no-locks $M0
-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+# Take statedump to get maximum code coverage
+pid=$(ps auxww | grep glusterfs | grep -E "template.vol" | awk '{print $2}' | head -1)
+
+TEST generate_statedump $pid
+
+# For monitor output
+kill -USR2 $pid
+
+# Handle SIGHUP and reconfigure
+sed -i -e '/s/dummy 13/dummy 42/g' $B0/template.vol
+kill -HUP $pid
+
+# for calling 'fini()'
+kill -TERM $pid
+
+force_umount $M0
cleanup;
diff --git a/tests/basic/posix/shared-statfs.t b/tests/basic/posix/shared-statfs.t
index 33439562ec9..0e4a1bb409f 100644
--- a/tests/basic/posix/shared-statfs.t
+++ b/tests/basic/posix/shared-statfs.t
@@ -20,15 +20,18 @@ TEST mkdir -p $B0/${V0}1 $B0/${V0}2
TEST MOUNT_LOOP $LO1 $B0/${V0}1
TEST MOUNT_LOOP $LO2 $B0/${V0}2
+total_brick_blocks=$(df -P $B0/${V0}1 $B0/${V0}2 | tail -2 | awk '{sum = sum+$2}END{print sum}')
+#Account for rounding error
+brick_blocks_two_percent_less=$((total_brick_blocks*98/100))
# Create a subdir in mountpoint and use that for volume.
TEST $CLI volume create $V0 $H0:$B0/${V0}1/1 $H0:$B0/${V0}2/1;
TEST $CLI volume start $V0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" online_brick_count
TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
-total_space=$(df -P $M0 | tail -1 | awk '{ print $2}')
+total_mount_blocks=$(df -P $M0 | tail -1 | awk '{ print $2}')
# Keeping the size less than 200M mainly because XFS will use
# some storage in brick to keep its own metadata.
-TEST [ $total_space -gt 194000 -a $total_space -lt 200000 ]
+TEST [ $total_mount_blocks -gt $brick_blocks_two_percent_less -a $total_mount_blocks -lt 200000 ]
TEST force_umount $M0
@@ -41,8 +44,8 @@ TEST $CLI volume add-brick $V0 $H0:$B0/${V0}1/2 $H0:$B0/${V0}2/2 $H0:$B0/${V0}1/
TEST $CLI volume start $V0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "6" online_brick_count
TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
-total_space=$(df -P $M0 | tail -1 | awk '{ print $2}')
-TEST [ $total_space -gt 194000 -a $total_space -lt 200000 ]
+total_mount_blocks=$(df -P $M0 | tail -1 | awk '{ print $2}')
+TEST [ $total_mount_blocks -gt $brick_blocks_two_percent_less -a $total_mount_blocks -lt 200000 ]
TEST force_umount $M0
TEST $CLI volume stop $V0
diff --git a/tests/basic/posix/zero-fill-enospace.c b/tests/basic/posix/zero-fill-enospace.c
index 1371ff59a5f..b1f142c6be9 100644
--- a/tests/basic/posix/zero-fill-enospace.c
+++ b/tests/basic/posix/zero-fill-enospace.c
@@ -1,4 +1,5 @@
#include <stdio.h>
+#include <stdlib.h>
#include <glusterfs/api/glfs.h>
#include <glusterfs/api/glfs-handles.h>
@@ -8,7 +9,7 @@ main(int argc, char *argv[])
glfs_t *fs = NULL;
glfs_fd_t *fd = NULL;
int ret = 1;
- int size = 0;
+ off_t size = 0;
if (argc != 6) {
fprintf(stderr,
@@ -45,12 +46,12 @@ main(int argc, char *argv[])
goto out;
}
- size = atoi(argv[5]);
+ size = strtol(argv[5], NULL, 10);
if (size < 0) {
fprintf(stderr, "Wrong size %s", argv[5]);
goto out;
}
- ret = glfs_zerofill(fd, 0, atoi(argv[5]));
+ ret = glfs_zerofill(fd, 0, size);
if (ret <= 0) {
fprintf(stderr, "glfs_zerofill: returned %d\n", ret);
goto out;
diff --git a/tests/basic/quick-read-with-upcall.t b/tests/basic/quick-read-with-upcall.t
index 318e93a1bf0..dfb751dfcdb 100644
--- a/tests/basic/quick-read-with-upcall.t
+++ b/tests/basic/quick-read-with-upcall.t
@@ -15,8 +15,8 @@ TEST $CLI volume create $V0 $H0:$B0/${V0}{1..2};
TEST $CLI volume start $V0
# Mount FUSE without selinux:
-TEST glusterfs -s $H0 --volfile-id $V0 $M0;
-TEST glusterfs -s $H0 --volfile-id $V0 $M1;
+TEST glusterfs -s $H0 --volfile-id $V0 --direct-io-mode=enable $M0;
+TEST glusterfs -s $H0 --volfile-id $V0 --direct-io-mode=enable $M1;
D0="test-message0";
D1="test-message1";
@@ -38,11 +38,13 @@ EXPECT "$D1" cat $M0/test.txt
EXPECT "$D0" cat $M1/test.txt
sleep 1
+
+# TODO: This line normally fails
EXPECT "$D1" cat $M1/test.txt
TEST $CLI volume set $V0 features.cache-invalidation on
-TEST $CLI volume set $V0 performance.qr-cache-timeout 60
-TEST $CLI volume set $V0 performance.md-cache-timeout 60
+TEST $CLI volume set $V0 performance.quick-read-cache-timeout 15
+TEST $CLI volume set $V0 performance.md-cache-timeout 15
TEST write_to "$M0/test1.txt" "$D0"
EXPECT "$D0" cat $M0/test1.txt
@@ -55,9 +57,10 @@ EXPECT "$D0" cat $M1/test1.txt
sleep 1
EXPECT "$D0" cat $M1/test1.txt
-sleep 60
+sleep 30
EXPECT "$D1" cat $M1/test1.txt
+TEST $CLI volume set $V0 performance.quick-read-cache-invalidation on
TEST $CLI volume set $V0 performance.cache-invalidation on
TEST write_to "$M0/test2.txt" "$D0"
diff --git a/tests/basic/quota-anon-fd-nfs.t b/tests/basic/quota-anon-fd-nfs.t
index 11fbe49c7ff..9e6675af6ec 100755
--- a/tests/basic/quota-anon-fd-nfs.t
+++ b/tests/basic/quota-anon-fd-nfs.t
@@ -5,6 +5,8 @@
. $(dirname $0)/../nfs.rc
. $(dirname $0)/../fileio.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup;
QDD=$(dirname $0)/quota
diff --git a/tests/basic/quota-nfs.t b/tests/basic/quota-nfs.t
index 663a8da90ad..de94a950a7f 100755
--- a/tests/basic/quota-nfs.t
+++ b/tests/basic/quota-nfs.t
@@ -4,6 +4,8 @@
. $(dirname $0)/../volume.rc
. $(dirname $0)/../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup;
QDD=$(dirname $0)/quota
# compile the test write program and run it
diff --git a/tests/basic/quota.t b/tests/basic/quota.t
index 7f8b21de6f8..46d1bafff84 100755
--- a/tests/basic/quota.t
+++ b/tests/basic/quota.t
@@ -6,6 +6,8 @@
. $(dirname $0)/../dht.rc
. $(dirname $0)/../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup;
QDD=$(dirname $0)/quota
diff --git a/tests/basic/rpc-coverage.sh b/tests/basic/rpc-coverage.sh
index 4b6759a59eb..6203f0ac7cb 100755
--- a/tests/basic/rpc-coverage.sh
+++ b/tests/basic/rpc-coverage.sh
@@ -419,9 +419,15 @@ function test_rmdir()
rm -rf $PFX || fail "rm -rf"
}
+function test_statvfs()
+{
+ df $DIR 2>&1 || fail "df"
+}
+
function run_tests()
{
+ test_statvfs;
test_mkdir;
test_create;
test_statfs;
@@ -436,15 +442,15 @@ function run_tests()
test_rename;
test_chmod;
test_chown;
- test_utimes;
- if [ "$run_lock_tests" = "1" ]; then
- test_locks;
- fi
test_readdir;
test_setxattr;
test_listxattr;
test_getxattr;
test_removexattr;
+ if [ "$run_lock_tests" = "1" ]; then
+ test_locks;
+ fi
+ test_utimes;
test_unlink;
test_rmdir;
}
diff --git a/tests/basic/rpc-coverage.t b/tests/basic/rpc-coverage.t
index a76ba7084eb..2c1bcd5a63a 100755
--- a/tests/basic/rpc-coverage.t
+++ b/tests/basic/rpc-coverage.t
@@ -9,11 +9,11 @@ TEST glusterd
TEST pidof glusterd
TEST $CLI volume info;
-TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6,7,8,9};
EXPECT "$V0" volinfo_field $V0 'Volume Name';
EXPECT 'Created' volinfo_field $V0 'Status';
-EXPECT '8' brick_count $V0
+EXPECT '9' brick_count $V0
TEST $CLI volume start $V0;
EXPECT 'Started' volinfo_field $V0 'Status';
diff --git a/tests/basic/sdfs-sanity.t b/tests/basic/sdfs-sanity.t
index f25376c3cad..16d0bed866f 100644
--- a/tests/basic/sdfs-sanity.t
+++ b/tests/basic/sdfs-sanity.t
@@ -19,4 +19,10 @@ TEST $GFS -s $H0 --volfile-id $V0 $M1;
# create operations
TEST $(dirname $0)/rpc-coverage.sh $M1
+TEST cp $(dirname ${0})/gfapi/glfsxmp-coverage.c glfsxmp.c
+TEST build_tester ./glfsxmp.c -lgfapi
+TEST ./glfsxmp $V0 $H0
+TEST cleanup_tester ./glfsxmp
+TEST rm ./glfsxmp.c
+
cleanup;
diff --git a/tests/basic/ec/seek.c b/tests/basic/seek.c
index 54fa6f463af..54fa6f463af 100644
--- a/tests/basic/ec/seek.c
+++ b/tests/basic/seek.c
diff --git a/tests/basic/shd-mux-afr.t b/tests/basic/shd-mux-afr.t
new file mode 100644
index 00000000000..cf300c148bb
--- /dev/null
+++ b/tests/basic/shd-mux-afr.t
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+shd_pid=$(get_shd_mux_pid $V0)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+#Create a one more volume
+TEST $CLI volume create ${V0}_1 replica 3 $H0:$B0/${V0}_1{0,1,2,3,4,5}
+TEST $CLI volume start ${V0}_1
+
+#Check whether the shd has multiplexed or not
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid ${V0}_1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid ${V0}
+
+TEST $CLI volume set ${V0}_1 cluster.background-self-heal-count 0
+TEST $CLI volume set ${V0}_1 cluster.eager-lock off
+TEST $CLI volume set ${V0}_1 performance.flush-behind off
+TEST $GFS --volfile-id=/${V0}_1 --volfile-server=$H0 $M1
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}4
+TEST kill_brick ${V0}_1 $H0 $B0/${V0}_10
+TEST kill_brick ${V0}_1 $H0 $B0/${V0}_14
+
+TEST touch $M0/foo{1..100}
+TEST touch $M1/foo{1..100}
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^204$" get_pending_heal_count $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^204$" get_pending_heal_count ${V0}_1
+
+TEST $CLI volume start ${V0} force
+TEST $CLI volume start ${V0}_1 force
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}_1
+
+TEST rm -rf $M0/*
+TEST rm -rf $M1/*
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+
+#Stop the volume
+TEST $CLI volume stop ${V0}_1
+TEST $CLI volume delete ${V0}_1
+
+#Check the stop succeeded and detached the volume with out restarting it
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid $V0
+
+#Check the thread count become to earlier number after stopping
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+TEST $CLI volume stop ${V0}
+TEST $CLI volume delete ${V0}
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count
+cleanup
diff --git a/tests/basic/shd-mux-ec.t b/tests/basic/shd-mux-ec.t
new file mode 100644
index 00000000000..ef4d65018d3
--- /dev/null
+++ b/tests/basic/shd-mux-ec.t
@@ -0,0 +1,75 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+shd_pid=$(get_shd_mux_pid $V0)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+#Now create a ec volume and check mux works
+TEST $CLI volume create ${V0}_2 disperse 6 redundancy 2 $H0:$B0/${V0}_2{0,1,2,3,4,5}
+TEST $CLI volume start ${V0}_2
+
+#Check whether the shd has multiplexed or not
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid ${V0}_2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid ${V0}
+
+TEST $CLI volume set ${V0}_2 cluster.background-self-heal-count 0
+TEST $CLI volume set ${V0}_2 cluster.eager-lock off
+TEST $CLI volume set ${V0}_2 performance.flush-behind off
+TEST $GFS --volfile-id=/${V0}_2 --volfile-server=$H0 $M1
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}4
+TEST kill_brick ${V0}_2 $H0 $B0/${V0}_20
+TEST kill_brick ${V0}_2 $H0 $B0/${V0}_22
+
+TEST touch $M0/foo{1..100}
+TEST touch $M1/foo{1..100}
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^204$" get_pending_heal_count $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^404$" get_pending_heal_count ${V0}_2
+
+TEST $CLI volume start ${V0} force
+TEST $CLI volume start ${V0}_2 force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "ec_shd_index_healer"
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}_2
+
+TEST rm -rf $M0/*
+TEST rm -rf $M1/*
+
+
+#Stop the volume
+TEST $CLI volume stop ${V0}_2
+TEST $CLI volume delete ${V0}_2
+
+#Check the stop succeeded and detached the volume with out restarting it
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid $V0
+
+#Check the thread count become to zero for ec related threads
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" number_healer_threads_shd $V0 "ec_shd_index_healer"
+#Check the thread count become to earlier number after stopping
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+TEST $CLI volume stop ${V0}
+TEST $CLI volume delete ${V0}
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count
+
+cleanup
diff --git a/tests/basic/stats-dump.t b/tests/basic/stats-dump.t
index 054df2e636f..ed73fd1d14a 100644
--- a/tests/basic/stats-dump.t
+++ b/tests/basic/stats-dump.t
@@ -4,6 +4,8 @@
. $(dirname $0)/../volume.rc
. $(dirname $0)/../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup;
TEST glusterd
diff --git a/tests/basic/trace.t b/tests/basic/trace.t
new file mode 100755
index 00000000000..01e7c9e0a25
--- /dev/null
+++ b/tests/basic/trace.t
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST mkdir -p $B0/single-brick
+cat > $B0/template.vol <<EOF
+volume posix
+ type storage/posix
+ option directory $B0/single-brick
+end-volume
+
+volume trace
+ type debug/trace
+ option log-file yes
+ option log-history yes
+ subvolumes posix
+end-volume
+EOF
+
+TEST glusterfs -f $B0/template.vol $M0
+
+TEST $(dirname $0)/rpc-coverage.sh --no-locks $M0
+
+# Take statedump to get maximum code coverage
+pid=$(ps auxww | grep glusterfs | grep -E "template.vol" | awk '{print $2}' | head -1)
+
+TEST generate_statedump $pid
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# Now, use the glusterd way of enabling trace
+TEST glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+TEST $CLI volume set $V0 debug.trace marker
+TEST $CLI volume set $V0 debug.log-file yes
+#TEST $CLI volume set $V0 debug.log-history yes
+
+TEST $CLI volume start $V0;
+
+TEST $GFS -s $H0 --volfile-id $V0 $M1;
+
+TEST $(dirname $0)/rpc-coverage.sh --no-locks $M1
+cp $(dirname ${0})/gfapi/glfsxmp-coverage.c ./glfsxmp.c
+build_tester ./glfsxmp.c -lgfapi
+./glfsxmp $V0 $H0 > /dev/null
+cleanup_tester ./glfsxmp
+rm ./glfsxmp.c
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+
+cleanup;
diff --git a/tests/basic/uss.t b/tests/basic/uss.t
index 40e556e372e..09dd00ef995 100644
--- a/tests/basic/uss.t
+++ b/tests/basic/uss.t
@@ -6,6 +6,8 @@
. $(dirname $0)/../fileio.rc
. $(dirname $0)/../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
function check_readonly()
{
$@ 2>&1 | grep -q 'Read-only file system'
@@ -34,6 +36,7 @@ TEST glusterd;
TEST pidof glusterd;
TEST $CLI volume create $V0 $H0:$L1 $H0:$L2 $H0:$L3;
+
TEST $CLI volume set $V0 nfs.disable false
@@ -372,6 +375,15 @@ TEST rm -f $M0/aaa;
TEST $CLI snapshot delete snap6;
+# drop the caches so that, the dentry for "snap6" is
+# is forgotten from the client cache.
+drop_cache $M0
+
+EXPECT_WITHIN 30 "5" count_snaps $M0;
+
+# This should fail, as snap6 just got deleted.
+TEST ! stat $M0/.history/snap6
+
TEST $CLI snapshot create snap6 $V0 no-timestamp
TEST ls $M0/.history;
@@ -382,4 +394,28 @@ TEST ls $M0/.history/snap6/;
TEST ! stat $M0/.history/snap6/aaa;
+TEST stat $M0
+
+# done with the tests start cleaning up of things
+TEST $CLI volume set $V0 features.uss disable
+
+TEST $CLI snapshot delete snap6;
+
+TEST $CLI snapshot delete snap5;
+
+TEST $CLI snapshot delete snap4;
+
+TEST $CLI snapshot delete snap3;
+
+TEST $CLI snapshot delete snap2;
+
+TEST $CLI snapshot delete snap1;
+
+# nfs client has been already unmounted at line 333
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST $CLI volume stop $V0
+
+TEST $CLI volume delete $V0
+
cleanup;
diff --git a/tests/basic/volume-scale-shd-mux.t b/tests/basic/volume-scale-shd-mux.t
new file mode 100644
index 00000000000..102de22468e
--- /dev/null
+++ b/tests/basic/volume-scale-shd-mux.t
@@ -0,0 +1,116 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TESTS_EXPECTED_IN_LOOP=6
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume start $V0
+
+for i in $(seq 1 2); do
+ TEST $CLI volume create ${V0}_afr$i replica 3 $H0:$B0/${V0}_afr${i}{0,1,2,3,4,5}
+ TEST $CLI volume start ${V0}_afr$i
+ TEST $CLI volume create ${V0}_ec$i disperse 6 redundancy 2 $H0:$B0/${V0}_ec${i}{0,1,2,3,4,5}
+ TEST $CLI volume start ${V0}_ec$i
+done
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+#Check the thread count become to number of volumes*number of ec subvolume (2*6=12)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd $V0 "ec_shd_index_healer"
+#Check the thread count become to number of volumes*number of afr subvolume (3*6=18)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}{6,7,8};
+#Check the thread count become to number of volumes*number of afr subvolume plus 3 additional threads from newly added bricks (3*6+3=21)
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^21$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+#Remove the brick and check the detach is successful
+$CLI volume remove-brick $V0 $H0:$B0/${V0}{6,7,8} force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" number_healer_threads_shd $V0 "glusterfs_graph_cleanup"
+TEST $CLI volume add-brick ${V0}_ec1 $H0:$B0/${V0}_ec1_add{0,1,2,3,4,5};
+#Check the thread count become to number of volumes*number of ec subvolume plus 2 additional threads from newly added bricks (2*6+6=18)
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "ec_shd_index_healer"
+
+#Remove the brick and check the detach is successful
+$CLI volume remove-brick ${V0}_ec1 $H0:$B0/${V0}_ec1_add{0,1,2,3,4,5} force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd $V0 "ec_shd_index_healer"
+
+
+for i in $(seq 1 2); do
+ TEST $CLI volume stop ${V0}_afr$i
+ TEST $CLI volume stop ${V0}_ec$i
+done
+
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}4
+
+TEST touch $M0/foo{1..100}
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^204$" get_pending_heal_count $V0
+
+TEST $CLI volume start ${V0} force
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+TEST rm -rf $M0/*
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+shd_pid=$(get_shd_mux_pid $V0)
+TEST $CLI volume create ${V0}_distribute1 $H0:$B0/${V0}_distribute10
+TEST $CLI volume start ${V0}_distribute1
+
+#Creating a non-replicate/non-ec volume should not have any effect in shd
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+EXPECT "^${shd_pid}$" get_shd_mux_pid $V0
+
+TEST mkdir $B0/add/
+#Now convert the distributed volume to replicate
+TEST $CLI volume add-brick ${V0}_distribute1 replica 3 $H0:$B0/add/{2..3}
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^9$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+#scale down the volume
+TEST $CLI volume remove-brick ${V0}_distribute1 replica 1 $H0:$B0/add/{2..3} force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+#Before stopping the process, make sure there is no pending clenup threads hanging
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" number_healer_threads_shd $V0 "glusterfs_graph_cleanup"
+
+TEST $CLI volume stop ${V0}
+TEST $CLI volume delete ${V0}
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count
+
+TEST rm -rf $B0/add/2 $B0/add/3
+
+#Now convert the distributed volume back to replicate and make sure that a new shd is spawned
+TEST $CLI volume add-brick ${V0}_distribute1 replica 3 $H0:$B0/add/{2..3};
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+EXPECT_WITHIN $HEAL_TIMEOUT "^3$" number_healer_threads_shd ${V0}_distribute1 "afr_shd_index_healer"
+
+#Now convert the replica volume to distribute again and make sure the shd is now stopped
+TEST $CLI volume remove-brick ${V0}_distribute1 replica 1 $H0:$B0/add/{2..3} force
+TEST rm -rf $B0/add/
+
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count
+
+cleanup
+
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1708929
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=1708929
diff --git a/tests/basic/volume-snap-scheduler.t b/tests/basic/volume-snap-scheduler.t
new file mode 100644
index 00000000000..a638c5cc46a
--- /dev/null
+++ b/tests/basic/volume-snap-scheduler.t
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${GMV0}{1,2,3,4};
+TEST $CLI volume start $V0
+
+## Create, start and mount meta_volume as
+## snap_scheduler expects shared storage to be enabled.
+## This test is very basic in nature not creating any snapshot
+## and purpose is to validate snap scheduling commands.
+
+TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
+TEST $CLI volume start $META_VOL
+TEST mkdir -p $META_MNT
+TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
+
+##function to check status
+function check_status_scheduler()
+{
+ local key=$1
+ snap_scheduler.py status | grep -F "$key" | wc -l
+}
+
+##Basic snap_scheduler command test init/enable/disable/list
+
+TEST snap_scheduler.py init
+
+TEST snap_scheduler.py enable
+
+EXPECT 1 check_status_scheduler "Enabled"
+
+TEST snap_scheduler.py disable
+
+EXPECT 1 check_status_scheduler "Disabled"
+
+TEST snap_scheduler.py list
+
+TEST $CLI volume stop $V0;
+
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/basic/volume-snapshot-xml.t b/tests/basic/volume-snapshot-xml.t
index 3ba25f4ddbb..ff63b54538d 100755
--- a/tests/basic/volume-snapshot-xml.t
+++ b/tests/basic/volume-snapshot-xml.t
@@ -1,13 +1,9 @@
#!/bin/bash
. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
. $(dirname $0)/../snapshot.rc
-function get-xml()
-{
- $CLI $1 --xml | xmllint --format - | grep $2 | sed 's/\(<"$2">\|<\/"$2">\)//g'
-}
-
cleanup;
TEST verify_lvm_version;
TEST glusterd;
diff --git a/tests/basic/volume-status.t b/tests/basic/volume-status.t
index d0d0b90250f..01d7ebf6c07 100644
--- a/tests/basic/volume-status.t
+++ b/tests/basic/volume-status.t
@@ -4,6 +4,8 @@
. $(dirname $0)/../volume.rc
. $(dirname $0)/../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup;
function gluster_client_list_status () {
@@ -32,6 +34,7 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" nfs_up_status
## Mount FUSE
TEST $GFS -s $H0 --volfile-id $V0 $M0;
+TEST touch $M0/file{1..20}
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "6" gluster_fd_status
@@ -55,6 +58,8 @@ function test_nfs_cmds () {
for cmd in ${nfs_cmds[@]}; do
$CLI volume status $V0 nfs $cmd
(( ret += $? ))
+ $CLI volume status $V0 nfs $cmd --xml
+ (( ret += $? ))
done
return $ret
}
@@ -65,6 +70,8 @@ function test_shd_cmds () {
for cmd in ${shd_cmds[@]}; do
$CLI volume status $V0 shd $cmd
(( ret += $? ))
+ $CLI volume status $V0 shd $cmd --xml
+ (( ret += $? ))
done
return $ret
}
@@ -76,14 +83,29 @@ function test_brick_cmds () {
for i in {1..2}; do
$CLI volume status $V0 $H0:$B0/${V0}$i $cmd
(( ret += $? ))
+ $CLI volume status $V0 $H0:$B0/${V0}$i $cmd --xml
+ (( ret += $? ))
done
done
return $ret
}
+function test_status_cmds () {
+ local ret=0
+ declare -a cmds=("detail" "clients" "mem" "inode" "fd" "callpool" "tasks" "client-list")
+ for cmd in ${cmds[@]}; do
+ $CLI volume status $V0 $cmd
+ (( ret += $? ))
+ $CLI volume status $V0 $cmd --xml
+ (( ret += $? ))
+ done
+ return $ret
+}
+
TEST test_shd_cmds;
TEST test_nfs_cmds;
TEST test_brick_cmds;
+TEST test_status_cmds;
## Before killing daemon to avoid deadlocks
diff --git a/tests/basic/volume.t b/tests/basic/volume.t
index 35ad55c3c5c..27fe093d07d 100755..100644
--- a/tests/basic/volume.t
+++ b/tests/basic/volume.t
@@ -11,7 +11,6 @@ TEST $CLI volume info;
TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
-
EXPECT "$V0" volinfo_field $V0 'Volume Name';
EXPECT 'Created' volinfo_field $V0 'Status';
EXPECT '6' brick_count $V0
@@ -25,10 +24,37 @@ EXPECT '9' brick_count $V0
TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}{1,2,3} force;
EXPECT '6' brick_count $V0
-TEST $CLI volume stop $V0;
-EXPECT 'Stopped' volinfo_field $V0 'Status';
+TEST $CLI volume top $V0 read-perf bs 4096 count 1000
+TEST $CLI volume top $V0 write-perf bs 1048576 count 2
+
+TEST touch $M0/foo
+
+# statedump path should be a directory, setting it to a file path should fail
+
+TEST ! $CLI v set $V0 server.statedump-path $M0/foo;
+EXPECT '/var/run/gluster' $CLI v get $V0 server.statedump-path
+
+#set the statedump path to an existing ditectory which should succeed
+TEST mkdir $D0/level;
+TEST $CLI v set $V0 server.statedump-path $D0/level
+EXPECT '/level' volinfo_field $V0 'server.statedump-path'
+
+ret=$(ls $D0/level | wc -l);
+TEST [ $ret == 0 ]
+TEST $CLI v statedump $V0;
+ret=$(ls $D0/level | wc -l);
+TEST ! [ $ret == 0 ]
+
+#set the statedump path to a non - existing directory which should fail
+TEST ! $CLI v set $V0 server.statedump-path /root/test
+EXPECT '/level' volinfo_field $V0 'server.statedump-path'
+
+TEST rm -rf $D0/level
+
+TEST $CLI volume stop $V0
+EXPECT 'Stopped' volinfo_field $V0 'Status'
-TEST $CLI volume delete $V0;
-TEST ! $CLI volume info $V0;
+TEST $CLI volume delete $V0
+TEST ! $CLI volume info $V0
cleanup;
diff --git a/tests/bitrot/br-signer-threads-config-1797869.t b/tests/bitrot/br-signer-threads-config-1797869.t
new file mode 100644
index 00000000000..657ef3eedaf
--- /dev/null
+++ b/tests/bitrot/br-signer-threads-config-1797869.t
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../cluster.rc
+
+function get_bitd_count_1 {
+ ps auxww | grep glusterfs | grep bitd.pid | grep -v grep | grep $H1 | wc -l
+}
+
+function get_bitd_count_2 {
+ ps auxww | grep glusterfs | grep bitd.pid | grep -v grep | grep $H2 | wc -l
+}
+
+function get_bitd_pid_1 {
+ ps auxww | grep glusterfs | grep bitd.pid | grep -v grep | grep $H1 | awk '{print $2}'
+}
+
+function get_bitd_pid_2 {
+ ps auxww | grep glusterfs | grep bitd.pid | grep -v grep | grep $H2 | awk '{print $2}'
+}
+
+function get_signer_th_count_1 {
+ ps -eL | grep $(get_bitd_pid_1) | grep glfs_brpobj | wc -l
+}
+
+function get_signer_th_count_2 {
+ ps -eL | grep $(get_bitd_pid_2) | grep glfs_brpobj | wc -l
+}
+
+cleanup;
+
+TEST launch_cluster 2
+
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count;
+
+TEST $CLI_1 volume create $V0 $H1:$B1
+TEST $CLI_1 volume create $V1 $H2:$B2
+EXPECT 'Created' volinfo_field_1 $V0 'Status';
+EXPECT 'Created' volinfo_field_1 $V1 'Status';
+
+TEST $CLI_1 volume start $V0
+TEST $CLI_1 volume start $V1
+EXPECT 'Started' volinfo_field_1 $V0 'Status';
+EXPECT 'Started' volinfo_field_1 $V1 'Status';
+
+#Enable bitrot
+TEST $CLI_1 volume bitrot $V0 enable
+TEST $CLI_1 volume bitrot $V1 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count_1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count_2
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "4" get_signer_th_count_1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "4" get_signer_th_count_2
+
+old_bitd_pid_1=$(get_bitd_pid_1)
+old_bitd_pid_2=$(get_bitd_pid_2)
+TEST $CLI_1 volume bitrot $V0 signer-threads 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_signer_th_count_1
+EXPECT_NOT "$old_bitd_pid_1" get_bitd_pid_1;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "4" get_signer_th_count_2
+EXPECT "$old_bitd_pid_2" get_bitd_pid_2;
+
+old_bitd_pid_1=$(get_bitd_pid_1)
+old_bitd_pid_2=$(get_bitd_pid_2)
+TEST $CLI_1 volume bitrot $V1 signer-threads 2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" get_signer_th_count_2
+EXPECT_NOT "$old_bitd_pid_2" get_bitd_pid_2;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_signer_th_count_1
+EXPECT "$old_bitd_pid_1" get_bitd_pid_1;
+
+cleanup;
diff --git a/tests/bitrot/br-stub.t b/tests/bitrot/br-stub.t
index 5013398482a..cc0319afac9 100644
--- a/tests/bitrot/br-stub.t
+++ b/tests/bitrot/br-stub.t
@@ -4,6 +4,8 @@
. $(dirname $0)/../volume.rc
. $(dirname $0)/../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
STUB_SOURCE=$(dirname $0)/br-stub.c
STUB_EXEC=$(dirname $0)/br-stub
diff --git a/tests/bitrot/bug-1244613.t b/tests/bitrot/bug-1244613.t
index 5674e6f4ef5..57b86a94ac0 100644
--- a/tests/bitrot/bug-1244613.t
+++ b/tests/bitrot/bug-1244613.t
@@ -5,6 +5,8 @@
. $(dirname $0)/../nfs.rc
. $(dirname $0)/../fileio.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup;
TESTS_EXPECTED_IN_LOOP=16
diff --git a/tests/bitrot/bug-1373520.t b/tests/bitrot/bug-1373520.t
index 5c53ebb82e2..6af5124e86e 100644
--- a/tests/bitrot/bug-1373520.t
+++ b/tests/bitrot/bug-1373520.t
@@ -59,6 +59,9 @@ TEST rm -rf $(find $B0/${V0}5 -inum $(stat -c %i $B0/${V0}5/FILE1))
#New mount for recovery
TEST $GFS -s $H0 --use-readdirp=no --attribute-timeout=0 --entry-timeout=0 --volfile-id $V0 $M1
+$CLI volume set $V0 self-heal-daemon on
+TEST $CLI volume heal $V0
+
#Access files
TEST cat $M1/FILE1
EXPECT_WITHIN $HEAL_TIMEOUT "$SIZE" path_size $B0/${V0}5/FILE1
diff --git a/tests/bitrot/bug-1700078.t b/tests/bitrot/bug-1700078.t
new file mode 100644
index 00000000000..f27374211fe
--- /dev/null
+++ b/tests/bitrot/bug-1700078.t
@@ -0,0 +1,87 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+
+## Lets create and start the volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
+TEST $CLI volume start $V0
+
+## Enable bitrot for volume $V0
+TEST $CLI volume bitrot $V0 enable
+
+## Turn off quick-read so that it wont cache the contents
+# of the file in lookup. For corrupted files, it might
+# end up in reads being served from the cache instead of
+# an error.
+TEST $CLI volume set $V0 performance.quick-read off
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Active' scrub_status $V0 'State of scrub'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/bitd.log' scrub_status $V0 'Bitrot error log location'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/scrub.log' scrub_status $V0 'Scrubber error log location'
+
+## Set expiry-timeout to 1 sec
+TEST $CLI volume set $V0 features.expiry-time 1
+
+##Mount $V0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+## Turn off quick-read xlator so that, the contents are not served from the
+# quick-read cache.
+TEST $CLI volume set $V0 performance.quick-read off
+
+#Create sample file
+TEST `echo "1234" > $M0/FILE1`
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.bit-rot.signature' check_for_xattr 'trusted.bit-rot.signature' "/$B0/${V0}1/FILE1"
+
+##disable bitrot
+TEST $CLI volume bitrot $V0 disable
+
+## modify the file
+TEST `echo "write" >> $M0/FILE1`
+
+# unmount and remount when the file has to be accessed.
+# This is to ensure that, when the remount happens,
+# and the file is read, its contents are served from the
+# brick instead of cache.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+##enable bitrot
+TEST $CLI volume bitrot $V0 enable
+
+# expiry time is set to 1 second. Hence sleep for 2 seconds for the
+# oneshot crawler to finish its crawling and sign the file properly.
+sleep 2
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Active' scrub_status $V0 'State of scrub'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/bitd.log' scrub_status $V0 'Bitrot error log location'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/scrub.log' scrub_status $V0 'Scrubber error log location'
+
+## Ondemand scrub
+TEST $CLI volume bitrot $V0 scrub ondemand
+
+# the scrub ondemand CLI command, just ensures that
+# the scrubber has received the ondemand scrub directive
+# and started. sleep for 2 seconds for scrubber to finish
+# crawling and marking file(s) as bad (if if finds that
+# corruption has happened) which are filesystem operations.
+sleep 2
+
+TEST ! getfattr -n 'trusted.bit-rot.bad-file' $B0/${V0}1/FILE1
+
+##Mount $V0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+TEST cat $M0/FILE1
+
+cleanup;
diff --git a/tests/bugs/access-control/bug-958691.t b/tests/bugs/access-control/bug-958691.t
index 6c45b47b166..8b70607bdbb 100644
--- a/tests/bugs/access-control/bug-958691.t
+++ b/tests/bugs/access-control/bug-958691.t
@@ -3,6 +3,8 @@
. $(dirname $0)/../../volume.rc
. $(dirname $0)/../../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup;
TEST glusterd
diff --git a/tests/bugs/bitrot/bug-1227996.t b/tests/bugs/bitrot/bug-1227996.t
index 47ebc4235cf..121c7b5f279 100644
--- a/tests/bugs/bitrot/bug-1227996.t
+++ b/tests/bugs/bitrot/bug-1227996.t
@@ -17,7 +17,6 @@ TEST pidof glusterd;
## Lets create and start the volume
TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
TEST $CLI volume start $V0
-
## Enable bitrot on volume $V0
TEST $CLI volume bitrot $V0 enable
diff --git a/tests/bugs/bitrot/bug-1245981.t b/tests/bugs/bitrot/bug-1245981.t
index 2bed4d980fa..f3955256b01 100644
--- a/tests/bugs/bitrot/bug-1245981.t
+++ b/tests/bugs/bitrot/bug-1245981.t
@@ -47,9 +47,9 @@ touch $M0/5
sleep `expr $SLEEP_TIME \* 2`
backpath=$(get_backend_paths $fname)
-TEST getfattr -m . -n trusted.bit-rot.signature $backpath
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.bit-rot.signature' check_for_xattr 'trusted.bit-rot.signature' $backpath
backpath=$(get_backend_paths $M0/new_file)
-TEST getfattr -m . -n trusted.bit-rot.signature $backpath
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.bit-rot.signature' check_for_xattr 'trusted.bit-rot.signature' $backpath
cleanup;
diff --git a/tests/bugs/bug-1064147.t b/tests/bugs/bug-1064147.t
new file mode 100755
index 00000000000..27ffde4eb44
--- /dev/null
+++ b/tests/bugs/bug-1064147.t
@@ -0,0 +1,72 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+# Initialize
+#------------------------------------------------------------
+cleanup;
+
+# Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+# Create a volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}
+
+# Verify volume creation
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+# Start volume and verify successful start
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+#------------------------------------------------------------
+
+# Test case 1 - Subvolume down + Healing
+#------------------------------------------------------------
+# Kill 2nd brick process
+TEST kill_brick $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "1" online_brick_count
+
+# Change root permissions
+TEST chmod 444 $M0
+
+# Store permission for comparision
+TEST permission_new=`stat -c "%A" $M0`
+
+# Bring up the killed brick process
+TEST $CLI volume start $V0 force
+
+# Perform lookup
+sleep 5
+TEST ls $M0
+
+# Check brick permissions
+TEST brick_perm=`stat -c "%A" $B0/${V0}2`
+TEST [ ${brick_perm} = ${permission_new} ]
+#------------------------------------------------------------
+
+# Test case 2 - Add-brick + Healing
+#------------------------------------------------------------
+# Change root permissions
+TEST chmod 777 $M0
+
+# Store permission for comparision
+TEST permission_new_2=`stat -c "%A" $M0`
+
+# Add a 3rd brick
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3
+
+# Perform lookup
+sleep 5
+TEST ls $M0
+
+# Check permissions on the new brick
+TEST brick_perm2=`stat -c "%A" $B0/${V0}3`
+
+TEST [ ${brick_perm2} = ${permission_new_2} ]
+
+cleanup;
diff --git a/tests/bugs/bug-1138841.t b/tests/bugs/bug-1138841.t
new file mode 100644
index 00000000000..abec5e89d56
--- /dev/null
+++ b/tests/bugs/bug-1138841.t
@@ -0,0 +1,25 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../dht.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+## Create a volume and set auth.allow using cidr format ip
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 auth.allow 127.0.0.1/20
+TEST $CLI volume start $V0
+
+
+## mount the volume and create a file on the mount point
+
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+TEST touch $M0/tmp1
+
+## Stop the volume and do the cleanup
+
+TEST $CLI volume stop $V0
+cleanup
diff --git a/tests/bugs/bug-1258069.t b/tests/bugs/bug-1258069.t
index 8df4a8a9e1b..b87ecbf2fe8 100755
--- a/tests/bugs/bug-1258069.t
+++ b/tests/bugs/bug-1258069.t
@@ -4,6 +4,8 @@
. $(dirname $0)/../volume.rc
. $(dirname $0)/../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup
TEST glusterd
diff --git a/tests/bugs/bug-1371806.t b/tests/bugs/bug-1371806.t
index 7dc1613a4f2..08180525650 100644
--- a/tests/bugs/bug-1371806.t
+++ b/tests/bugs/bug-1371806.t
@@ -28,6 +28,7 @@ TEST glusterd
TEST pidof glusterd
TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1,2,3,4,5}
TEST $CLI volume start $V0
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "6" online_brick_count
TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
diff --git a/tests/bugs/bug-1371806_acl.t b/tests/bugs/bug-1371806_acl.t
index aa41e04b96f..c39165628cc 100644
--- a/tests/bugs/bug-1371806_acl.t
+++ b/tests/bugs/bug-1371806_acl.t
@@ -39,6 +39,7 @@ TEST pidof glusterd
TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1,2,3,4,5}
TEST $CLI volume set $V0 diagnostics.client-log-level DEBUG
TEST $CLI volume start $V0
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "6" online_brick_count
TEST glusterfs --volfile-id=$V0 --acl --volfile-server=$H0 --entry-timeout=0 $M0;
@@ -77,6 +78,10 @@ cd $M0
## was up at the time of updated xattrs
TEST stat ./tmp{1..10}
+# Make sure to send a write and read on the file inside mount
+echo "helloworld" > ./tmp1/file
+TEST cat ./tmp1/file
+
## Compare succ value with updated acl attributes
count=`getfacl -p $B0/${V0}5/tmp{1..10} | grep -c "user:tmpuser:rw-"`
EXPECT "$succ" echo $count
@@ -87,4 +92,5 @@ EXPECT "$succ1" echo $count
cd -
userdel --force tmpuser
+
cleanup
diff --git a/tests/bugs/bug-1620580.t b/tests/bugs/bug-1620580.t
new file mode 100644
index 00000000000..0c74d4a6089
--- /dev/null
+++ b/tests/bugs/bug-1620580.t
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+## Lets create volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0
+
+#do some operation on mount, so that kill_brick is guaranteed to be
+#done _after_ first lookup on root
+
+TEST ls $M0
+TEST touch $M0/file
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+# Case of Same volume name, but different bricks
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{3,4};
+TEST $CLI volume start $V0;
+
+# Give time for 'reconnect' to happen
+sleep 4
+
+TEST ! ls $M0
+TEST ! touch $M0/file1
+
+# Case of Same brick, but different volume (ie, recreated).
+TEST $CLI volume create $V1 $H0:$B0/${V0}{1,2};
+TEST $CLI volume start $V1;
+
+# Give time for 'reconnect' to happen
+sleep 4
+TEST ! ls $M0
+TEST ! touch $M0/file2
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+TEST $CLI volume stop $V1
+TEST $CLI volume delete $V1
+
+# Case of Same brick, but different volume (but same volume name)
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}
+TEST $CLI volume start $V0;
+
+# Give time for 'reconnect' to happen
+sleep 4
+TEST ! ls $M0
+TEST ! touch $M0/file3
+
+
+cleanup
diff --git a/tests/bugs/bug-1694920.t b/tests/bugs/bug-1694920.t
new file mode 100644
index 00000000000..5bf93c92f94
--- /dev/null
+++ b/tests/bugs/bug-1694920.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+SCRIPT_TIMEOUT=300
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../fileio.rc
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0};
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id=$V0 $M0;
+
+TEST touch $M0/a
+
+#When all bricks are up, lock and unlock should succeed
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST flock -x $fd1
+TEST fd_close $fd1
+
+#When all bricks are down, lock/unlock should fail
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST $CLI volume stop $V0
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" client_connected_status_meta $M0 $V0-client-0
+TEST fd_close $fd1
+
+#When a brick goes down and comes back up operations on fd which had locks on it should succeed by default
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST flock -x $fd1
+TEST $CLI volume stop $V0
+sleep 2
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" client_connected_status_meta $M0 $V0-client-0
+TEST fd_write $fd1 "data"
+TEST fd_close $fd1
+
+#When a brick goes down and comes back up operations on fd which had locks on it should fail when client.strict-locks is on
+TEST $CLI volume set $V0 client.strict-locks on
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST flock -x $fd1
+TEST $CLI volume stop $V0
+sleep 2
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" client_connected_status_meta $M0 $V0-client-0
+TEST ! fd_write $fd1 "data"
+TEST fd_close $fd1
+
+cleanup
diff --git a/tests/bugs/bug-1702299.t b/tests/bugs/bug-1702299.t
new file mode 100644
index 00000000000..1cff2ed5d3d
--- /dev/null
+++ b/tests/bugs/bug-1702299.t
@@ -0,0 +1,67 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../dht.rc
+cleanup;
+
+function get_getfattr {
+ local path=$1
+ echo `getfattr -n user.foo $path` | cut -f2 -d"=" | sed -e 's/^"//' -e 's/"$//'
+}
+
+function set_fattr {
+ for i in `seq 1 10`
+ do
+ setfattr -n user.foo -v "newabc" ./tmp${i}
+ if [ "$?" = "0" ]
+ then
+ succ=$((succ+1))
+ else
+ fail=$((fail+1))
+ fi
+ done
+}
+
+
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1,2,3}
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 --attribute-timeout=0 $M0;
+
+cd $M0
+TEST mkdir tmp{1..10}
+
+succ=fail=0
+## set user.foo xattr with value newabc after kill one brick
+set_fattr
+count=10
+EXPECT "$succ" echo $count
+count=0
+EXPECT "$fail" echo $count
+
+cd -
+
+# Add-brick
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{4,5}
+
+cd $M0
+## At this point dht code will heal xattr on down brick only for those dirs
+## hashed subvol was up at the time of update xattr
+TEST stat ./tmp{1..10}
+
+
+## Count the user.foo xattr value with newabc on brick and compare with succ value
+count=`getfattr -n user.foo $B0/${V0}4/tmp{1..10} | grep "user.foo" | grep -iw "newabc" | wc -l`
+EXPECT "$succ" echo $count
+
+## Count the user.foo xattr value with newabc on brick and compare with succ value
+count=`getfattr -n user.foo $B0/${V0}5/tmp{1..10} | grep "user.foo" | grep -iw "newabc" | wc -l`
+EXPECT "$succ" echo $count
+
+
+cd -
+TEST umount $M0
+cleanup
diff --git a/tests/bugs/cli/bug-1320388.t b/tests/bugs/cli/bug-1320388.t
index f5ffcbe082a..e719fc59033 100755
--- a/tests/bugs/cli/bug-1320388.t
+++ b/tests/bugs/cli/bug-1320388.t
@@ -21,7 +21,7 @@ cleanup;
rm -f $SSL_BASE/glusterfs.*
touch "$GLUSTERD_WORKDIR"/secure-access
-TEST openssl genrsa -out $SSL_KEY 1024
+TEST openssl genrsa -out $SSL_KEY 2048
TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT
ln $SSL_CERT $SSL_CA
diff --git a/tests/bugs/cli/bug-1353156-get-state-cli-validations.t b/tests/bugs/cli/bug-1353156-get-state-cli-validations.t
index c9c06cc6567..a4556c9c997 100644
--- a/tests/bugs/cli/bug-1353156-get-state-cli-validations.t
+++ b/tests/bugs/cli/bug-1353156-get-state-cli-validations.t
@@ -43,7 +43,6 @@ push_trapfunc rm -rf $ODIR
TEST $CLI volume create $V0 disperse $H0:$B0/b1 $H0:$B0/b2 $H0:$B0/b3
TEST $CLI volume start $V0
-TEST $CLI volume tier $V0 attach replica 2 $H0:$B1/b4 $H0:$B1/b5
TEST setup_lvm 1
TEST $CLI volume create $V1 $H0:$L1;
diff --git a/tests/bugs/cli/bug-1378842-volume-get-all.t b/tests/bugs/cli/bug-1378842-volume-get-all.t
index c798ce5ceff..be41f25b000 100644
--- a/tests/bugs/cli/bug-1378842-volume-get-all.t
+++ b/tests/bugs/cli/bug-1378842-volume-get-all.t
@@ -12,9 +12,6 @@ TEST $CLI volume set all server-quorum-ratio 80
# Execute volume get without having an explicit option, this should fail
TEST ! $CLI volume get all
-# Also volume get on an option not applicable for all volumes should fail
-TEST ! $CLI volume get all cluster.tier-mode
-
# Execute volume get with an explicit global option
TEST $CLI volume get all server-quorum-ratio
EXPECT '80' volume_get_field all 'cluster.server-quorum-ratio'
diff --git a/tests/bugs/cli/bug-983317-volume-get.t b/tests/bugs/cli/bug-983317-volume-get.t
index 8f09d588565..c793bbc9f0c 100644
--- a/tests/bugs/cli/bug-983317-volume-get.t
+++ b/tests/bugs/cli/bug-983317-volume-get.t
@@ -7,7 +7,8 @@ cleanup;
TEST glusterd
TEST pidof glusterd
-TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+EXPECT 'Created' volinfo_field $V0 'Status';
# Set a volume option
TEST $CLI volume set $V0 open-behind on
@@ -32,3 +33,13 @@ EXPECT '80' volume_get_field $V0 'server-quorum-ratio'
# Check user.* options can also be retrived using volume get
EXPECT 'dummy' volume_get_field $V0 'user.metadata'
+
+TEST $CLI volume set all brick-multiplex enable
+EXPECT 'enable' volume_get_field $V0 'brick-multiplex'
+
+TEST $CLI volume set all brick-multiplex disable
+EXPECT 'disable' volume_get_field $V0 'brick-multiplex'
+
+#setting an cluster level option for single volume should fail
+TEST ! $CLI volume set $V0 brick-multiplex enable
+
diff --git a/tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t b/tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t
index 6351ba22511..a1b9a851bf7 100755
--- a/tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t
+++ b/tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t
@@ -3,6 +3,8 @@
. $(dirname $0)/../../volume.rc
cleanup;
+FILE_COUNT=500
+
TEST glusterd
TEST pidof glusterd
TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
@@ -11,15 +13,14 @@ TEST $CLI volume set $V0 cluster.shd-wait-qlength 100
TEST $CLI volume start $V0
TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
-touch $M0/file{1..200}
-
+for i in `seq 1 $FILE_COUNT`; do touch $M0/file$i; done
TEST kill_brick $V0 $H0 $B0/${V0}1
-for i in {1..200}; do echo hello>$M0/file$i; done
+for i in `seq 1 $FILE_COUNT`; do echo hello>$M0/file$i; chmod -x $M0/file$i; done
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
-EXPECT "200" get_pending_heal_count $V0
+EXPECT "$FILE_COUNT" get_pending_heal_count $V0
TEST $CLI volume set $V0 self-heal-daemon on
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
diff --git a/tests/bugs/core/bug-1432542-mpx-restart-crash.t b/tests/bugs/core/bug-1432542-mpx-restart-crash.t
index f21bcff6a71..2793d7008e1 100644
--- a/tests/bugs/core/bug-1432542-mpx-restart-crash.t
+++ b/tests/bugs/core/bug-1432542-mpx-restart-crash.t
@@ -6,7 +6,9 @@ SCRIPT_TIMEOUT=800
. $(dirname $0)/../../volume.rc
. $(dirname $0)/../../traps.rc
-NUM_VOLS=20
+cleanup;
+
+NUM_VOLS=15
MOUNT_BASE=$(dirname $M0)
# GlusterD reports that bricks are started when in fact their attach requests
@@ -77,15 +79,27 @@ TEST $CLI volume set all cluster.brick-multiplex on
# Our infrastructure can't handle an arithmetic expression here. The formula
# is (NUM_VOLS-1)*5 because it sees each TEST/EXPECT once but needs the other
# NUM_VOLS-1 and there are 5 such statements in each iteration.
-TESTS_EXPECTED_IN_LOOP=114
+TESTS_EXPECTED_IN_LOOP=84
for i in $(seq 1 $NUM_VOLS); do
+ starttime="$(date +%s)";
+
create_volume $i
TEST dd if=/dev/zero of=$(get_mount_point $i)/a_file bs=4k count=1
# Unmounting to reduce memory footprint on regression hosts
mnt_point=$(get_mount_point $i)
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $mnt_point
+ endtime=$(expr $(date +%s) - $starttime)
+
+ echo "Memory Used after $i volumes : $(pmap -x $(pgrep glusterfsd) | grep total)"
+ echo "Thread Count after $i volumes: $(ps -T -p $(pgrep glusterfsd) | wc -l)"
+ echo "Time taken : ${endtime} seconds"
done
+echo "=========="
+echo "List of all the threads in the Brick process"
+ps -T -p $(pgrep glusterfsd)
+echo "=========="
+
# Kill glusterd, and wait a bit for all traces to disappear.
TEST killall -9 glusterd
sleep 5
diff --git a/tests/bugs/core/bug-1650403.t b/tests/bugs/core/bug-1650403.t
new file mode 100644
index 00000000000..43d09bc8bd9
--- /dev/null
+++ b/tests/bugs/core/bug-1650403.t
@@ -0,0 +1,113 @@
+#!/bin/bash
+
+SCRIPT_TIMEOUT=500
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../traps.rc
+
+cleanup;
+
+NUM_VOLS=5
+MOUNT_BASE=$(dirname $M0)
+
+# GlusterD reports that bricks are started when in fact their attach requests
+# might still need to be retried. That's a bit of a hack, but there's no
+# feasible way to wait at that point (in attach_brick) and the rest of the
+# code is unprepared to deal with transient errors so the whole "brick start"
+# would fail. Meanwhile, glusterfsd can only handle attach requests at a
+# rather slow rate. After GlusterD tries to start a couple of hundred bricks,
+# glusterfsd can fall behind and we start getting mount failures. Arguably,
+# those are spurious because we will eventually catch up. We're just not
+# ready *yet*. More to the point, even if the errors aren't spurious that's
+# not what we're testing right now. Therefore, we give glusterfsd a bit more
+# breathing room for this test than we would otherwise.
+MOUNT_TIMEOUT=15
+
+get_brick_base () {
+ printf "%s/vol%02d" $B0 $1
+}
+
+get_mount_point () {
+ printf "%s/vol%02d" $MOUNT_BASE $1
+}
+
+function count_up_bricks {
+ vol=$1;
+ $CLI --xml volume status $vol | grep '<status>1' | wc -l
+}
+
+create_volume () {
+
+ local vol_name=$(printf "%s-vol%02d" $V0 $1)
+
+ local brick_base=$(get_brick_base $1)
+ local cmd="$CLI volume create $vol_name replica 3"
+ local b
+ for b in $(seq 0 5); do
+ local this_brick=${brick_base}/brick$b
+ mkdir -p $this_brick
+ cmd="$cmd $H0:$this_brick"
+ done
+ TEST $cmd
+ TEST $CLI volume start $vol_name
+ # check for 6 bricks and 1 shd daemon to be up and running
+ EXPECT_WITHIN 120 7 count_up_bricks $vol_name
+ local mount_point=$(get_mount_point $1)
+ mkdir -p $mount_point
+ TEST $GFS -s $H0 --volfile-id=$vol_name $mount_point
+}
+
+cleanup_func () {
+ local v
+ for v in $(seq 1 $NUM_VOLS); do
+ local mount_point=$(get_mount_point $v)
+ force_umount $mount_point
+ rm -rf $mount_point
+ local vol_name=$(printf "%s-vol%02d" $V0 $v)
+ $CLI volume stop $vol_name
+ $CLI volume delete $vol_name
+ rm -rf $(get_brick_base $1) &
+ done &> /dev/null
+ wait
+}
+push_trapfunc cleanup_func
+
+TEST glusterd
+TEST $CLI volume set all cluster.brick-multiplex on
+
+# Our infrastructure can't handle an arithmetic expression here. The formula
+# is (NUM_VOLS-1)*5 because it sees each TEST/EXPECT once but needs the other
+# NUM_VOLS-1 and there are 5 such statements in each iteration.
+TESTS_EXPECTED_IN_LOOP=24
+for i in $(seq 1 $NUM_VOLS); do
+ create_volume $i
+ TEST dd if=/dev/zero of=$(get_mount_point $i)/a_file bs=4k count=1
+ # Unmounting to reduce memory footprint on regression hosts
+ mnt_point=$(get_mount_point $i)
+ EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $mnt_point
+done
+
+glustershd_pid=`ps auxwww | grep glustershd | grep -v grep | awk -F " " '{print $2}'`
+TEST [ $glustershd_pid != 0 ]
+start=`pmap -x $glustershd_pid | grep total | awk -F " " '{print $4}'`
+echo "Memory consumption for glustershd process"
+for i in $(seq 1 50); do
+ pmap -x $glustershd_pid | grep total
+ for j in $(seq 1 $NUM_VOLS); do
+ vol_name=$(printf "%s-vol%02d" $V0 $j)
+ gluster v set $vol_name cluster.self-heal-daemon off > /dev/null
+ gluster v set $vol_name cluster.self-heal-daemon on > /dev/null
+ done
+done
+
+end=`pmap -x $glustershd_pid | grep total | awk -F " " '{print $4}'`
+diff=$((end-start))
+
+# If memory consumption is more than 10M it means some leak in reconfigure
+# code path
+
+TEST [ $diff -lt 10000 ]
+
+trap - EXIT
+cleanup
diff --git a/tests/bugs/core/bug-1699025-brick-mux-detach-brick-fd-issue.t b/tests/bugs/core/bug-1699025-brick-mux-detach-brick-fd-issue.t
new file mode 100644
index 00000000000..1acbaa8dc0b
--- /dev/null
+++ b/tests/bugs/core/bug-1699025-brick-mux-detach-brick-fd-issue.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+function count_brick_processes {
+ pgrep glusterfsd | wc -l
+}
+
+cleanup
+
+#bug-1444596 - validating brick mux
+
+TEST glusterd
+TEST $CLI volume create $V0 $H0:$B0/brick{0,1}
+TEST $CLI volume create $V1 $H0:$B0/brick{2,3}
+
+TEST $CLI volume set all cluster.brick-multiplex on
+
+TEST $CLI volume start $V0
+TEST $CLI volume start $V1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 online_brick_count
+EXPECT 1 count_brick_processes
+
+TEST $CLI volume stop $V1
+# At the time initialize brick daemon it always keeps open
+# standard fd's (0, 1 , 2) so after stop 1 volume fd's should
+# be open
+nofds=$(ls -lrth /proc/`pgrep glusterfsd`/fd | grep dev/null | wc -l)
+TEST [ $((nofds)) -eq 3 ]
+
+cleanup
diff --git a/tests/bugs/core/bug-927616.t b/tests/bugs/core/bug-927616.t
index 6bb64743183..18257131ac7 100755
--- a/tests/bugs/core/bug-927616.t
+++ b/tests/bugs/core/bug-927616.t
@@ -3,6 +3,8 @@
. $(dirname $0)/../../include.rc
. $(dirname $0)/../../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup;
TEST glusterd
diff --git a/tests/bugs/ctime/issue-832.t b/tests/bugs/ctime/issue-832.t
new file mode 100755
index 00000000000..740f731ab73
--- /dev/null
+++ b/tests/bugs/ctime/issue-832.t
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../traps.rc
+
+#Trigger trusted.glusterfs.mdata setting codepath and see things work as expected
+cleanup
+
+TEST_USER=test-ctime-user
+TEST_UID=27341
+
+TEST useradd -o -M -u ${TEST_UID} ${TEST_USER}
+push_trapfunc "userdel --force ${TEST_USER}"
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume start $V0
+
+$GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+echo abc > $M0/test
+TEST chmod 755 $M0/
+TEST chmod 744 $M0/test
+TEST setfattr -x trusted.glusterfs.mdata $B0/$V0/test
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+$GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+su ${TEST_USER} -c "cat $M0/test"
+TEST getfattr -n trusted.glusterfs.mdata $B0/$V0/test
+
+cleanup
diff --git a/tests/bugs/distribute/bug-1125824.t b/tests/bugs/distribute/bug-1125824.t
index 3bafbf31fe5..7e401092273 100755
--- a/tests/bugs/distribute/bug-1125824.t
+++ b/tests/bugs/distribute/bug-1125824.t
@@ -4,6 +4,8 @@
. $(dirname $0)/../../volume.rc
. $(dirname $0)/../../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
create_files () {
for i in {1..10}; do
orig=$(printf %s/file%04d $1 $i)
diff --git a/tests/bugs/distribute/bug-1161156.t b/tests/bugs/distribute/bug-1161156.t
index fed90e7f478..2b9e15407ca 100755
--- a/tests/bugs/distribute/bug-1161156.t
+++ b/tests/bugs/distribute/bug-1161156.t
@@ -4,6 +4,8 @@
. $(dirname $0)/../../volume.rc
. $(dirname $0)/../../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup;
QDD=$(dirname $0)/quota
diff --git a/tests/bugs/distribute/bug-1161311.t b/tests/bugs/distribute/bug-1161311.t
index d88642edc32..62796068928 100755
--- a/tests/bugs/distribute/bug-1161311.t
+++ b/tests/bugs/distribute/bug-1161311.t
@@ -1,5 +1,7 @@
#!/bin/bash
+SCRIPT_TIMEOUT=350
+
# This tests for hard link preservation for files that are linked, when the
# file is undergoing migration
@@ -74,14 +76,12 @@ TEST glusterfs -s $H0 --volfile-id $V0 $M0;
TEST mkdir $M0/dir1
TEST mkdir -p $M0/dir2/dir3
-# Create a large file (6.4 GB), so that rebalance takes time
-# Reading from /dev/urandom is slow, so we'll cat it together
-dd if=/dev/urandom of=/tmp/FILE2 bs=64k count=10240
-for i in {1..10}; do
- cat /tmp/FILE2 >> $M0/dir1/FILE2
-done
-
-#dd if=/dev/urandom of=$M0/dir1/FILE2 bs=64k count=10240
+# Create a large file (8 GB), so that rebalance takes time
+# Since we really don't care about the contents of the file, we use fallocate
+# to generate the file much faster. We could also use truncate, which is even
+# faster, but rebalance could take advantage of an sparse file and migrate it
+# in an optimized way, but we don't want a fast migration.
+TEST fallocate -l 8G $M0/dir1/FILE2
# Rename the file to create a linkto, for rebalance to
# act on the file
@@ -89,6 +89,8 @@ done
## into separate bricks when brick count is 3
TEST mv $M0/dir1/FILE2 $M0/dir1/FILE1
+brick_loc=$(get_backend_paths $M0/dir1/FILE1)
+
# unmount and remount the volume
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
TEST glusterfs -s $H0 --volfile-id $V0 $M0;
@@ -98,7 +100,7 @@ TEST $CLI volume rebalance $V0 start force
# Wait for FILE to get the sticky bit on, so that file is under
# active rebalance, before creating the links
-TEST checksticky $B0/${V0}3/dir1/FILE1
+TEST checksticky $brick_loc
# Create the links
## FILE3 FILE5 FILE7 have hashes, c8c91469 566d26ce 22ce7eba
@@ -119,7 +121,7 @@ cd /
# Ideally for this test to have done its job, the file should still be
# under migration, so check the sticky bit again
-TEST checksticky $B0/${V0}3/dir1/FILE1
+TEST checksticky $brick_loc
# Wait for rebalance to complete
EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
@@ -152,6 +154,11 @@ TEST ln ./dir1/FILE7 ./FILE7
cd /
linkcountsrc=$(stat -c %h $M0/dir1/FILE1)
TEST [[ $linkcountsrc == 14 ]]
+
+
+# Stop the volume
+TEST $CLI volume stop $V0;
+
UMOUNT_LOOP ${B0}/${V0}{1..3}
rm -f ${B0}/brick{1..3}
cleanup;
diff --git a/tests/bugs/distribute/bug-1190734.t b/tests/bugs/distribute/bug-1190734.t
index d48d74d2c35..9256088f7a0 100644
--- a/tests/bugs/distribute/bug-1190734.t
+++ b/tests/bugs/distribute/bug-1190734.t
@@ -4,6 +4,8 @@
. $(dirname $0)/../../volume.rc
. $(dirname $0)/../../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
BRICK_COUNT=3
FILE_COUNT=100
FILE_COUNT_TIME=5
diff --git a/tests/bugs/distribute/bug-1193636.t b/tests/bugs/distribute/bug-1193636.t
index ccde02edc70..b377910336e 100644
--- a/tests/bugs/distribute/bug-1193636.t
+++ b/tests/bugs/distribute/bug-1193636.t
@@ -41,11 +41,13 @@ dd if=/dev/zero of=$M0/dir1/FILE2 bs=64k count=10240
# act on the file
TEST mv $M0/dir1/FILE2 $M0/dir1/FILE1
+brick_loc=$(get_backend_paths $M0/dir1/FILE1)
+
build_tester $(dirname $0)/bug-1193636.c
TEST $CLI volume rebalance $V0 start force
-TEST checksticky $B0/${V0}3/dir1/FILE1
+TEST checksticky $brick_loc
TEST setfattr -n "user.test1" -v "test1" $M0/dir1/FILE1
TEST setfattr -n "user.test2" -v "test1" $M0/dir1/FILE1
diff --git a/tests/bugs/distribute/bug-1600379.t b/tests/bugs/distribute/bug-1600379.t
new file mode 100644
index 00000000000..8d2f6154100
--- /dev/null
+++ b/tests/bugs/distribute/bug-1600379.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# Initialize
+#------------------------------------------------------------
+cleanup;
+
+# Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+# Create a volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}
+
+# Verify volume creation
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+# Start volume and verify successful start
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+#------------------------------------------------------------
+
+# Test case - Remove xattr from killed brick on lookup
+#------------------------------------------------------------
+# Create a dir and set custom xattr
+TEST mkdir $M0/testdir
+TEST setfattr -n user.attr -v val $M0/testdir
+xattr_val=`getfattr -d $B0/${V0}2/testdir | awk '{print $1}'`;
+TEST ${xattr_val}='user.attr="val"';
+
+# Kill 2nd brick process
+TEST kill_brick $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "1" online_brick_count
+
+# Remove custom xattr
+TEST setfattr -x user.attr $M0/testdir
+
+# Bring up the killed brick process
+TEST $CLI volume start $V0 force
+
+# Perform lookup
+sleep 5
+TEST ls $M0/testdir
+
+# Check brick xattrs
+xattr_val_2=`getfattr -d $B0/${V0}2/testdir`;
+TEST [ ${xattr_val_2} = ''] ;
+
+cleanup;
diff --git a/tests/bugs/distribute/bug-1667804.t b/tests/bugs/distribute/bug-1667804.t
new file mode 100644
index 00000000000..3f7c43111d7
--- /dev/null
+++ b/tests/bugs/distribute/bug-1667804.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../dht.rc
+
+function confirm_all_linkto_files ()
+{
+ inpath=$1
+ for infile in $inpath/*
+ do
+ echo $infile
+ ret1=$(is_dht_linkfile $infile)
+ if [ "$ret1" -eq 0 ]; then
+ echo "$infile is not a linkto file"
+ echo 0
+ return
+ fi
+ done
+ echo 1
+}
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+#Create a distributed volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..2};
+TEST $CLI volume start $V0
+
+# Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+#Create files and rename them in order to create linkto files
+TEST mkdir -p $M0/dir0/dir1
+TEST touch $M0/dir0/dir1/file-{1..50}
+
+for i in {1..50}; do
+ mv $M0/dir0/dir1/file-$i $M0/dir0/dir1/nfile-$i;
+done
+
+#Remove the second brick to force the creation of linkto files
+#on the removed brick
+
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0 $H0:$B0/${V0}2"
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 stop
+
+EXPECT "1" confirm_all_linkto_files $B0/${V0}2/dir0/dir1
+
+#Modify the xattrs of the linkto files on the removed brick to point to itself.
+
+target=$(cat $M0/.meta/graphs/active/$V0-dht/subvolumes/1/name)
+
+setfattr -n trusted.glusterfs.dht.linkto -v "$target\0" $B0/${V0}2/dir0/dir1/nfile*
+
+
+TEST rm -rf $M0/dir0
+
+cleanup;
diff --git a/tests/bugs/distribute/bug-1786679.t b/tests/bugs/distribute/bug-1786679.t
new file mode 100755
index 00000000000..219ce51c8a9
--- /dev/null
+++ b/tests/bugs/distribute/bug-1786679.t
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+SCRIPT_TIMEOUT=250
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../dht.rc
+
+
+# create 2 subvols
+# create a dir
+# create a file
+# change layout
+# remove the file
+# execute create from a different mount
+# Without the patch, the file will be present on both of the bricks
+
+cleanup
+
+function get_layout () {
+
+layout=`getfattr -n trusted.glusterfs.dht -e hex $1 2>&1 | grep dht | gawk -F"=" '{print $2}'`
+
+echo $layout
+
+}
+
+function set_layout()
+{
+ setfattr -n "trusted.glusterfs.dht" -v $1 $2
+}
+
+TEST glusterd
+TEST pidof glusterd
+
+BRICK1=$B0/${V0}-0
+BRICK2=$B0/${V0}-1
+
+TEST $CLI volume create $V0 $H0:$BRICK1 $H0:$BRICK2
+TEST $CLI volume start $V0
+
+# Mount FUSE and create symlink
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+TEST mkdir $M0/dir
+TEST touch $M0/dir/file
+TEST ! stat "$BRICK1/dir/file"
+TEST stat "$BRICK2/dir/file"
+
+layout1="$(get_layout "$BRICK1/dir")"
+layout2="$(get_layout "$BRICK2/dir")"
+
+TEST set_layout $layout1 "$BRICK2/dir"
+TEST set_layout $layout2 "$BRICK1/dir"
+
+TEST rm $M0/dir/file -f
+TEST gluster v set $V0 client-log-level DEBUG
+
+#Without the patch in place, this client will create the file in $BRICK2
+#which will lead to two files being on both the bricks when a new client
+#create the file with the same name
+TEST touch $M0/dir/file
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M1
+TEST touch $M1/dir/file
+
+TEST stat "$BRICK1/dir/file"
+TEST ! stat "$BRICK2/dir/file"
+
+cleanup
diff --git a/tests/bugs/distribute/issue-1327.t b/tests/bugs/distribute/issue-1327.t
new file mode 100755
index 00000000000..acd8c8c6614
--- /dev/null
+++ b/tests/bugs/distribute/issue-1327.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+SCRIPT_TIMEOUT=250
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../dht.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+BRICK1=$B0/${V0}-0
+BRICK2=$B0/${V0}-1
+
+TEST $CLI volume create $V0 $H0:$BRICK1 $H0:$BRICK2
+TEST $CLI volume start $V0
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+TEST mkdir $M0/dir
+
+#remove dir from one of the brick
+TEST rmdir $BRICK2/dir
+
+#safe cache timeout for lookup to be triggered
+sleep 2
+
+TEST ls $M0/dir
+
+TEST stat $BRICK2/dir
+
+cleanup
diff --git a/tests/bugs/distribute/overlap.py b/tests/bugs/distribute/overlap.py
index 0941d377624..2813979787b 100755
--- a/tests/bugs/distribute/overlap.py
+++ b/tests/bugs/distribute/overlap.py
@@ -17,7 +17,7 @@ def calculate_one (ov, nv):
def calculate_all (values):
total = 0
- nv_index = len(values) / 2
+ nv_index = len(values) // 2
for old_val in values[:nv_index]:
new_val = values[nv_index]
nv_index += 1
diff --git a/tests/bugs/ec/bug-1187474.t b/tests/bugs/ec/bug-1187474.t
index fb4b2082f8f..e6344c26e73 100644
--- a/tests/bugs/ec/bug-1187474.t
+++ b/tests/bugs/ec/bug-1187474.t
@@ -4,6 +4,8 @@
. $(dirname $0)/../../volume.rc
. $(dirname $0)/../../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
function check_dir()
{
local count
diff --git a/tests/bugs/ec/bug-1236065.t b/tests/bugs/ec/bug-1236065.t
index 9395aa33e8c..9181e73ec19 100644
--- a/tests/bugs/ec/bug-1236065.t
+++ b/tests/bugs/ec/bug-1236065.t
@@ -2,6 +2,7 @@
. $(dirname $0)/../../include.rc
. $(dirname $0)/../../volume.rc
+SCRIPT_TIMEOUT=400
cleanup
@@ -84,7 +85,6 @@ TEST pidof glusterd
EXPECT "$V0" volinfo_field $V0 'Volume Name'
EXPECT 'Started' volinfo_field $V0 'Status'
EXPECT '7' online_brick_count
-
## cleanup
cd
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
diff --git a/tests/bugs/ec/bug-1699866-check-reopen-fd.t b/tests/bugs/ec/bug-1699866-check-reopen-fd.t
new file mode 100644
index 00000000000..4386d010318
--- /dev/null
+++ b/tests/bugs/ec/bug-1699866-check-reopen-fd.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume set $V0 disperse.background-heals 0
+TEST $CLI volume set $V0 write-behind off
+TEST $CLI volume set $V0 open-behind off
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+TEST mkdir -p $M0/dir
+
+fd="$(fd_available)"
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "5" ec_child_up_count $V0 0
+
+TEST fd_open ${fd} rw $M0/dir/test
+TEST fd_write ${fd} "test1"
+TEST $CLI volume replace-brick ${V0} $H0:$B0/${V0}0 $H0:$B0/${V0}0_1 commit force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+TEST fd_write ${fd} "test2"
+TEST fd_close ${fd}
+
+cleanup
diff --git a/tests/bugs/ec/bug-1708156-honor-inodelk-contention-notify-on-partial-locks.t b/tests/bugs/ec/bug-1708156-honor-inodelk-contention-notify-on-partial-locks.t
new file mode 100644
index 00000000000..67fdb184b46
--- /dev/null
+++ b/tests/bugs/ec/bug-1708156-honor-inodelk-contention-notify-on-partial-locks.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function do_ls() {
+ local dir="${1}"
+ local i
+
+ for i in {1..50}; do
+ ls -l $M0/${dir} >/dev/null &
+ ls -l $M1/${dir} >/dev/null &
+ ls -l $M2/${dir} >/dev/null &
+ ls -l $M3/${dir} >/dev/null &
+ done
+ wait
+}
+
+function measure_time() {
+ {
+ LC_ALL=C
+ time -p "${@}"
+ } 2>&1 | awk '/^real/ { print $2 * 1000 }'
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+
+TEST $CLI volume set $V0 disperse.eager-lock on
+TEST $CLI volume set $V0 disperse.other-eager-lock on
+TEST $CLI volume set $V0 features.locks-notify-contention on
+TEST $CLI volume set $V0 disperse.eager-lock-timeout 10
+TEST $CLI volume set $V0 disperse.other-eager-lock-timeout 10
+
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M1
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M2
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M3
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 $M1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 $M2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 $M3
+TEST mkdir $M0/dir
+TEST touch $M0/dir/file.{1..10}
+
+# Run multiple 'ls' concurrently from multiple clients so that they collide and
+# cause partial locks.
+TEST [[ $(measure_time do_ls dir) -lt 10000 ]]
+
+cleanup
diff --git a/tests/bugs/fuse/bug-858215.t b/tests/bugs/fuse/bug-858215.t
index d2719a6e1d4..95999f6ad24 100755
--- a/tests/bugs/fuse/bug-858215.t
+++ b/tests/bugs/fuse/bug-858215.t
@@ -41,8 +41,8 @@ TEST stat $M0/newfile;
TEST rm $M0/newfile;
nfs_pid=$(cat $GLUSTERD_PIDFILEDIR/nfs/nfs.pid || echo -1);
-glustershd_pid=$(cat $GLUSTERD_PIDFILEDIR/glustershd/glustershd.pid || echo -1);
-
+glustershd_pid=`ps auxwww | grep glustershd | grep -v grep | awk -F " " '{print $2}'`
+TEST [ $glustershd_pid != 0 ];
pids=$(pidof glusterfs);
for i in $pids
do
diff --git a/tests/bugs/fuse/bug-985074.t b/tests/bugs/fuse/bug-985074.t
index d10fd9f8b41..ffa6df54144 100644
--- a/tests/bugs/fuse/bug-985074.t
+++ b/tests/bugs/fuse/bug-985074.t
@@ -30,7 +30,7 @@ TEST glusterd
TEST $CLI volume create $V0 $H0:$B0/$V0
TEST $CLI volume start $V0
-TEST $CLI volume set $V0 md-cache-timeout 3
+TEST $CLI volume set $V0 performance.stat-prefetch off
TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --entry-timeout=0 --attribute-timeout=0
TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M1 --entry-timeout=0 --attribute-timeout=0
@@ -40,8 +40,7 @@ TEST ln $M0/file $M0/file.link
TEST ls -ali $M0 $M1
TEST rm -f $M1/file.link
TEST ls -ali $M0 $M1
-# expire the md-cache timeout
-sleep 3
+
TEST mv $M0/file $M0/file.link
TEST stat $M0/file.link
TEST ! stat $M0/file
diff --git a/tests/bugs/fuse/many-groups-for-acl.t b/tests/bugs/fuse/many-groups-for-acl.t
index d959f750ee0..a51b1bc7267 100755
--- a/tests/bugs/fuse/many-groups-for-acl.t
+++ b/tests/bugs/fuse/many-groups-for-acl.t
@@ -38,6 +38,13 @@ do
done
TEST useradd -o -M -u ${NEW_UID} -g ${NEW_GID} -G ${NEW_USER}-${NEW_GIDS} ${NEW_USER}
+# Linux < 3.8 exports only first 32 gids of pid to userspace
+kernel_exports_few_gids=0
+if [ "$OSTYPE" = Linux ] && \
+ su -m ${NEW_USER} -c "grep ^Groups: /proc/self/status | wc -w | xargs -I@ expr @ - 1 '<' $LAST_GID - $NEW_GID + 1" > /dev/null; then
+ kernel_exports_few_gids=1
+fi
+
# preparation done, start the tests
TEST glusterd
@@ -48,6 +55,8 @@ TEST $CLI volume set $V0 nfs.disable off
TEST $CLI volume set ${V0} server.manage-gids off
TEST $CLI volume start ${V0}
+# This is just a synchronization hack to make sure the bricks are
+# up before going on.
EXPECT_WITHIN ${NFS_EXPORT_TIMEOUT} "1" is_nfs_export_available
# mount the volume with POSIX ACL support, without --resolve-gids
@@ -69,8 +78,8 @@ TEST [ $? -eq 0 ]
su -m ${NEW_USER} -c "touch ${M0}/first-32-gids-2/success > /dev/null"
TEST [ $? -eq 0 ]
-su -m ${NEW_USER} -c "touch ${M0}/gid-64/failure > /dev/null"
-TEST [ $? -ne 0 ]
+su -m ${NEW_USER} -c "touch ${M0}/gid-64/success--if-all-gids-exported > /dev/null"
+TEST [ $? -eq $kernel_exports_few_gids ]
su -m ${NEW_USER} -c "touch ${M0}/gid-120/failure > /dev/null"
TEST [ $? -ne 0 ]
diff --git a/tests/bugs/gfapi/bug-1319374.c b/tests/bugs/gfapi/bug-1319374.c
index bd80462e3ba..ea0dfb6b0f2 100644
--- a/tests/bugs/gfapi/bug-1319374.c
+++ b/tests/bugs/gfapi/bug-1319374.c
@@ -3,6 +3,7 @@
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
#define NO_INIT 1
diff --git a/tests/bugs/gfapi/bug-1447266/bug-1447266.t b/tests/bugs/gfapi/bug-1447266/bug-1447266.t
index 2bf72f8c6d7..45547f4f0e7 100644
--- a/tests/bugs/gfapi/bug-1447266/bug-1447266.t
+++ b/tests/bugs/gfapi/bug-1447266/bug-1447266.t
@@ -56,5 +56,5 @@ TEST ! $(dirname $0)/bug-1447266 $V0 $H0 "/.snaps/.././snap3"
TEST $(dirname $0)/bug-1447266 $V0 $H0 "/.snaps/../."
TEST $(dirname $0)/bug-1447266 $V0 $H0 "/.snaps/./snap1/./../snap1/dir/."
-cleanup_tester $(dirname $0)/bug-1319374
+cleanup_tester $(dirname $0)/bug-1447266
cleanup;
diff --git a/tests/bugs/glusterd/859927/repl.t b/tests/bugs/glusterd/859927/repl.t
index 70143e2c193..6e7c23b5b1d 100755
--- a/tests/bugs/glusterd/859927/repl.t
+++ b/tests/bugs/glusterd/859927/repl.t
@@ -23,6 +23,9 @@ TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
TEST $CLI volume set $V0 cluster.self-heal-daemon off
TEST $CLI volume set $V0 performance.stat-prefetch off
TEST $CLI volume set $V0 client-log-level DEBUG
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
TEST $CLI volume start $V0
TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0;
diff --git a/tests/bugs/glusterd/brick-mux-validation-in-cluster.t b/tests/bugs/glusterd/brick-mux-validation-in-cluster.t
index 4e570381701..b6af487a791 100644
--- a/tests/bugs/glusterd/brick-mux-validation-in-cluster.t
+++ b/tests/bugs/glusterd/brick-mux-validation-in-cluster.t
@@ -7,6 +7,20 @@ function count_brick_processes {
pgrep glusterfsd | wc -l
}
+function count_brick_pids {
+ $CLI_1 --xml volume status all | sed -n '/.*<pid>\([^<]*\).*/s//\1/p' \
+ | grep -v "N/A" | sort | uniq | wc -l
+}
+
+function count_N/A_brick_pids {
+ $CLI_1 --xml volume status all | sed -n '/.*<pid>\([^<]*\).*/s//\1/p' \
+ | grep -- '\-1' | sort | uniq | wc -l
+}
+
+function check_peers {
+ $CLI_2 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
cleanup;
TEST launch_cluster 3
@@ -48,4 +62,47 @@ TEST $CLI_1 volume stop $V1
EXPECT 3 count_brick_processes
-cleanup
+TEST $CLI_1 volume stop $META_VOL
+
+TEST $CLI_1 volume delete $META_VOL
+TEST $CLI_1 volume delete $V0
+TEST $CLI_1 volume delete $V1
+
+#bug-1773856 - Brick process fails to come up with brickmux on
+
+TEST $CLI_1 volume create $V0 $H1:$B1/${V0}1 $H2:$B2/${V0}1 $H3:$B3/${V0}1 force
+TEST $CLI_1 volume start $V0
+
+
+EXPECT 3 count_brick_processes
+
+#create and start a new volume
+TEST $CLI_1 volume create $V1 $H1:$B1/${V1}2 $H2:$B2/${V1}2 $H3:$B3/${V1}2 force
+TEST $CLI_1 volume start $V1
+
+EXPECT 3 count_brick_processes
+
+V2=patchy2
+TEST $CLI_1 volume create $V2 $H1:$B1/${V2}3 $H2:$B2/${V2}3 $H3:$B3/${V2}3 force
+TEST $CLI_1 volume start $V2
+
+EXPECT 3 count_brick_processes
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_pids
+
+TEST kill_node 1
+
+sleep 10
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers;
+
+$CLI_2 volume set $V0 performance.readdir-ahead on
+$CLI_2 volume set $V1 performance.readdir-ahead on
+
+TEST $glusterd_1;
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 0 count_N/A_brick_pids
+
+cleanup;
diff --git a/tests/bugs/glusterd/brick-mux-validation.t b/tests/bugs/glusterd/brick-mux-validation.t
index 03a476823ca..61b0455f9a8 100644
--- a/tests/bugs/glusterd/brick-mux-validation.t
+++ b/tests/bugs/glusterd/brick-mux-validation.t
@@ -24,7 +24,7 @@ TEST $CLI volume create $V0 $H0:$B0/${V0}{1..3}
TEST $CLI volume start $V0
EXPECT 1 count_brick_processes
-EXPECT 1 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids
EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 online_brick_count
pkill gluster
@@ -101,4 +101,4 @@ TEST $CLI_IGNORE_PARTITION volume reset-brick $V1 $H0:$B0/${V1}1 $H0:$B0/${V1}1
EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 online_brick_count
EXPECT 1 count_brick_processes
-cleanup; \ No newline at end of file
+cleanup;
diff --git a/tests/bugs/glusterd/brick-mux.t b/tests/bugs/glusterd/brick-mux.t
index eeaa3ebfea8..927940534c1 100644
--- a/tests/bugs/glusterd/brick-mux.t
+++ b/tests/bugs/glusterd/brick-mux.t
@@ -39,7 +39,7 @@ TEST glusterd
EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 online_brick_count
EXPECT 1 count_brick_processes
-TEST $CLI volume set $V1 performance.cache-size 32MB
+TEST $CLI volume set $V1 performance.io-cache-size 32MB
TEST $CLI volume stop $V1
TEST $CLI volume start $V1
diff --git a/tests/bugs/glusterd/brick-order-check-add-brick.t b/tests/bugs/glusterd/brick-order-check-add-brick.t
new file mode 100644
index 00000000000..0be31dac768
--- /dev/null
+++ b/tests/bugs/glusterd/brick-order-check-add-brick.t
@@ -0,0 +1,61 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+
+TEST verify_lvm_version;
+#Create cluster with 3 nodes
+TEST launch_cluster 3 -NO_DEBUG -NO_FORCE
+TEST setup_lvm 3
+
+TEST $CLI_1 peer probe $H2
+TEST $CLI_1 peer probe $H3
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
+TEST $CLI_1 volume create $V0 replica 3 $H1:$L1/$V0 $H2:$L2/$V0 $H3:$L3/$V0
+EXPECT '1 x 3 = 3' volinfo_field $V0 'Number of Bricks'
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI_1 volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+#add-brick with or without mentioning the replica count should not fail
+TEST $CLI_1 volume add-brick $V0 replica 3 $H1:$L1/${V0}_1 $H2:$L2/${V0}_1 $H3:$L3/${V0}_1
+EXPECT '2 x 3 = 6' volinfo_field $V0 'Number of Bricks'
+
+TEST $CLI_1 volume add-brick $V0 $H1:$L1/${V0}_2 $H2:$L2/${V0}_2 $H3:$L3/${V0}_2
+EXPECT '3 x 3 = 9' volinfo_field $V0 'Number of Bricks'
+
+#adding bricks from same host should fail the brick order check
+TEST ! $CLI_1 volume add-brick $V0 $H1:$L1/${V0}_3 $H1:$L1/${V0}_4 $H1:$L1/${V0}_5
+EXPECT '3 x 3 = 9' volinfo_field $V0 'Number of Bricks'
+
+#adding bricks from same host with force should succeed
+TEST $CLI_1 volume add-brick $V0 $H1:$L1/${V0}_3 $H1:$L1/${V0}_4 $H1:$L1/${V0}_5 force
+EXPECT '4 x 3 = 12' volinfo_field $V0 'Number of Bricks'
+
+TEST $CLI_1 volume stop $V0
+TEST $CLI_1 volume delete $V0
+
+TEST $CLI_1 volume create $V0 replica 2 $H1:$L1/${V0}1 $H2:$L2/${V0}1
+EXPECT '1 x 2 = 2' volinfo_field $V0 'Number of Bricks'
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI_1 volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+#Add-brick with Increasing replica count
+TEST $CLI_1 volume add-brick $V0 replica 3 $H3:$L3/${V0}1
+EXPECT '1 x 3 = 3' volinfo_field $V0 'Number of Bricks'
+
+#Add-brick with Increasing replica count from same host should fail
+TEST ! $CLI_1 volume add-brick $V0 replica 5 $H1:$L1/${V0}2 $H1:$L1/${V0}3
+
+#adding multiple bricks from same host should fail the brick order check
+TEST ! $CLI_1 volume add-brick $V0 replica 3 $H1:$L1/${V0}{4..6} $H2:$L2/${V0}{7..9}
+EXPECT '1 x 3 = 3' volinfo_field $V0 'Number of Bricks'
+
+cleanup
diff --git a/tests/bugs/glusterd/bug-1070734.t b/tests/bugs/glusterd/bug-1070734.t
index ea160d7ec6b..0afcb3b37b3 100755
--- a/tests/bugs/glusterd/bug-1070734.t
+++ b/tests/bugs/glusterd/bug-1070734.t
@@ -4,6 +4,8 @@
. $(dirname $0)/../../volume.rc
. $(dirname $0)/../../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup;
## Start glusterd
diff --git a/tests/bugs/glusterd/bug-1595320.t b/tests/bugs/glusterd/bug-1595320.t
index 3a289f386de..c10e11821a1 100644
--- a/tests/bugs/glusterd/bug-1595320.t
+++ b/tests/bugs/glusterd/bug-1595320.t
@@ -48,7 +48,7 @@ b2_pid_file=$(ls $GLUSTERD_PIDFILEDIR/vols/$V0/*d-backends-2*.pid)
b3_pid_file=$(ls $GLUSTERD_PIDFILEDIR/vols/$V0/*d-backends-3*.pid)
kill -9 $brick_pid
-EXPECT 0 count_brick_processes
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 0 count_brick_processes
# Unmount 3rd brick root from node
brick_root=$L3
diff --git a/tests/bugs/glusterd/bug-1696046.t b/tests/bugs/glusterd/bug-1696046.t
new file mode 100644
index 00000000000..e1c1eb2ceb9
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1696046.t
@@ -0,0 +1,113 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+function count_up_bricks {
+ $CLI --xml volume status $1 | grep '<status>1' | wc -l
+}
+
+function count_brick_processes {
+ pgrep glusterfsd | wc -l
+}
+
+logdir=`gluster --print-logdir`
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST $CLI volume set all cluster.brick-multiplex on
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3};
+TEST $CLI volume create $V1 replica 3 $H0:$B0/${V1}{1,2,3};
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST $CLI volume start $V1;
+EXPECT 'Started' volinfo_field $V1 'Status';
+
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 count_up_bricks $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 count_up_bricks $V1
+
+EXPECT 1 count_brick_processes
+
+# Mount V0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+function client-log-file-name()
+{
+ logfilename=$M0".log"
+ echo ${logfilename:1} | tr / -
+}
+
+function brick-log-file-name()
+{
+ logfilename=$B0"/"$V0"1.log"
+ echo ${logfilename:1} | tr / -
+}
+
+log_file=$logdir"/"`client-log-file-name`
+nofdlog=$(cat $log_file | grep " D " | wc -l)
+TEST [ $((nofdlog)) -eq 0 ]
+
+brick_log_file=$logdir"/bricks/"`brick-log-file-name`
+nofdlog=$(cat $brick_log_file | grep " D " | wc -l)
+TEST [ $((nofdlog)) -eq 0 ]
+
+## Set brick-log-level to DEBUG
+TEST $CLI volume set $V0 diagnostics.brick-log-level DEBUG
+
+# Do some operation
+touch $M0/file1
+
+# Check debug message debug message should be exist only for V0
+# Server xlator is common in brick_mux so after enabling DEBUG log
+# some debug message should be available for other xlators like posix
+
+brick_log_file=$logdir"/bricks/"`brick-log-file-name`
+nofdlog=$(cat $brick_log_file | grep file1 | grep -v server | wc -l)
+TEST [ $((nofdlog)) -ne 0 ]
+
+#Check if any debug log exist in client-log file
+nofdlog=$(cat $log_file | grep " D " | wc -l)
+TEST [ $((nofdlog)) -eq 0 ]
+
+## Set brick-log-level to INFO
+TEST $CLI volume set $V0 diagnostics.brick-log-level INFO
+
+## Set client-log-level to DEBUG
+TEST $CLI volume set $V0 diagnostics.client-log-level DEBUG
+
+# Do some operation
+touch $M0/file2
+
+nofdlog=$(cat $brick_log_file | grep " D " | grep file2 | wc -l)
+TEST [ $((nofdlog)) -eq 0 ]
+
+nofdlog=$(cat $log_file | grep " D " | wc -l)
+TEST [ $((nofdlog)) -ne 0 ]
+
+# Unmount V0
+TEST umount $M0
+
+#Mount V1
+TEST glusterfs --volfile-id=$V1 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+#do some operation
+touch $M0/file3
+
+
+# DEBUG log level is enabled only for V0 so no debug message should be available
+# in log specific to file2 creation except for server xlator, server xlator is
+# common xlator in brick mulitplex
+nofdlog=$(cat $brick_log_file | grep file3 | grep -v server | wc -l)
+TEST [ $((nofdlog)) -eq 0 ]
+
+# Unmount V1
+TEST umount $M0
+
+cleanup;
diff --git a/tests/bugs/glusterd/bug-1699339.t b/tests/bugs/glusterd/bug-1699339.t
new file mode 100644
index 00000000000..bb8d4f46eb8
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1699339.t
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+cleanup;
+
+NUM_VOLS=15
+
+
+get_brick_base () {
+ printf "%s/vol%02d" $B0 $1
+}
+
+function count_up_bricks {
+ vol=$1;
+ $CLI_1 --xml volume status $vol | grep '<status>1' | wc -l
+}
+
+create_volume () {
+
+ local vol_name=$(printf "%s-vol%02d" $V0 $1)
+
+ TEST $CLI_1 volume create $vol_name replica 3 $H1:$B1/${vol_name} $H2:$B2/${vol_name} $H3:$B3/${vol_name}
+ TEST $CLI_1 volume start $vol_name
+}
+
+TEST launch_cluster 3
+TEST $CLI_1 volume set all cluster.brick-multiplex on
+
+# The option accepts the value in the range from 5 to 200
+TEST ! $CLI_1 volume set all glusterd.vol_count_per_thread 210
+TEST ! $CLI_1 volume set all glusterd.vol_count_per_thread 4
+
+TEST $CLI_1 volume set all glusterd.vol_count_per_thread 5
+
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST $CLI_1 peer probe $H3;
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
+# Our infrastructure can't handle an arithmetic expression here. The formula
+# is (NUM_VOLS-1)*5 because it sees each TEST/EXPECT once but needs the other
+# NUM_VOLS-1 and there are 5 such statements in each iteration.
+TESTS_EXPECTED_IN_LOOP=28
+for i in $(seq 1 $NUM_VOLS); do
+ starttime="$(date +%s)";
+ create_volume $i
+done
+
+TEST kill_glusterd 1
+
+TESTS_EXPECTED_IN_LOOP=4
+for i in `seq 1 3 15`
+do
+vol1=$(printf "%s-vol%02d" $V0 $i)
+TEST $CLI_2 volume set $vol1 performance.readdir-ahead on
+done
+
+# Bring back 1st glusterd
+TEST $glusterd_1
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
+TESTS_EXPECTED_IN_LOOP=4
+for i in `seq 1 3 15`
+do
+vol1=$(printf "%s-vol%02d" $V0 $i)
+EXPECT_WITHIN $PROBE_TIMEOUT "on" volinfo_field_1 $vol1 performance.readdir-ahead
+done
+
+cleanup
diff --git a/tests/bugs/glusterd/bug-1720566.t b/tests/bugs/glusterd/bug-1720566.t
new file mode 100644
index 00000000000..99bcf6ff785
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1720566.t
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+. $(dirname $0)/../../volume.rc
+
+
+cleanup;
+V0="TestLongVolnamec363b7b536700ff06eedeae0dd9037fec363b7b536700ff06eedeae0dd9037fec363b7b536700ff06eedeae0dd9abcd"
+V1="TestLongVolname3102bd28a16c49440bd5210e4ec4d5d93102bd28a16c49440bd5210e4ec4d5d933102bd28a16c49440bd5210e4ebbcd"
+TEST launch_cluster 2;
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+$CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
+EXPECT 'Created' cluster_volinfo_field 1 $V0 'Status';
+$CLI_1 volume create $V1 $H1:$B1/$V1 $H2:$B2/$V1
+EXPECT 'Created' cluster_volinfo_field 1 $V1 'Status';
+
+$CLI_1 volume start $V0
+EXPECT 'Started' cluster_volinfo_field 1 $V0 'Status';
+
+$CLI_1 volume start $V1
+EXPECT 'Started' cluster_volinfo_field 1 $V1 'Status';
+
+#Mount FUSE
+TEST glusterfs -s $H1 --volfile-id=$V0 $M0;
+
+
+#Mount FUSE
+TEST glusterfs -s $H1 --volfile-id=$V1 $M1;
+
+TEST mkdir $M0/dir{1..4};
+TEST touch $M0/dir{1..4}/files{1..4};
+
+TEST mkdir $M1/dir{1..4};
+TEST touch $M1/dir{1..4}/files{1..4};
+
+TEST $CLI_1 volume add-brick $V0 $H1:$B1/${V0}_1 $H2:$B2/${V0}_1
+TEST $CLI_1 volume add-brick $V1 $H1:$B1/${V1}_1 $H2:$B2/${V1}_1
+
+
+TEST $CLI_1 volume rebalance $V0 start
+TEST $CLI_1 volume rebalance $V1 start
+
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 1 $V0
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 1 $V1
+
+cleanup;
diff --git a/tests/bugs/glusterd/check_elastic_server.t b/tests/bugs/glusterd/check_elastic_server.t
new file mode 100644
index 00000000000..41d2140aa2b
--- /dev/null
+++ b/tests/bugs/glusterd/check_elastic_server.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+. $(dirname $0)/../../volume.rc
+
+function cluster_rebalance_status {
+ local vol=$1
+ $CLI_2 volume status | grep -iw "Rebalance" -A 5 | grep "Status" | sed 's/.*: //'
+}
+
+cleanup;
+TEST launch_cluster 4;
+TEST $CLI_1 peer probe $H2;
+TEST $CLI_1 peer probe $H3;
+TEST $CLI_1 peer probe $H4;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 3 peer_count
+
+TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
+EXPECT 'Created' cluster_volinfo_field 1 $V0 'Status';
+
+$CLI_1 volume start $V0
+EXPECT 'Started' cluster_volinfo_field 1 $V0 'Status';
+
+#Mount invalid volume
+TEST ! glusterfs -s $H1 --volfile-id=$V0_NA $M0;
+
+#Mount FUSE
+TEST glusterfs -s $H1 --volfile-id=$V0 $M0;
+
+TEST mkdir $M0/dir{1..4};
+TEST touch $M0/dir{1..4}/files{1..4};
+
+TEST $CLI_1 volume remove-brick $V0 $H1:$B1/$V0 start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_remove_brick_status_completed_field "$V0 $H1:$B1/$V0"
+
+TEST $CLI_1 volume remove-brick $V0 $H1:$B1/$V0 commit
+
+kill_glusterd 1
+
+total_files=`find $M0 -name "files*" | wc -l`
+TEST [ $total_files -eq 16 ];
+
+TEST $CLI_2 volume add-brick $V0 $H3:$B3/$V0
+
+TEST $CLI_2 volume rebalance $V0 start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status $V0
+
+total_files=`find $M0 -name "files*" | wc -l`
+TEST [ $total_files -eq 16 ];
+
+TEST $CLI_2 volume add-brick $V0 $H4:$B4/$V0
+
+TEST $CLI_2 volume rebalance $V0 start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status $V0
+kill_glusterd 2
+
+total_files=`find $M0 -name "files*" | wc -l`
+TEST [ $total_files -eq 16 ];
+
+cleanup;
+
diff --git a/tests/bugs/glusterd/mgmt-handshake-and-volume-sync-post-glusterd-restart.t b/tests/bugs/glusterd/mgmt-handshake-and-volume-sync-post-glusterd-restart.t
index fdc0a73f60c..8001359e6b3 100644
--- a/tests/bugs/glusterd/mgmt-handshake-and-volume-sync-post-glusterd-restart.t
+++ b/tests/bugs/glusterd/mgmt-handshake-and-volume-sync-post-glusterd-restart.t
@@ -4,7 +4,7 @@
. $(dirname $0)/../../cluster.rc
function check_peers {
-$CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+eval \$CLI_$1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
}
cleanup
@@ -36,23 +36,35 @@ TEST [[ $OP_VERS_ORIG == $OP_VERS_NEW ]]
#bug-948686 - volume sync after bringing up the killed node
TEST $CLI_1 peer probe $H3
-EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers;
+EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers 1
+EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers 2
+EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers 3
TEST $CLI_1 volume create $V0 replica 2 $H1:$B1/$V0 $H1:$B1/${V0}_1 $H2:$B2/$V0 $H3:$B3/$V0
TEST $CLI_1 volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field_1 $V0 'Status'
TEST glusterfs --volfile-server=$H1 --volfile-id=$V0 $M0
#kill a node
TEST kill_node 3
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers 1
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers 2
#modify volume config to see change in volume-sync
TEST $CLI_1 volume set $V0 write-behind off
#add some files to the volume to see effect of volume-heal cmd
TEST touch $M0/{1..100};
TEST $CLI_1 volume stop $V0;
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT 'Stopped' volinfo_field_1 $V0 'Status'
+
TEST $glusterd_3;
-EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers
+EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers 1
+EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers 2
+EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers 3
+
+sleep 5
TEST $CLI_3 volume start $V0;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field_1 $V0 'Status'
TEST $CLI_2 volume stop $V0;
TEST $CLI_2 volume delete $V0;
diff --git a/tests/bugs/glusterd/optimized-basic-testcases.t b/tests/bugs/glusterd/optimized-basic-testcases.t
index dd98a65fa9a..b89ca22415e 100644
--- a/tests/bugs/glusterd/optimized-basic-testcases.t
+++ b/tests/bugs/glusterd/optimized-basic-testcases.t
@@ -32,6 +32,16 @@ function get_brick_host_uuid()
echo $host_uuid_list | awk '{print $1}'
}
+function generate_statedump_and_check_for_glusterd_info {
+ pid=`pidof glusterd`
+ #remove old stale statedumps
+ cleanup_statedump $pid
+ kill -USR1 $pid
+ #Wait till the statedump is generated
+ sleep 1
+ fname=$(ls $statedumpdir | grep -E "\.$pid\.dump\.")
+ cat $statedumpdir/$fname | grep "xlator.glusterd.priv" | wc -l
+}
cleanup;
@@ -59,6 +69,11 @@ TEST pidof glusterd;
TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
EXPECT 'Created' volinfo_field $V0 'Status';
+#bug-1786478 - default volume option after volume reset
+addr_family=`volinfo_field $V0 'transport.address-family'`
+TEST $CLI volume reset $V0
+EXPECT $addr_family volinfo_field $V0 'transport.address-family'
+
#bug-955588 - uuid validation
uuid=`grep UUID $GLUSTERD_WORKDIR/glusterd.info | cut -f2 -d=`
@@ -114,7 +129,8 @@ TEST ! $CLI volume set all $V0 cluster.op-version $OP_VERS_NEW
#bug-1022055 - validate log rotate command
-TEST $CLI volume log rotate $V0;
+TEST ! $CLI volume log rotate $V0;
+TEST $CLI volume log $V0 rotate;
#bug-1092841 - validating barrier enable/disable
@@ -276,7 +292,14 @@ TEST ! $CLI volume create "test" $H0:/var/lib/glusterd force
TEST ! $CLI volume create "test" $H0:/var/lib/glusterd/abc
TEST ! $CLI volume create "test" $H0:/var/lib/glusterd/abc force
mkdir -p /xyz/var/lib/glusterd/abc
-TEST $CLI volume create "test" $H0:/xyz/var/lib/glusterd/abc
+
+#bug 1716812 - volfile should be created with transport type both
+TEST $CLI volume create "test" transport tcp,rdma $H0:/xyz/var/lib/glusterd/abc
EXPECT 'Created' volinfo_field "test" 'Status';
+#While taking a statedump, there is a TRY_LOCK on call_frame, which might may cause
+#failure. So Adding a EXPECT_WITHIN
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" generate_statedump_and_check_for_glusterd_info
+
+cleanup_statedump `pidof glusterd`
cleanup
diff --git a/tests/bugs/glusterd/quorum-validation.t b/tests/bugs/glusterd/quorum-validation.t
index 05aef4edccb..3cc3351b43b 100644
--- a/tests/bugs/glusterd/quorum-validation.t
+++ b/tests/bugs/glusterd/quorum-validation.t
@@ -34,9 +34,13 @@ TEST ! $CLI_1 volume add-brick $V0 $H1:$B1/${V0}2
TEST ! $CLI_1 volume remove-brick $V0 $H1:$B1/${V0}0 start
TEST ! $CLI_1 volume set $V0 barrier enable
-# Now execute a command which goes through op state machine and it should fail
+#quorum is not met, rebalance/profile start should fail
+TEST ! $CLI_1 volume rebalance $V0 start
TEST ! $CLI_1 volume profile $V0 start
+#bug-1690753 - Volume stop when quorum not met is successful
+TEST ! $CLI_1 volume stop $V0
+
#Bring back the 2nd glusterd
TEST $glusterd_2
diff --git a/tests/bugs/glusterd/rebalance-in-cluster.t b/tests/bugs/glusterd/rebalance-in-cluster.t
index 9565faef01d..469ec6cd48e 100644
--- a/tests/bugs/glusterd/rebalance-in-cluster.t
+++ b/tests/bugs/glusterd/rebalance-in-cluster.t
@@ -4,6 +4,10 @@
. $(dirname $0)/../../cluster.rc
. $(dirname $0)/../../volume.rc
+function rebalance_status_field_1 {
+ $CLI_1 volume rebalance $1 status | awk '{print $7}' | sed -n 3p
+}
+
cleanup;
TEST launch_cluster 2;
TEST $CLI_1 peer probe $H2;
@@ -29,6 +33,11 @@ TEST $CLI_1 volume add-brick $V0 $H1:$B1/${V0}1 $H2:$B2/${V0}1
TEST $CLI_1 volume rebalance $V0 start
EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 1 $V0
+#bug - 1764119 - rebalance status should display detailed info when any of the node is dowm
+TEST kill_glusterd 2
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field_1 $V0
+
+TEST start_glusterd 2
#bug-1245142
$CLI_1 volume rebalance $V0 start &
diff --git a/tests/bugs/glusterd/rebalance-operations-in-single-node.t b/tests/bugs/glusterd/rebalance-operations-in-single-node.t
index 9144b4a5000..ef85887f440 100644
--- a/tests/bugs/glusterd/rebalance-operations-in-single-node.t
+++ b/tests/bugs/glusterd/rebalance-operations-in-single-node.t
@@ -119,13 +119,13 @@ TEST touch $M0/dir{21..30}/files{1..10};
TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{7,8}
TEST $CLI volume rebalance $V0 start force
-EXPECT_WITHIN 90 "completed" rebalance_status_field $V0
+EXPECT_WITHIN 180 "completed" rebalance_status_field $V0
TEST pkill gluster
TEST glusterd
TEST pidof glusterd
# status should be "completed" immediate after glusterd has respawned.
-EXPECT_WITHIN 5 "completed" rebalance_status_field $V0
+EXPECT_WITHIN 20 "completed" rebalance_status_field $V0
cleanup
diff --git a/tests/bugs/glusterd/enable-shared-storage-and-remove-brick-validation.t b/tests/bugs/glusterd/remove-brick-validation.t
index 11ed0d94d79..a0ff4ff6a24 100644
--- a/tests/bugs/glusterd/enable-shared-storage-and-remove-brick-validation.t
+++ b/tests/bugs/glusterd/remove-brick-validation.t
@@ -18,20 +18,6 @@ TEST $CLI_1 peer probe $H2;
EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count 1
-#test case for bug 1266818 - disabling enable-shared-storage option
-##should not delete user created volume with name glusterd_shared_storage
-
-## creating a volume with name glusterd_shared_storage
-TEST $CLI_1 volume create glusterd_shared_storage $H1:$B1/${V0}0 $H2:$B2/${V0}1
-TEST $CLI_1 volume start glusterd_shared_storage
-
-## disabling enable-shared-storage should not succeed and should not delete the
-## user created volume with name "glusterd_shared_storage"
-TEST ! $CLI_1 volume all enable-shared-storage disable
-
-## volume with name should exist
-TEST $CLI_1 volume info glusterd_shared_storage
-
#testcase: bug-1245045-remove-brick-validation
TEST $CLI_1 peer probe $H3;
diff --git a/tests/bugs/glusterd/removing-multiple-bricks-in-single-remove-brick-command.t b/tests/bugs/glusterd/removing-multiple-bricks-in-single-remove-brick-command.t
index 20c84d26b9c..00beab59137 100644
--- a/tests/bugs/glusterd/removing-multiple-bricks-in-single-remove-brick-command.t
+++ b/tests/bugs/glusterd/removing-multiple-bricks-in-single-remove-brick-command.t
@@ -49,6 +49,7 @@ EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
#Create a 3x3 dist-rep volume
TEST $CLI volume create $V1 replica 3 $H0:$B0/${V1}{0,1,2,3,4,5,6,7,8};
TEST $CLI volume start $V1
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "9" brick_count ${V1}
# Mount FUSE and create file/directory
TEST glusterfs -s $H0 --volfile-id $V1 $M0
diff --git a/tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t b/tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t
index cdb1a3399c9..e6e65c48456 100644
--- a/tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t
+++ b/tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t
@@ -55,9 +55,9 @@ TEST kill_glusterd 1
#Bring back 1st glusterd
TEST $glusterd_1
-# We need to wait till PROCESS_UP_TIMEOUT and then check shd service does not
-# come up on node 2
-sleep $PROCESS_UP_TIMEOUT
-EXPECT "N" shd_up_status_2
+# We need to wait till PROCESS_UP_TIMEOUT and then check shd service started
+#on node 2, because once glusterd regains quorum, it will restart all volume
+#level daemons
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" shd_up_status_2
cleanup;
diff --git a/tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t b/tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t
new file mode 100644
index 00000000000..a871e112d87
--- /dev/null
+++ b/tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t
@@ -0,0 +1,54 @@
+#! /bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+
+function check_peers {
+count=`$CLI_3 peer status | grep 'Peer in Cluster (Connected)' | wc -l`
+echo $count
+}
+
+function check_shd {
+ps aux | grep $1 | grep glustershd | wc -l
+}
+
+cleanup
+
+
+TEST launch_cluster 6
+
+TESTS_EXPECTED_IN_LOOP=25
+for i in $(seq 2 6); do
+ hostname="H$i"
+ TEST $CLI_1 peer probe ${!hostname}
+done
+
+
+EXPECT_WITHIN $PROBE_TIMEOUT 5 check_peers;
+for i in $(seq 1 5); do
+
+ TEST $CLI_1 volume create ${V0}_$i replica 3 $H1:$B1/${V0}_$i $H2:$B2/${V0}_$i $H3:$B3/${V0}_$i $H4:$B4/${V0}_$i $H5:$B5/${V0}_$i $H6:$B6/${V0}_$i
+ TEST $CLI_1 volume start ${V0}_$i force
+
+done
+
+#kill a node
+TEST kill_node 3
+
+TEST $glusterd_3;
+EXPECT_WITHIN $PROBE_TIMEOUT 5 check_peers
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 check_shd $H3
+
+for i in $(seq 1 5); do
+
+ TEST $CLI_1 volume stop ${V0}_$i
+ TEST $CLI_1 volume delete ${V0}_$i
+
+done
+
+for i in $(seq 1 6); do
+ hostname="H$i"
+ EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT 0 check_shd ${!hostname}
+done
+cleanup
diff --git a/tests/bugs/glusterd/validating-options-for-replicated-volume.t b/tests/bugs/glusterd/validating-options-for-replicated-volume.t
index ee231338ff1..ddc80b17870 100644
--- a/tests/bugs/glusterd/validating-options-for-replicated-volume.t
+++ b/tests/bugs/glusterd/validating-options-for-replicated-volume.t
@@ -61,10 +61,15 @@ TEST ! $CLI volume set $V0 background-self-heal-count " "
TEST $CLI volume set $V0 background-self-heal-count 10
EXPECT "10" volume_option $V0 cluster.background-self-heal-count
-TEST ! $CLI volume set $V0 cache-size ""
-TEST ! $CLI volume set $V0 cache-size " "
-TEST $CLI volume set $V0 cache-size 512MB
-EXPECT "512MB" volume_option $V0 performance.cache-size
+TEST ! $CLI volume set $V0 io-cache-size ""
+TEST ! $CLI volume set $V0 io-cache-size " "
+TEST $CLI volume set $V0 io-cache-size 64MB
+EXPECT "64MB" volume_option $V0 performance.io-cache-size
+
+TEST ! $CLI volume set $V0 quick-read-cache-size ""
+TEST ! $CLI volume set $V0 quick-read-cache-size " "
+TEST $CLI volume set $V0 quick-read-cache-size 512MB
+EXPECT "512MB" volume_option $V0 performance.quick-read-cache-size
TEST ! $CLI volume set $V0 self-heal-daemon ""
TEST ! $CLI volume set $V0 self-heal-daemon " "
diff --git a/tests/bugs/glusterfs-server/bug-852147.t b/tests/bugs/glusterfs-server/bug-852147.t
index c644cfa62dc..75db2a26e05 100755
--- a/tests/bugs/glusterfs-server/bug-852147.t
+++ b/tests/bugs/glusterfs-server/bug-852147.t
@@ -66,7 +66,7 @@ ren_file=$log_file".*"
rm -rf $ren_file
#Initiating log rotate
-TEST $CLI volume log rotate $V0
+TEST $CLI volume log $V0 rotate
#Capturing new log file's size
new_file_size=`file-size $log_file`
diff --git a/tests/bugs/glusterfs-server/bug-864222.t b/tests/bugs/glusterfs-server/bug-864222.t
index 3a46c283599..01a7a4e3afd 100755
--- a/tests/bugs/glusterfs-server/bug-864222.t
+++ b/tests/bugs/glusterfs-server/bug-864222.t
@@ -4,6 +4,8 @@
. $(dirname $0)/../../nfs.rc
. $(dirname $0)/../../volume.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup;
TEST glusterd
diff --git a/tests/bugs/glusterfs-server/bug-873549.t b/tests/bugs/glusterfs-server/bug-873549.t
index a3b2f9c9bf7..8b5534728fd 100644
--- a/tests/bugs/glusterfs-server/bug-873549.t
+++ b/tests/bugs/glusterfs-server/bug-873549.t
@@ -10,7 +10,7 @@ TEST $CLI volume info;
TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
-TEST $CLI volume set $V0 performance.cache-size 512MB
+TEST $CLI volume set $V0 performance.quick-read-cache-size 512MB
TEST $CLI volume start $V0
TEST $CLI volume statedump $V0 all
diff --git a/tests/bugs/glusterfs-server/bug-877992.t b/tests/bugs/glusterfs-server/bug-877992.t
index aeb73ed94dd..300000bcf2c 100755
--- a/tests/bugs/glusterfs-server/bug-877992.t
+++ b/tests/bugs/glusterfs-server/bug-877992.t
@@ -46,7 +46,9 @@ TEST $CLI volume create $V0 $H0:$B0/${V0}1;
EXPECT "$V0" volinfo_field $V0 'Volume Name';
EXPECT 'Created' volinfo_field $V0 'Status';
EXPECT 'createPre' cat /tmp/pre.out;
-EXPECT 'createPost' cat /tmp/post.out;
+# Spost.sh comes after S10selinux-label-brick.sh under create post hook script
+# list. So consider the delay in setting SELinux context on bricks
+EXPECT_WITHIN 5 'createPost' cat /tmp/post.out;
hooks_cleanup 'create'
diff --git a/tests/bugs/glusterfs-server/bug-887145.t b/tests/bugs/glusterfs-server/bug-887145.t
index 9b940259f55..db2cf3c050b 100755
--- a/tests/bugs/glusterfs-server/bug-887145.t
+++ b/tests/bugs/glusterfs-server/bug-887145.t
@@ -3,6 +3,8 @@
. $(dirname $0)/../../include.rc
. $(dirname $0)/../../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup;
TEST glusterd
@@ -27,7 +29,15 @@ chmod 600 $M0/file;
TEST mount_nfs $H0:/$V0 $N0 nolock;
-chown -R nfsnobody:nfsnobody $M0/dir;
+grep nfsnobody /etc/passwd > /dev/null
+if [ $? -eq 1 ]; then
+usr=nobody
+grp=nobody
+else
+usr=nfsnobody
+grp=nfsnobody
+fi
+chown -R $usr:$grp $M0/dir;
chown -R tmp_user:tmp_user $M0/other;
TEST $CLI volume set $V0 server.root-squash on;
@@ -36,7 +46,7 @@ EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
# create files and directories in the root of the glusterfs and nfs mount
# which is owned by root and hence the right behavior is getting EACCESS
-# as the fops are executed as nfsnobody.
+# as the fops are executed as nfsnobody/nobody.
touch $M0/foo 2>/dev/null;
TEST [ $? -ne 0 ]
touch $N0/foo 2>/dev/null;
@@ -59,7 +69,7 @@ cat $N0/passwd 1>/dev/null;
TEST [ $? -eq 0 ]
# create files and directories should succeed as the fops are being executed
-# inside the directory owned by nfsnobody
+# inside the directory owned by nfsnobody/nobody
TEST touch $M0/dir/file;
TEST touch $N0/dir/foo;
TEST mkdir $M0/dir/new;
diff --git a/tests/bugs/glusterfs-server/bug-904300.t b/tests/bugs/glusterfs-server/bug-904300.t
index eea1c5b5463..95d5d381c8b 100755
--- a/tests/bugs/glusterfs-server/bug-904300.t
+++ b/tests/bugs/glusterfs-server/bug-904300.t
@@ -4,6 +4,8 @@
. $(dirname $0)/../../nfs.rc
. $(dirname $0)/../../volume.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup;
# 1-8
diff --git a/tests/bugs/glusterfs/bug-844688.t b/tests/bugs/glusterfs/bug-844688.t
index 39f04092cf7..65f41b342a5 100755
--- a/tests/bugs/glusterfs/bug-844688.t
+++ b/tests/bugs/glusterfs/bug-844688.t
@@ -3,6 +3,17 @@
. $(dirname $0)/../../include.rc
. $(dirname $0)/../../volume.rc
+function check_callstack_log {
+ local text=$1
+ statedump_file=$(generate_mount_statedump $V0);
+ grep $text $statedump_file 2>/dev/null 1>/dev/null;
+ if [ $? -eq 0 ]; then
+ echo "1";
+ else
+ echo "0";
+ fi;
+}
+
cleanup;
TEST glusterd
@@ -15,19 +26,29 @@ mount_pid=$(get_mount_process_pid $V0);
# enable dumping of call stack creation and frame creation times in statedump
# monitoring is enabled by default
-TEST touch $M0/touchfile;
-(dd if=/dev/urandom of=$M0/file bs=5k 2>/dev/null 1>/dev/null)&
-back_pid=$!;
-statedump_file=$(generate_mount_statedump $V0);
-grep "callstack-creation-time" $statedump_file 2>/dev/null 1>/dev/null;
-TEST [ $? -eq 0 ];
-grep "frame-creation-time" $statedump_file 2>/dev/null 1>/dev/null;
-TEST [ $? -eq 0 ];
+# We want to make sure that there is a pending frame in gluster stack.
+# For that we are creating a blocking lock scenario.
+
+TEST touch $M0/lockfile;
+# Open two fd's on the same file
+exec 8>$M0/lockfile;
+exec 9>$M0/lockfile;
+
+# First flock will succeed and the second one will block, hence the background run.
+flock -x 8 ;
+flock -x 9 &
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" check_callstack_log "callstack-creation-time";
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" check_callstack_log "frame-creation-time";
+
+flock -u 8
+flock -u 9;
-kill -SIGTERM $back_pid;
-wait >/dev/null 2>&1;
+# Closing the fd's
+exec 8>&-
+exec 9>&-
-TEST rm -f $M0/touchfile $M0/file;
+TEST rm -f $M0/lockfile;
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
rm -f $statedumpdir/glusterdump.$mount_pid.*;
diff --git a/tests/bugs/glusterfs/bug-867253.t b/tests/bugs/glusterfs/bug-867253.t
index c2c6c2ab629..8c3c39baace 100644
--- a/tests/bugs/glusterfs/bug-867253.t
+++ b/tests/bugs/glusterfs/bug-867253.t
@@ -4,6 +4,8 @@
. $(dirname $0)/../../volume.rc
. $(dirname $0)/../../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup;
function file_count()
diff --git a/tests/bugs/glusterfs/bug-872923.t b/tests/bugs/glusterfs/bug-872923.t
index 72e8f230864..00e02c89cbe 100755
--- a/tests/bugs/glusterfs/bug-872923.t
+++ b/tests/bugs/glusterfs/bug-872923.t
@@ -4,6 +4,8 @@
. $(dirname $0)/../../volume.rc
. $(dirname $0)/../../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup;
TEST glusterd
diff --git a/tests/bugs/glusterfs/bug-873962-spb.t b/tests/bugs/glusterfs/bug-873962-spb.t
index db84a223089..db71cc0f6fe 100644
--- a/tests/bugs/glusterfs/bug-873962-spb.t
+++ b/tests/bugs/glusterfs/bug-873962-spb.t
@@ -14,6 +14,7 @@ TEST $CLI volume set $V0 performance.io-cache off
TEST $CLI volume set $V0 performance.write-behind off
TEST $CLI volume set $V0 performance.stat-prefetch off
TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.open-behind off
TEST $CLI volume set $V0 cluster.background-self-heal-count 0
TEST $CLI volume start $V0
TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=enable
diff --git a/tests/bugs/glusterfs/bug-902610.t b/tests/bugs/glusterfs/bug-902610.t
index b45e92b8a3b..112c947e116 100755
--- a/tests/bugs/glusterfs/bug-902610.t
+++ b/tests/bugs/glusterfs/bug-902610.t
@@ -28,7 +28,7 @@ function get_layout()
fi
# Figure out where the join point is.
- target=$( $PYTHON -c "print '%08x' % (0x$layout1_e + 1)")
+ target=$( $PYTHON -c "print('%08x' % (0x$layout1_e + 1))")
#echo "target for layout2 = $target" > /dev/tty
# The second layout should cover everything that the first doesn't.
diff --git a/tests/bugs/logging/bug-823081.t b/tests/bugs/logging/bug-823081.t
index 0ed8f4c26c1..bd1965d2d49 100755
--- a/tests/bugs/logging/bug-823081.t
+++ b/tests/bugs/logging/bug-823081.t
@@ -22,20 +22,20 @@ function set_tail ()
set_tail $V0;
TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
-tail=`tail -n 1 $logdir/$cmd_log_history | cut -d " " -f 5-`
+tail=`tail -n 1 $logdir/$cmd_log_history | cut -d " " -f 6-`
TEST [[ \"$tail\" == \"$tail_success\" ]]
TEST ! $CLI volume create $V0 $H0:$B0/${V0}{1,2};
-tail=`tail -n 1 $logdir/$cmd_log_history | cut -d " " -f 5-`
+tail=`tail -n 1 $logdir/$cmd_log_history | cut -d " " -f 6-`
TEST [[ \"$tail\" == \"$tail_failure\" ]]
set_tail $V1;
TEST gluster volume create $V1 $H0:$B0/${V1}{1,2} force;
-tail=`tail -n 1 $logdir/$cmd_log_history | cut -d " " -f 5-`
+tail=`tail -n 1 $logdir/$cmd_log_history | cut -d " " -f 6-`
TEST [[ \"$tail\" == \"$tail_success_force\" ]]
TEST ! gluster volume create $V1 $H0:$B0/${V1}{1,2} force;
-tail=`tail -n 1 $logdir/$cmd_log_history | cut -d " " -f 5-`
+tail=`tail -n 1 $logdir/$cmd_log_history | cut -d " " -f 6-`
TEST [[ \"$tail\" == \"$tail_failure_force\" ]]
cleanup;
diff --git a/tests/bugs/md-cache/bug-1632503.t b/tests/bugs/md-cache/bug-1632503.t
new file mode 100755
index 00000000000..aeb57f65639
--- /dev/null
+++ b/tests/bugs/md-cache/bug-1632503.t
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+TESTS_EXPECTED_IN_LOOP=5
+
+TEST glusterd;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2,3};
+
+TEST $CLI volume start $V0
+
+TEST $CLI volume set $V0 performance.md-cache-timeout 600
+TEST $CLI volume set $V0 performance.md-cache-statfs on
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+for i in $(seq 1 5); do
+ TEST_IN_LOOP df $M0;
+done
+
+cleanup;
diff --git a/tests/bugs/md-cache/bug-1726205.t b/tests/bugs/md-cache/bug-1726205.t
new file mode 100644
index 00000000000..795130e9bd8
--- /dev/null
+++ b/tests/bugs/md-cache/bug-1726205.t
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2,3};
+
+TEST $CLI volume start $V0
+
+TEST $CLI volume set $V0 group samba
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST touch $M0/file
+TEST "setfattr -n "user.DosStream.Zone.Identifier:\$DATA" -v '\0' $M0/file"
+TEST "getfattr -n "user.DosStream.Zone.Identifier:\$DATA" -e hex $M0/file | grep -q 0x00"
+
+cleanup;
diff --git a/tests/bugs/nfs/bug-1053579.t b/tests/bugs/nfs/bug-1053579.t
index f616eb2baa5..2f53172e24c 100755
--- a/tests/bugs/nfs/bug-1053579.t
+++ b/tests/bugs/nfs/bug-1053579.t
@@ -4,6 +4,8 @@
. $(dirname $0)/../../volume.rc
. $(dirname $0)/../../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup
# prepare the users and groups
diff --git a/tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t b/tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t
index b194b3744b4..c360db4c91c 100644
--- a/tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t
+++ b/tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t
@@ -2,6 +2,9 @@
. $(dirname $0)/../../include.rc
. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup;
TEST glusterd
diff --git a/tests/bugs/nfs/bug-1157223-symlink-mounting.t b/tests/bugs/nfs/bug-1157223-symlink-mounting.t
index 740d638193d..dea609ed193 100644
--- a/tests/bugs/nfs/bug-1157223-symlink-mounting.t
+++ b/tests/bugs/nfs/bug-1157223-symlink-mounting.t
@@ -3,6 +3,8 @@
. $(dirname $0)/../../include.rc
. $(dirname $0)/../../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup;
## Start and create a volume
diff --git a/tests/bugs/nfs/bug-1161092-nfs-acls.t b/tests/bugs/nfs/bug-1161092-nfs-acls.t
index 1304ad905bf..45a22e79336 100644
--- a/tests/bugs/nfs/bug-1161092-nfs-acls.t
+++ b/tests/bugs/nfs/bug-1161092-nfs-acls.t
@@ -3,6 +3,8 @@
. $(dirname $0)/../../include.rc
. $(dirname $0)/../../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup;
TEST glusterd
diff --git a/tests/bugs/nfs/bug-1166862.t b/tests/bugs/nfs/bug-1166862.t
index f986fe36ab7..c4f51a2d446 100755
--- a/tests/bugs/nfs/bug-1166862.t
+++ b/tests/bugs/nfs/bug-1166862.t
@@ -5,6 +5,8 @@
# Based on: bug-904065.t
#
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
# count the lines of a file, return 0 if the file does not exist
function count_lines()
{
diff --git a/tests/bugs/nfs/bug-1210338.t b/tests/bugs/nfs/bug-1210338.t
index 4232b9d8748..b5c9245affd 100644
--- a/tests/bugs/nfs/bug-1210338.t
+++ b/tests/bugs/nfs/bug-1210338.t
@@ -4,6 +4,8 @@
. $(dirname $0)/../../volume.rc
. $(dirname $0)/../../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup;
NFS_SOURCE=$(dirname $0)/bug-1210338.c
diff --git a/tests/bugs/nfs/bug-847622.t b/tests/bugs/nfs/bug-847622.t
index 3b836745a07..5ccee722ed9 100755
--- a/tests/bugs/nfs/bug-847622.t
+++ b/tests/bugs/nfs/bug-847622.t
@@ -4,6 +4,8 @@
. $(dirname $0)/../../nfs.rc
. $(dirname $0)/../../volume.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
case $OSTYPE in
NetBSD)
echo "Skip test on ACL which are not available on NetBSD" >&2
diff --git a/tests/bugs/nfs/bug-877885.t b/tests/bugs/nfs/bug-877885.t
index a47893d7fcb..dca315a3d01 100755
--- a/tests/bugs/nfs/bug-877885.t
+++ b/tests/bugs/nfs/bug-877885.t
@@ -4,6 +4,8 @@
. $(dirname $0)/../../nfs.rc
. $(dirname $0)/../../volume.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup;
TEST glusterd
diff --git a/tests/bugs/nfs/bug-904065.t b/tests/bugs/nfs/bug-904065.t
index effd5972c9a..0eba86e7ee8 100755
--- a/tests/bugs/nfs/bug-904065.t
+++ b/tests/bugs/nfs/bug-904065.t
@@ -7,6 +7,8 @@
# sufficient.
#
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
# count the lines of a file, return 0 if the file does not exist
function count_lines()
{
diff --git a/tests/bugs/nfs/bug-915280.t b/tests/bugs/nfs/bug-915280.t
index d70c36f0a53..bd279157c25 100755
--- a/tests/bugs/nfs/bug-915280.t
+++ b/tests/bugs/nfs/bug-915280.t
@@ -4,6 +4,8 @@
. $(dirname $0)/../../volume.rc
. $(dirname $0)/../../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup;
TEST glusterd
diff --git a/tests/bugs/nfs/bug-974972.t b/tests/bugs/nfs/bug-974972.t
index 7047825b3d4..975c46f85a4 100755
--- a/tests/bugs/nfs/bug-974972.t
+++ b/tests/bugs/nfs/bug-974972.t
@@ -4,6 +4,8 @@
. $(dirname $0)/../../volume.rc
. $(dirname $0)/../../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
#This script checks that nfs mount does not fail lookup on files with split-brain
cleanup;
diff --git a/tests/bugs/nfs/showmount-many-clients.t b/tests/bugs/nfs/showmount-many-clients.t
index f1b6859d528..c6c9c35d60a 100644
--- a/tests/bugs/nfs/showmount-many-clients.t
+++ b/tests/bugs/nfs/showmount-many-clients.t
@@ -12,6 +12,8 @@
# the groups into their own structures, this testcase passes.
#
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
. $(dirname $0)/../../include.rc
. $(dirname $0)/../../nfs.rc
. $(dirname $0)/../../volume.rc
diff --git a/tests/bugs/nfs/socket-as-fifo.t b/tests/bugs/nfs/socket-as-fifo.t
index db64bbb303b..d9b9e959ce3 100644
--- a/tests/bugs/nfs/socket-as-fifo.t
+++ b/tests/bugs/nfs/socket-as-fifo.t
@@ -4,6 +4,8 @@
. $(dirname $0)/../../volume.rc
. $(dirname $0)/../../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup;
TEST glusterd
diff --git a/tests/bugs/nfs/subdir-trailing-slash.t b/tests/bugs/nfs/subdir-trailing-slash.t
index a00959443d0..6a114877ac7 100644
--- a/tests/bugs/nfs/subdir-trailing-slash.t
+++ b/tests/bugs/nfs/subdir-trailing-slash.t
@@ -10,6 +10,7 @@
. $(dirname $0)/../../volume.rc
. $(dirname $0)/../../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
cleanup;
TEST glusterd
diff --git a/tests/bugs/nfs/zero-atime.t b/tests/bugs/nfs/zero-atime.t
index 631240a692f..2a940091ad9 100755
--- a/tests/bugs/nfs/zero-atime.t
+++ b/tests/bugs/nfs/zero-atime.t
@@ -8,6 +8,8 @@
. $(dirname $0)/../../volume.rc
. $(dirname $0)/../../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup
TEST glusterd
diff --git a/tests/bugs/posix/bug-1040275-brick-uid-reset-on-volume-restart.t b/tests/bugs/posix/bug-1040275-brick-uid-reset-on-volume-restart.t
index 016e637dd0d..3839c6e3380 100755
--- a/tests/bugs/posix/bug-1040275-brick-uid-reset-on-volume-restart.t
+++ b/tests/bugs/posix/bug-1040275-brick-uid-reset-on-volume-restart.t
@@ -11,6 +11,10 @@ function get_gid() {
stat -c '%g' $1;
}
+function check_stat() {
+ stat $1
+ echo $?
+}
cleanup;
@@ -37,7 +41,10 @@ EXPECT 100 get_uid $M0;
EXPECT 101 get_gid $M0;
TEST $CLI volume stop $V0;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" online_brick_count
+
TEST $CLI volume start $V0;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "6" online_brick_count
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
@@ -46,6 +53,7 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 3
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 4
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 5
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" check_stat $M0
EXPECT 100 get_uid $M0;
EXPECT 101 get_gid $M0;
diff --git a/tests/bugs/posix/bug-1651445.t b/tests/bugs/posix/bug-1651445.t
new file mode 100644
index 00000000000..4d08b69b9b0
--- /dev/null
+++ b/tests/bugs/posix/bug-1651445.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup
+
+TEST verify_lvm_version
+TEST glusterd
+TEST pidof glusterd
+TEST init_n_bricks 3
+TEST setup_lvm 3
+
+TEST $CLI volume create $V0 replica 3 $H0:$L{1,2,3}
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+#Setting the size in bytes
+TEST $CLI volume set $V0 storage.reserve 40MB
+
+#wait 5s to reset disk_space_full flag
+sleep 5
+
+TEST dd if=/dev/zero of=$M0/a bs=100M count=1
+TEST dd if=/dev/zero of=$M0/b bs=10M count=1
+
+# Wait 5s to update disk_space_full flag because thread check disk space
+# after every 5s
+
+sleep 5
+# setup_lvm create lvm partition of 150M and 40M are reserve so after
+# consuming more than 110M next dd should fail
+TEST ! dd if=/dev/zero of=$M0/c bs=5M count=1
+TEST dd if=/dev/urandom of=$M0/a bs=1022 count=1 oflag=seek_bytes,sync seek=102 conv=notrunc
+
+rm -rf $M0/*
+
+#Setting the size in percent and repeating the above steps
+TEST $CLI volume set $V0 storage.reserve 40
+
+sleep 5
+
+TEST dd if=/dev/zero of=$M0/a bs=80M count=1
+TEST dd if=/dev/zero of=$M0/b bs=10M count=1
+
+sleep 5
+TEST ! dd if=/dev/zero of=$M0/c bs=5M count=1
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/protocol/bug-1321578.t b/tests/bugs/protocol/bug-1321578.t
index 160fc408fba..83904817467 100644
--- a/tests/bugs/protocol/bug-1321578.t
+++ b/tests/bugs/protocol/bug-1321578.t
@@ -6,6 +6,7 @@ check_mounted () {
df | grep $1 | wc -l
}
+CHECK_MOUNT_TIMEOUT=7
TEST glusterd
TEST $CLI volume create $V0 $H0:$B0/$V0
@@ -23,15 +24,59 @@ $CLI system getspec $V0 | sed -e /username/d -e /password/d > fubar.vol
# This mount should fail because auth.allow doesn't include us.
TEST $GFS -f fubar.vol $M0
+EXPECT_WITHIN $CHECK_MOUNT_TIMEOUT 0 check_mounted $M0
+
+# Add tests when only username is present, but not password
+# "System getspec" will include the username and password if the request comes
+# from a server (which we are). Unfortunately, this will cause authentication
+# to succeed in auth.login regardless of whether auth.addr is working properly
+# or not, which is useless to us. To get a proper test, strip out those lines.
+$CLI system getspec $V0 | sed -e /password/d > fubar.vol
+
+# This mount should fail because auth.allow doesn't include our password.
+TEST $GFS -f fubar.vol $M0
+
# If we had DONT_EXPECT_WITHIN we could use that, but we don't.
-sleep 10
-EXPECT 0 check_mounted $M0
+EXPECT_WITHIN $CHECK_MOUNT_TIMEOUT 0 check_mounted $M0
+
+# Now, add a test for login failure when server doesn't have the password entry
+# Add tests when only username is present, but not password
+# "System getspec" will include the username and password if the request comes
+# from a server (which we are). Unfortunately, this will cause authentication
+# to succeed in auth.login regardless of whether auth.addr is working properly
+# or not, which is useless to us. To get a proper test, strip out those lines.
+$CLI system getspec $V0 > fubar.vol
+TEST $CLI volume stop $V0
+
+sed -i -e '/password /d' /var/lib/glusterd/vols/$V0/$V0.*$V0.vol
+
+TEST $CLI volume start $V0
+
+# This mount should fail because auth.allow doesn't include our password.
+TEST $GFS -f fubar.vol $M0
+
+EXPECT_WITHIN $CHECK_MOUNT_TIMEOUT 0 check_mounted $M0
# Set auth.allow to include us. This mount should therefore succeed.
TEST $CLI volume set $V0 auth.allow $H0
+$CLI system getspec $V0 | sed -e /password/d > fubar.vol
+
+TEST $GFS -f fubar.vol $M0
+EXPECT_WITHIN $CHECK_MOUNT_TIMEOUT 1 check_mounted $M0
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# Set auth.reject to include us. This mount should therefore fail.
+TEST $CLI volume stop $V0
+
+TEST $CLI volume set $V0 auth.allow "\*"
+TEST $CLI volume set $V0 auth.reject $H0
+TEST $CLI volume start $V0
+
+# Do this, so login module is not in picture
+$CLI system getspec $V0 | sed -e /password/d > fubar.vol
TEST $GFS -f fubar.vol $M0
-sleep 10
-EXPECT 1 check_mounted $M0
+EXPECT_WITHIN $CHECK_MOUNT_TIMEOUT 0 check_mounted $M0
cleanup
diff --git a/tests/bugs/protocol/bug-1390914.t b/tests/bugs/protocol/bug-1390914.t
new file mode 100644
index 00000000000..e3dab92de5a
--- /dev/null
+++ b/tests/bugs/protocol/bug-1390914.t
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+cleanup;
+
+#test that fops are not wound on anon-fd when fd is not open on that brick
+TEST glusterd;
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3};
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 diagnostics.client-log-level DEBUG
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+TEST $GFS -s $H0 --volfile-id=$V0 --direct-io-mode=enable $M0;
+
+TEST touch $M0/1
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST fd_open 200 'w' "$M0/1"
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+#lk should only happen on 2 bricks, if there is a bug, it will plant a lock
+#with anon-fd on first-brick which will never be released because flush won't
+#be wound below server xlator for anon-fd
+TEST flock -x -n 200
+TEST fd_close 200
+
+TEST fd_open 200 'w' "$M0/1"
+#this lock will fail if there is a stale lock
+TEST flock -x -n 200
+TEST fd_close 200
+cleanup;
diff --git a/tests/bugs/protocol/bug-1433815-auth-allow.t b/tests/bugs/protocol/bug-1433815-auth-allow.t
index fa22ad8afd5..a78c0eb7111 100644
--- a/tests/bugs/protocol/bug-1433815-auth-allow.t
+++ b/tests/bugs/protocol/bug-1433815-auth-allow.t
@@ -17,6 +17,7 @@ TEST $CLI volume create $V0 $H0:$B0/$V0
# Set auth.allow so it *doesn't* include ourselves.
TEST $CLI volume set $V0 auth.allow 1.2.3.4
TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
# "System getspec" will include the username and password if the request comes
# from a server (which we are). Unfortunately, this will cause authentication
diff --git a/tests/bugs/quota/bug-1035576.t b/tests/bugs/quota/bug-1035576.t
index eaf4439a063..cbc1b69ebb3 100644
--- a/tests/bugs/quota/bug-1035576.t
+++ b/tests/bugs/quota/bug-1035576.t
@@ -18,6 +18,9 @@ TEST $CLI volume set $V0 performance.write-behind off
TEST $CLI volume set $V0 performance.stat-prefetch off
TEST $CLI volume set $V0 performance.read-ahead off
TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
TEST $CLI volume quota $V0 enable
TEST kill_brick $V0 $H0 $B0/${V0}0
diff --git a/tests/bugs/quota/bug-1087198.t b/tests/bugs/quota/bug-1087198.t
index 95133085f13..618a46b957d 100644
--- a/tests/bugs/quota/bug-1087198.t
+++ b/tests/bugs/quota/bug-1087198.t
@@ -17,6 +17,8 @@
. $(dirname $0)/../../volume.rc
. $(dirname $0)/../../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup;
QDD=$(dirname $0)/quota
diff --git a/tests/bugs/quota/bug-1153964.t b/tests/bugs/quota/bug-1153964.t
index d84a9b36d26..2e449d3ba00 100644
--- a/tests/bugs/quota/bug-1153964.t
+++ b/tests/bugs/quota/bug-1153964.t
@@ -4,6 +4,8 @@
. $(dirname $0)/../../volume.rc
. $(dirname $0)/../../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
function rename_loop()
{
local i=0
diff --git a/tests/bugs/quota/bug-1243798.t b/tests/bugs/quota/bug-1243798.t
index 53f8b10adba..fa6abeb08fb 100644
--- a/tests/bugs/quota/bug-1243798.t
+++ b/tests/bugs/quota/bug-1243798.t
@@ -4,6 +4,8 @@
. $(dirname $0)/../../volume.rc
. $(dirname $0)/../../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup;
TEST glusterd
diff --git a/tests/bugs/readdir-ahead/bug-1436090.t b/tests/bugs/readdir-ahead/bug-1436090.t
index 58e9093f1c3..e0877f15684 100755
--- a/tests/bugs/readdir-ahead/bug-1436090.t
+++ b/tests/bugs/readdir-ahead/bug-1436090.t
@@ -19,12 +19,12 @@ EXPECT 'Started' cluster_volinfo_field 1 $V0 'Status';
TEST glusterfs -s $H1 --volfile-id $V0 $M0;
TEST mkdir $M0/dir1
-# Create a large file (3.2 GB), so that rebalance takes time
-# Reading from /dev/urandom is slow, so we will cat it together
-dd if=/dev/urandom of=/tmp/FILE2 bs=64k count=10240
-for i in {1..5}; do
- cat /tmp/FILE2 >> $M0/dir1/foo
-done
+# Create a large file (4 GB), so that rebalance takes time
+# Since we really don't care about the contents of the file, we use fallocate
+# to generate the file much faster. We could also use truncate, which is even
+# faster, but rebalance could take advantage of an sparse file and migrate it
+# in an optimized way, but we don't want a fast migration.
+TEST fallocate -l 4G $M0/dir1/foo
TEST mv $M0/dir1/foo $M0/dir1/bar
diff --git a/tests/bugs/readdir-ahead/bug-1670253-consistent-metadata.t b/tests/bugs/readdir-ahead/bug-1670253-consistent-metadata.t
new file mode 100644
index 00000000000..6adfc17c92c
--- /dev/null
+++ b/tests/bugs/readdir-ahead/bug-1670253-consistent-metadata.t
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 readdir-ahead on #on by default as of writing this .t.
+TEST $CLI volume set $V0 consistent-metadata on
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+TEST touch $M0/FILE
+echo "abc" >> $M0/FILE
+EXPECT "^0$" echo $?
+EXPECT "abc" cat $M0/FILE
+echo "truncate" >$M0/FILE
+EXPECT "^0$" echo $?
+EXPECT "truncate" cat $M0/FILE
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+cleanup;
diff --git a/tests/bugs/replicate/bug-1046624.t b/tests/bugs/replicate/bug-1046624.t
index 9ae40879228..e2762ea6764 100755
--- a/tests/bugs/replicate/bug-1046624.t
+++ b/tests/bugs/replicate/bug-1046624.t
@@ -25,11 +25,12 @@ TEST $CLI volume start $V0;
EXPECT 'Started' volinfo_field $V0 'Status';
## Mount native
-TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0 --use-readdirp=no
+TEST ${GFS} --volfile-server=$H0 --volfile-id=$V0 --use-readdirp=no $M0
TEST `echo "TEST-FILE" > $M0/File`
TEST `mkdir $M0/Dir`
TEST kill_brick $V0 $H0 $B0/${V0}-0
+EXPECT_WITHIN ${PROCESS_DOWN_TIMEOUT} "^0$" afr_child_up_status $V0 0
TEST `ln -s $M0/File $M0/Link1`
TEST `ln -s $M0/Dir $M0/Link2`
diff --git a/tests/bugs/replicate/bug-1058797.t b/tests/bugs/replicate/bug-1058797.t
index 99ab3eb3a66..598062a0dab 100644
--- a/tests/bugs/replicate/bug-1058797.t
+++ b/tests/bugs/replicate/bug-1058797.t
@@ -12,6 +12,9 @@ TEST glusterd
TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1};
TEST $CLI volume start $V0
TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
# FUSE mount;create a file
TEST glusterfs -s $H0 --volfile-id $V0 $M0
diff --git a/tests/bugs/replicate/bug-1101647.t b/tests/bugs/replicate/bug-1101647.t
index 8f420eec012..708bc1a1e29 100644
--- a/tests/bugs/replicate/bug-1101647.t
+++ b/tests/bugs/replicate/bug-1101647.t
@@ -12,6 +12,8 @@ TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
TEST $CLI volume start $V0;
TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
#Create base entry in indices/xattrop
echo "Data">$M0/file
diff --git a/tests/bugs/replicate/bug-1130892.t b/tests/bugs/replicate/bug-1130892.t
index c7988fd648b..c7509f33cc2 100644
--- a/tests/bugs/replicate/bug-1130892.t
+++ b/tests/bugs/replicate/bug-1130892.t
@@ -16,6 +16,11 @@ EXPECT 'Created' volinfo_field $V0 'Status';
# Disable self-heal daemon
TEST gluster volume set $V0 self-heal-daemon off
+# Enable Client side heal
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+
# Disable all perf-xlators
TEST $CLI volume set $V0 performance.quick-read off
TEST $CLI volume set $V0 performance.io-cache off
@@ -28,7 +33,7 @@ TEST $CLI volume start $V0;
EXPECT 'Started' volinfo_field $V0 'Status';
# FUSE Mount
-TEST glusterfs -s $H0 --volfile-id $V0 $M0
+TEST ${GFS} -s $H0 --volfile-id $V0 $M0
# Create files and dirs
TEST mkdir -p $M0/one/two/
@@ -36,6 +41,7 @@ TEST `echo "Carpe diem" > $M0/one/two/three`
# Simulate disk-replacement
TEST kill_brick $V0 $H0 $B0/${V0}-1
+EXPECT_WITHIN ${PROCESS_DOWN_TIMEOUT} "^0$" afr_child_up_status $V0 1
TEST rm -rf $B0/${V0}-1/one
TEST rm -rf $B0/${V0}-1/.glusterfs
@@ -50,10 +56,12 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
TEST stat $M0/one
+sleep 1
+
# Check pending xattrs
EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 data
EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 entry
-EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 metadata
+EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 metadata
TEST gluster volume set $V0 self-heal-daemon on
diff --git a/tests/bugs/replicate/bug-1134691-afr-lookup-metadata-heal.t b/tests/bugs/replicate/bug-1134691-afr-lookup-metadata-heal.t
index 44c2ed25f9d..b69a38ae788 100644
--- a/tests/bugs/replicate/bug-1134691-afr-lookup-metadata-heal.t
+++ b/tests/bugs/replicate/bug-1134691-afr-lookup-metadata-heal.t
@@ -10,6 +10,9 @@ TEST pidof glusterd
TEST $CLI volume create $V0 replica 3 $H0:$B0/brick{0,1,2}
TEST $CLI volume set $V0 performance.stat-prefetch off
TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
TEST $CLI volume start $V0
TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
@@ -22,9 +25,11 @@ iatt=$(stat -c "%g:%u:%A" file)
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+EXPECT 2 get_pending_heal_count $V0
#Trigger metadataheal
TEST stat file
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
#iattrs must be matching
iatt1=$(stat -c "%g:%u:%A" $B0/brick0/file)
diff --git a/tests/bugs/replicate/bug-1180545.t b/tests/bugs/replicate/bug-1180545.t
index e9531625ee2..5e40edd6c38 100644
--- a/tests/bugs/replicate/bug-1180545.t
+++ b/tests/bugs/replicate/bug-1180545.t
@@ -7,6 +7,31 @@
. $(dirname $0)/../../volume.rc
. $(dirname $0)/../../afr.rc
+function check_sh_entries() {
+ local expected="$1"
+ local count=
+ local good="0"
+ shift
+
+ for i in $*; do
+ count="$(count_sh_entries $i)"
+ if [[ "x${count}" == "x${expected}" ]]; then
+ good="$((good + 1))"
+ fi
+ done
+ if [[ "x${good}" != "x${last_good}" ]]; then
+ last_good="${good}"
+# This triggers a sweep of the heal index. However if more than one brick
+# tries to heal the same directory at the same time, one of them will take
+# the lock and the other will give up, waiting for the next heal cycle, which
+# is set to 60 seconds (the minimum valid value). So, each time we detect
+# that one brick has completed the heal, we trigger another heal.
+ $CLI volume heal $V0
+ fi
+
+ echo "${good}"
+}
+
cleanup;
TEST glusterd
@@ -15,6 +40,7 @@ TEST pidof glusterd
TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
TEST $CLI volume set $V0 cluster.heal-timeout 60
TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 stat-prefetch off
TEST $CLI volume start $V0
TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
@@ -35,13 +61,16 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
#Trigger heal and verify number of entries in backend
TEST $CLI volume set $V0 cluster.self-heal-daemon on
-EXPECT_WITHIN PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
TEST $CLI volume heal $V0
-EXPECT_WITHIN $HEAL_TIMEOUT '2' count_sh_entries $B0/brick0
-EXPECT_WITHIN $HEAL_TIMEOUT '2' count_sh_entries $B0/brick1
+last_good=""
+
+EXPECT_WITHIN $HEAL_TIMEOUT "2" check_sh_entries 2 $B0/brick{0,1}
+
#Two entries for DIR and two for FILE
EXPECT_WITHIN $HEAL_TIMEOUT "4" get_pending_heal_count $V0
TEST diff <(ls $B0/brick0/DIR) <(ls $B0/brick1/DIR)
diff --git a/tests/bugs/replicate/bug-1221481-allow-fops-on-dir-split-brain.t b/tests/bugs/replicate/bug-1221481-allow-fops-on-dir-split-brain.t
index c4752c488f4..6ff471fbf15 100644
--- a/tests/bugs/replicate/bug-1221481-allow-fops-on-dir-split-brain.t
+++ b/tests/bugs/replicate/bug-1221481-allow-fops-on-dir-split-brain.t
@@ -11,19 +11,27 @@ TEST pidof glusterd;
TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1};
TEST $CLI volume set $V0 cluster.self-heal-daemon off
TEST $CLI volume start $V0;
-TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
TEST mkdir $M0/dir
TEST touch $M0/dir/file{1..5}
#Create entry split-brain
TEST kill_brick $V0 $H0 $B0/$V0"1"
+EXPECT_WITHIN ${PROCESS_DOWN_TIMEOUT} "^0$" afr_child_up_status $V0 1
TEST touch $M0/dir/FILE
+EXPECT_WITHIN ${UMOUNT_TIMEOUT} "^Y$" force_umount $M0
TEST $CLI volume start $V0 force
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT '1' afr_child_up_status_meta $M0 $V0-replicate-0 0
EXPECT_WITHIN $CHILD_UP_TIMEOUT '1' afr_child_up_status_meta $M0 $V0-replicate-0 1
TEST kill_brick $V0 $H0 $B0/$V0"0"
+EXPECT_WITHIN ${PROCESS_DOWN_TIMEOUT} "^0$" afr_child_up_status $V0 0
TEST touch $M0/dir/FILE
+EXPECT_WITHIN ${UMOUNT_TIMEOUT} "^Y$" force_umount $M0
TEST $CLI volume start $V0 force
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
EXPECT_WITHIN $CHILD_UP_TIMEOUT '1' afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT '1' afr_child_up_status_meta $M0 $V0-replicate-0 1
cd $M0/dir
EXPECT "6" echo $(ls | wc -l)
diff --git a/tests/bugs/replicate/bug-1340623-mkdir-fails-remove-brick-started.t b/tests/bugs/replicate/bug-1340623-mkdir-fails-remove-brick-started.t
index 5467127bd59..6d177a7d3f8 100644
--- a/tests/bugs/replicate/bug-1340623-mkdir-fails-remove-brick-started.t
+++ b/tests/bugs/replicate/bug-1340623-mkdir-fails-remove-brick-started.t
@@ -3,6 +3,9 @@
. $(dirname $0)/../../include.rc
. $(dirname $0)/../../volume.rc
. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup;
TEST glusterd
diff --git a/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t b/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t
index 0767f47fdda..10ce0131f4f 100644
--- a/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t
+++ b/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t
@@ -49,25 +49,15 @@ TEST $CLI volume start $V0 force
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
-#Kill brick 0 and turn on the client side heal and do ls to trigger the heal.
-#The pending xattrs on bricks 1 & 2 should have pending entry on brick 0.
-TEST kill_brick $V0 $H0 $B0/${V0}0
+# We were killing one brick and checking that entry heal does not reset the
+# pending xattrs for the down brick. Now that we need all bricks to be up for
+# entry heal, I'm removing that test from the .t
+
TEST $CLI volume set $V0 cluster.data-self-heal on
TEST $CLI volume set $V0 cluster.metadata-self-heal on
TEST $CLI volume set $V0 cluster.entry-self-heal on
TEST ls $M0
-EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1
-EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2
-EXPECT_WITHIN $HEAL_TIMEOUT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1
-EXPECT_WITHIN $HEAL_TIMEOUT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2
-
-#Bring back all the bricks and trigger the heal again by doing ls. Now the
-#pending xattrs on all the bricks should be 0.
-TEST $CLI volume start $V0 force
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
-TEST ls $M0
-
TEST cat $M0/f1
TEST cat $M0/f2
TEST cat $M0/f3
diff --git a/tests/bugs/replicate/bug-1468279-source-not-blaming-sinks.t b/tests/bugs/replicate/bug-1468279-source-not-blaming-sinks.t
deleted file mode 100644
index 054a4adb90d..00000000000
--- a/tests/bugs/replicate/bug-1468279-source-not-blaming-sinks.t
+++ /dev/null
@@ -1,64 +0,0 @@
-#!/bin/bash
-. $(dirname $0)/../../include.rc
-. $(dirname $0)/../../volume.rc
-cleanup;
-
-TEST glusterd
-TEST pidof glusterd
-TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
-TEST $CLI volume start $V0
-TEST $CLI volume set $V0 cluster.self-heal-daemon off
-TEST $CLI volume set $V0 cluster.metadata-self-heal off
-TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0;
-TEST touch $M0/file
-
-# Kill B1, create a pending metadata heal.
-TEST kill_brick $V0 $H0 $B0/${V0}0
-TEST setfattr -n user.xattr -v value1 $M0/file
-EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/file
-EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2/file
-
-# Kill B2, heal from B3 to B1.
-TEST $CLI volume start $V0 force
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
-TEST kill_brick $V0 $H0 $B0/${V0}1
-TEST $CLI volume set $V0 cluster.self-heal-daemon on
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
-$CLI volume heal $V0
-EXPECT_WITHIN $HEAL_TIMEOUT "00000000" afr_get_specific_changelog_xattr $B0/${V0}2/file trusted.afr.$V0-client-0 "metadata"
-TEST $CLI volume set $V0 cluster.self-heal-daemon off
-
-# Create another pending metadata heal.
-TEST setfattr -n user.xattr -v value2 $M0/file
-EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/file
-EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2/file
-
-# Kill B1, heal from B3 to B2
-TEST $CLI volume start $V0 force
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
-TEST kill_brick $V0 $H0 $B0/${V0}0
-TEST $CLI volume set $V0 cluster.self-heal-daemon on
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
-$CLI volume heal $V0
-EXPECT_WITHIN $HEAL_TIMEOUT "00000000" afr_get_specific_changelog_xattr $B0/${V0}2/file trusted.afr.$V0-client-1 "metadata"
-TEST $CLI volume set $V0 cluster.self-heal-daemon off
-
-# ALL bricks up again.
-TEST $CLI volume start $V0 force
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
-# B1 and B2 blame each other, B3 doesn't blame anyone.
-EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/file
-EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/file
-EXPECT "0000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2/file
-EXPECT "0000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2/file
-TEST $CLI volume set $V0 cluster.self-heal-daemon on
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
-TEST $CLI volume heal $V0
-EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
-
-cleanup;
diff --git a/tests/bugs/replicate/bug-1493415-gfid-heal.t b/tests/bugs/replicate/bug-1493415-gfid-heal.t
index 125c35a7a21..8a79febf4b4 100644
--- a/tests/bugs/replicate/bug-1493415-gfid-heal.t
+++ b/tests/bugs/replicate/bug-1493415-gfid-heal.t
@@ -27,6 +27,11 @@ gfid_str_f1=$(gf_gfid_xattr_to_str $gfid_f1)
TEST setfattr -x trusted.gfid $B0/${V0}1/f1
TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1
+# storage/posix considers that a file without gfid changed less than a second
+# before doesn't exist, so we need to wait for a second to force posix to
+# consider that this is a valid file but without gfid.
+sleep 2
+
# Assume there were no pending xattrs on parent dir due to 1st brick crashing
# too. Then name heal from client must heal the gfid.
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
@@ -52,6 +57,11 @@ TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2
TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/dir
create_brick_xattrop_entry $B0/${V0}0 dir
+# storage/posix considers that a file without gfid changed less than a second
+# before doesn't exist, so we need to wait for a second to force posix to
+# consider that this is a valid file but without gfid.
+sleep 2
+
#Trigger entry-heal via shd
TEST $CLI volume set $V0 self-heal-daemon on
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
diff --git a/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t b/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t
index 26f90497d6f..49c4dea4e9c 100644
--- a/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t
+++ b/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t
@@ -53,8 +53,6 @@ TEST ! ls $B0/${V0}1/file$i
TEST ls $B0/${V0}2/file$i
dirty=$(get_hex_xattr trusted.afr.dirty $B0/${V0}2)
TEST [ "$dirty" != "000000000000000000000000" ]
-EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2/file$i
-EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2/file$i
TEST $CLI volume set $V0 self-heal-daemon on
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
diff --git a/tests/bugs/replicate/bug-1637249-gfid-heal.t b/tests/bugs/replicate/bug-1637249-gfid-heal.t
new file mode 100644
index 00000000000..e824f14531e
--- /dev/null
+++ b/tests/bugs/replicate/bug-1637249-gfid-heal.t
@@ -0,0 +1,149 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1};
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 entry-self-heal off
+TEST $CLI volume start $V0;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+###############################################################################
+
+# Test for gfid + name heal when there is no 'source' brick, i.e. parent dir
+# xattrs are in split-brain or have dirty xattrs.
+
+TEST mkdir $M0/dir_pending
+TEST dd if=/dev/urandom of=$M0/dir_pending/file1 bs=1024 count=1024
+TEST mkdir $M0/dir_pending/dir11
+TEST mkdir $M0/dir_dirty
+TEST touch $M0/dir_dirty/file2
+
+# Set pending entry xattrs on dir_pending and remove gfid of entries under it on one brick.
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/dir_pending
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1/dir_pending
+
+gfid_f1=$(gf_get_gfid_xattr $B0/${V0}0/dir_pending/file1)
+gfid_str_f1=$(gf_gfid_xattr_to_str $gfid_f1)
+TEST setfattr -x trusted.gfid $B0/${V0}1/dir_pending/file1
+TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1
+
+gfid_d11=$(gf_get_gfid_xattr $B0/${V0}0/dir_pending/dir11)
+gfid_str_d11=$(gf_gfid_xattr_to_str $gfid_d11)
+TEST setfattr -x trusted.gfid $B0/${V0}1/dir_pending/dir11
+TEST rm $B0/${V0}1/.glusterfs/${gfid_str_d11:0:2}/${gfid_str_d11:2:2}/$gfid_str_d11
+
+
+# Set dirty entry xattrs on dir_dirty and remove gfid of entries under it on one brick.
+TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/dir_dirty
+gfid_f2=$(gf_get_gfid_xattr $B0/${V0}0/dir_dirty/file2)
+gfid_str_f2=$(gf_gfid_xattr_to_str $gfid_f2)
+TEST setfattr -x trusted.gfid $B0/${V0}1/dir_dirty/file2
+TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2
+
+# Create a file under dir_pending directly on the backend only on 1 brick
+TEST touch $B0/${V0}1/dir_pending/file3
+
+# Create a file under dir_pending directly on the backend on all bricks
+TEST touch $B0/${V0}0/dir_pending/file4
+TEST touch $B0/${V0}1/dir_pending/file4
+
+# Stop & start the volume and mount client again.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST stat $M0/dir_pending/file1
+EXPECT "$gfid_f1" gf_get_gfid_xattr $B0/${V0}1/dir_pending/file1
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1
+
+TEST stat $M0/dir_pending/dir11
+EXPECT "$gfid_d11" gf_get_gfid_xattr $B0/${V0}1/dir_pending/dir11
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_d11:0:2}/${gfid_str_d11:2:2}/$gfid_str_d11
+
+
+TEST stat $M0/dir_dirty/file2
+EXPECT "$gfid_f2" gf_get_gfid_xattr $B0/${V0}1/dir_dirty/file2
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2
+
+TEST stat $M0/dir_pending/file3 # This assigns gfid on 2nd brick and heals the entry on to the 1st brick.
+gfid_f3=$(gf_get_gfid_xattr $B0/${V0}1/dir_pending/file3)
+TEST [ ! -z "$gfid_f3" ]
+EXPECT "$gfid_f3" gf_get_gfid_xattr $B0/${V0}0/dir_pending/file3
+
+TEST stat $M0/dir_pending/file4
+gfid_f4=$(gf_get_gfid_xattr $B0/${V0}0/dir_pending/file4)
+TEST [ ! -z "$gfid_f4" ]
+EXPECT "$gfid_f4" gf_get_gfid_xattr $B0/${V0}1/dir_pending/file4
+###############################################################################
+
+# Test for gfid + name heal when all bricks are 'source', i.e. parent dir
+# does not have any pending or dirty xattrs.
+
+TEST mkdir $M0/dir_clean
+TEST dd if=/dev/urandom of=$M0/dir_clean/file1 bs=1024 count=1024
+TEST mkdir $M0/dir_clean/dir11
+
+gfid_f1=$(gf_get_gfid_xattr $B0/${V0}0/dir_clean/file1)
+gfid_str_f1=$(gf_gfid_xattr_to_str $gfid_f1)
+TEST setfattr -x trusted.gfid $B0/${V0}1/dir_clean/file1
+TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1
+
+gfid_d11=$(gf_get_gfid_xattr $B0/${V0}0/dir_clean/dir11)
+gfid_str_d11=$(gf_gfid_xattr_to_str $gfid_d11)
+TEST setfattr -x trusted.gfid $B0/${V0}1/dir_clean/dir11
+TEST rm $B0/${V0}1/.glusterfs/${gfid_str_d11:0:2}/${gfid_str_d11:2:2}/$gfid_str_d11
+
+# Create a file under dir_clean directly on the backend only on 1 brick
+TEST touch $B0/${V0}1/dir_clean/file3
+
+# Create a file under dir_clean directly on the backend on all bricks
+TEST touch $B0/${V0}0/dir_clean/file4
+TEST touch $B0/${V0}1/dir_clean/file4
+
+# Stop & start the volume and mount client again.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST stat $M0/dir_clean/file1
+EXPECT "$gfid_f1" gf_get_gfid_xattr $B0/${V0}1/dir_clean/file1
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1
+
+TEST stat $M0/dir_clean/dir11
+EXPECT "$gfid_d11" gf_get_gfid_xattr $B0/${V0}1/dir_clean/dir11
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_d11:0:2}/${gfid_str_d11:2:2}/$gfid_str_d11
+
+TEST stat $M0/dir_clean/file3 # This assigns gfid on 2nd brick and heals the entry on to the 1st brick.
+gfid_f3=$(gf_get_gfid_xattr $B0/${V0}1/dir_clean/file3)
+TEST [ ! -z "$gfid_f3" ]
+EXPECT "$gfid_f3" gf_get_gfid_xattr $B0/${V0}0/dir_clean/file3
+
+TEST stat $M0/dir_clean/file4
+gfid_f4=$(gf_get_gfid_xattr $B0/${V0}0/dir_clean/file4)
+TEST [ ! -z "$gfid_f4" ]
+EXPECT "$gfid_f4" gf_get_gfid_xattr $B0/${V0}1/dir_clean/file4
+###############################################################################
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1655050-dir-sbrain-size-policy.t b/tests/bugs/replicate/bug-1655050-dir-sbrain-size-policy.t
new file mode 100644
index 00000000000..63f72e86bf6
--- /dev/null
+++ b/tests/bugs/replicate/bug-1655050-dir-sbrain-size-policy.t
@@ -0,0 +1,55 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+#Create replica 2 volume
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 data-self-heal off
+TEST $CLI volume set $V0 entry-self-heal off
+TEST $CLI volume set $V0 metadata-self-heal off
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+cd $M0
+TEST mkdir dir
+
+#Create metadata split-brain
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST chmod 757 dir
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST chmod 747 dir
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+#Use size as fav-child policy.
+TEST $CLI volume set $V0 cluster.favorite-child-policy size
+
+#Enable shd and heal the file.
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+
+EXPECT_WITHIN $HEAL_TIMEOUT "2" get_pending_heal_count $V0
+
+b1c1dir=$(afr_get_specific_changelog_xattr $B0/${V0}0/dir \
+ trusted.afr.$V0-client-1 "metadata")
+b2c0dir=$(afr_get_specific_changelog_xattr $B0/${V0}1/dir \
+ trusted.afr.$V0-client-0 "metadata")
+
+EXPECT "00000001" echo $b1c1dir
+EXPECT "00000001" echo $b2c0dir
+
+#Finish up
+TEST force_umount $M0
+cleanup;
diff --git a/tests/bugs/replicate/bug-1655052-sbrain-policy-same-size.t b/tests/bugs/replicate/bug-1655052-sbrain-policy-same-size.t
new file mode 100755
index 00000000000..319736e1157
--- /dev/null
+++ b/tests/bugs/replicate/bug-1655052-sbrain-policy-same-size.t
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+#Test the split-brain resolution CLI commands.
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+#Create replica 2 volume
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST touch $M0/file
+
+############ Healing using favorite-child-policy = size and size of bricks is same #################
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+
+#file still in split-brain
+EXPECT_WITHIN $HEAL_TIMEOUT "2" get_pending_heal_count $V0
+cat $M0/file > /dev/null
+EXPECT_NOT "^0$" echo $?
+
+#We know that both bricks have same size file
+TEST $CLI volume set $V0 cluster.favorite-child-policy size
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "2" get_pending_heal_count $V0
+cat $M0/file > /dev/null
+EXPECT_NOT "^0$" echo $?
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+cleanup
+
diff --git a/tests/bugs/replicate/bug-1655854-support-dist-to-rep3-arb-conversion.t b/tests/bugs/replicate/bug-1655854-support-dist-to-rep3-arb-conversion.t
new file mode 100644
index 00000000000..783016dc3c0
--- /dev/null
+++ b/tests/bugs/replicate/bug-1655854-support-dist-to-rep3-arb-conversion.t
@@ -0,0 +1,95 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+# Conversion from 2x1 to 2x3
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST mkdir $M0/dir
+TEST dd if=/dev/urandom of=$M0/dir/file bs=100K count=5
+file_md5sum=$(md5sum $M0/dir/file | awk '{print $1}')
+
+TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}{2..5}
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}3
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}4
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}5
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 3
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 4
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 5
+
+# Trigger heal and wait for for it to complete
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# Check whether the directory & file are healed to the newly added bricks
+TEST ls $B0/${V0}2/dir
+TEST ls $B0/${V0}3/dir
+TEST ls $B0/${V0}4/dir
+TEST ls $B0/${V0}5/dir
+
+TEST [ $file_md5sum == $(md5sum $B0/${V0}4/dir/file | awk '{print $1}') ]
+TEST [ $file_md5sum == $(md5sum $B0/${V0}5/dir/file | awk '{print $1}') ]
+
+
+# Conversion from 2x1 to 2x(2+1)
+
+TEST $CLI volume create $V1 $H0:$B0/${V1}{0,1}
+EXPECT 'Created' volinfo_field $V1 'Status';
+TEST $CLI volume start $V1
+EXPECT 'Started' volinfo_field $V1 'Status';
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}1
+
+TEST $GFS --volfile-id=$V1 --volfile-server=$H0 $M1;
+TEST mkdir $M1/dir
+TEST dd if=/dev/urandom of=$M1/dir/file bs=100K count=5
+file_md5sum=$(md5sum $M1/dir/file | awk '{print $1}')
+
+TEST $CLI volume add-brick $V1 replica 3 arbiter 1 $H0:$B0/${V1}{2..5}
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}3
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}4
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}5
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V1 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V1 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V1 2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V1 3
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V1 4
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V1 5
+
+# Trigger heal and wait for for it to complete
+TEST $CLI volume heal $V1
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V1
+
+# Check whether the directory & file are healed to the newly added bricks
+TEST ls $B0/${V1}2/dir
+TEST ls $B0/${V1}3/dir
+TEST ls $B0/${V1}4/dir
+TEST ls $B0/${V1}5/dir
+
+EXPECT "0" stat -c %s $B0/${V1}5/dir/file
+TEST [ $file_md5sum == $(md5sum $B0/${V1}4/dir/file | awk '{print $1}') ]
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1657783-do-not-update-read-subvol-on-rename-link.t b/tests/bugs/replicate/bug-1657783-do-not-update-read-subvol-on-rename-link.t
new file mode 100644
index 00000000000..b180f0e1239
--- /dev/null
+++ b/tests/bugs/replicate/bug-1657783-do-not-update-read-subvol-on-rename-link.t
@@ -0,0 +1,40 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..2}
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 performance.write-behind off
+
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST mkdir $M0/dir
+TEST "echo abc > $M0/file1"
+TEST "echo uvw > $M0/file2"
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST "echo def > $M0/file1"
+TEST "echo xyz > $M0/file2"
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+# Rename file1 and read it. Read should be served from the 3rd brick
+TEST mv $M0/file1 $M0/file3
+EXPECT "def" cat $M0/file3
+
+# Create a link to file2 and read it. Read should be served from the 3rd brick
+TEST ln $M0/file2 $M0/dir/file4
+EXPECT "xyz" cat $M0/dir/file4
+EXPECT "xyz" cat $M0/file2
+
+cleanup
diff --git a/tests/bugs/replicate/bug-1686568-send-truncate-on-arbiter-from-shd.t b/tests/bugs/replicate/bug-1686568-send-truncate-on-arbiter-from-shd.t
new file mode 100644
index 00000000000..78581e99614
--- /dev/null
+++ b/tests/bugs/replicate/bug-1686568-send-truncate-on-arbiter-from-shd.t
@@ -0,0 +1,38 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+CHANGELOG_PATH_0="$B0/${V0}2/.glusterfs/changelogs"
+ROLLOVER_TIME=100
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 changelog.changelog on
+TEST $CLI volume set $V0 changelog.rollover-time $ROLLOVER_TIME
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST dd if=/dev/zero of=$M0/file1 bs=128K count=5
+
+TEST $CLI volume profile $V0 start
+TEST $CLI volume add-brick $V0 replica 3 arbiter 1 $H0:$B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+TEST $CLI volume profile $V0 info
+truncate_count=$($CLI volume profile $V0 info | grep TRUNCATE | awk '{count += $8} END {print count}')
+
+EXPECT "1" echo $truncate_count
+EXPECT "1" check_changelog_op ${CHANGELOG_PATH_0} "^ D "
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1696599-io-hang.t b/tests/bugs/replicate/bug-1696599-io-hang.t
new file mode 100755
index 00000000000..869cdb94bda
--- /dev/null
+++ b/tests/bugs/replicate/bug-1696599-io-hang.t
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+
+#Tests that local structures in afr are removed from granted/blocked list of
+#locks when inodelk fails on all bricks
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..3}
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.client-io-threads off
+TEST $CLI volume set $V0 delay-gen locks
+TEST $CLI volume set $V0 delay-gen.delay-duration 5000000
+TEST $CLI volume set $V0 delay-gen.delay-percentage 100
+TEST $CLI volume set $V0 delay-gen.enable finodelk
+
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+TEST touch $M0/file
+#Trigger write and stop bricks so inodelks fail on all bricks leading to
+#lock failure condition
+echo abc >> $M0/file &
+
+TEST $CLI volume stop $V0
+TEST $CLI volume reset $V0 delay-gen
+wait
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 2
+#Test that only one write succeeded, this tests that delay-gen worked as
+#expected
+echo abc >> $M0/file
+EXPECT "abc" cat $M0/file
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t b/tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t
new file mode 100644
index 00000000000..76d1f2170f2
--- /dev/null
+++ b/tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t
@@ -0,0 +1,136 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST $CLI volume heal $V0 disable
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+###############################################################################
+# Case of 2 bricks blaming the third and the third blaming the other two.
+
+TEST mkdir $M0/dir
+
+# B0 and B2 must blame B1
+TEST kill_brick $V0 $H0 $B0/$V0"1"
+TEST setfattr -n user.metadata -v 1 $M0/dir
+EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}0/dir trusted.afr.$V0-client-1 metadata
+EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}2/dir trusted.afr.$V0-client-1 metadata
+CLIENT_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $M0/dir)
+
+# B1 must blame B0 and B2
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000100000000 $B0/$V0"1"/dir
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000100000000 $B0/$V0"1"/dir
+
+# Launch heal
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1
+TEST $CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+B0_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}0/dir)
+B1_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}1/dir)
+B2_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}2/dir)
+
+TEST [ "$CLIENT_XATTR" == "$B0_XATTR" ]
+TEST [ "$CLIENT_XATTR" == "$B1_XATTR" ]
+TEST [ "$CLIENT_XATTR" == "$B2_XATTR" ]
+TEST setfattr -x user.metadata $M0/dir
+
+###############################################################################
+# Case of each brick blaming the next one in a cyclic manner
+
+TEST $CLI volume heal $V0 disable
+TEST `echo "hello" >> $M0/dir/file`
+# Mark cyclic xattrs and modify metadata directly on the bricks.
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000100000000 $B0/$V0"0"/dir/file
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000100000000 $B0/$V0"1"/dir/file
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000100000000 $B0/$V0"2"/dir/file
+
+setfattr -n user.metadata -v 1 $B0/$V0"0"/dir/file
+setfattr -n user.metadata -v 2 $B0/$V0"1"/dir/file
+setfattr -n user.metadata -v 3 $B0/$V0"2"/dir/file
+
+# Add entry to xattrop dir to trigger index heal.
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
+base_entry_b0=`ls $xattrop_dir0`
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/file))
+ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
+EXPECT_WITHIN $HEAL_TIMEOUT "^1$" get_pending_heal_count $V0
+
+# Launch heal
+TEST $CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+B0_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}0/dir/file)
+B1_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}1/dir/file)
+B2_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}2/dir/file)
+
+TEST [ "$B0_XATTR" == "$B1_XATTR" ]
+TEST [ "$B0_XATTR" == "$B2_XATTR" ]
+TEST rm -f $M0/dir/file
+
+###############################################################################
+# Case of 2 bricks having quorum blaming and the other having only one blaming.
+
+TEST $CLI volume heal $V0 disable
+TEST `echo "hello" >> $M0/dir/file`
+# B0 and B2 must blame B1
+TEST kill_brick $V0 $H0 $B0/$V0"1"
+TEST setfattr -n user.metadata -v 1 $M0/dir/file
+EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}0/dir/file trusted.afr.$V0-client-1 metadata
+EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}2/dir/file trusted.afr.$V0-client-1 metadata
+
+# B1 must blame B0 and B2
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000100000000 $B0/$V0"1"/dir/file
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000100000000 $B0/$V0"1"/dir/file
+
+# B0 must blame B2
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000100000000 $B0/$V0"0"/dir/file
+
+# Modify the metadata directly on the bricks B1 & B2.
+setfattr -n user.metadata -v 2 $B0/$V0"1"/dir/file
+setfattr -n user.metadata -v 3 $B0/$V0"2"/dir/file
+
+# Launch heal
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1
+TEST $CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+B0_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}0/dir/file)
+B1_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}1/dir/file)
+B2_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}2/dir/file)
+
+TEST [ "$B0_XATTR" == "$B1_XATTR" ]
+TEST [ "$B0_XATTR" == "$B2_XATTR" ]
+
+###############################################################################
+
+cleanup
diff --git a/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t b/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t
new file mode 100644
index 00000000000..0aeaaafc84c
--- /dev/null
+++ b/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t
@@ -0,0 +1,116 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
+TEST $CLI volume start $V0;
+TEST $CLI volume set $V0 cluster.heal-timeout 5
+TEST $CLI volume heal $V0 disable
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST mkdir $M0/dir
+
+##########################################################################################
+# GFID link file and the GFID is missing on one brick and all the bricks are being blamed.
+
+TEST touch $M0/dir/file
+#TEST kill_brick $V0 $H0 $B0/$V0"1"
+
+#B0 and B2 must blame B1
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
+
+# Add entry to xattrop dir to trigger index heal.
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
+base_entry_b0=`ls $xattrop_dir0`
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
+ln -s $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
+EXPECT "^1$" get_pending_heal_count $V0
+
+# Remove the gfid xattr and the link file on one brick.
+gfid_file=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file)
+gfid_str_file=$(gf_gfid_xattr_to_str $gfid_file)
+TEST setfattr -x trusted.gfid $B0/${V0}0/dir/file
+TEST rm -f $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
+
+# Launch heal
+TEST $CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
+
+# Wait for 2 second to force posix to consider that this is a valid file but
+# without gfid.
+sleep 2
+TEST $CLI volume heal $V0
+
+# Heal should not fail as the file is missing gfid xattr and the link file,
+# which is not actually the gfid or type mismatch.
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}0/dir/file
+TEST stat $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
+rm -f $M0/dir/file
+
+
+###########################################################################################
+# GFID link file and the GFID is missing on two bricks and all the bricks are being blamed.
+
+TEST $CLI volume heal $V0 disable
+TEST touch $M0/dir/file
+#TEST kill_brick $V0 $H0 $B0/$V0"1"
+
+#B0 and B2 must blame B1
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
+
+# Add entry to xattrop dir to trigger index heal.
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
+base_entry_b0=`ls $xattrop_dir0`
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
+ln -s $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
+EXPECT "^1$" get_pending_heal_count $V0
+
+# Remove the gfid xattr and the link file on two bricks.
+gfid_file=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file)
+gfid_str_file=$(gf_gfid_xattr_to_str $gfid_file)
+TEST setfattr -x trusted.gfid $B0/${V0}0/dir/file
+TEST rm -f $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
+TEST setfattr -x trusted.gfid $B0/${V0}1/dir/file
+TEST rm -f $B0/${V0}1/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
+
+# Launch heal
+TEST $CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
+
+# Wait for 2 second to force posix to consider that this is a valid file but
+# without gfid.
+sleep 2
+TEST $CLI volume heal $V0
+
+# Heal should not fail as the file is missing gfid xattr and the link file,
+# which is not actually the gfid or type mismatch.
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}0/dir/file
+TEST stat $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
+EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}1/dir/file
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
+
+cleanup
diff --git a/tests/bugs/replicate/bug-1728770-pass-xattrs.t b/tests/bugs/replicate/bug-1728770-pass-xattrs.t
new file mode 100644
index 00000000000..159c4fcc6a1
--- /dev/null
+++ b/tests/bugs/replicate/bug-1728770-pass-xattrs.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+
+function fop_on_bad_disk {
+ local path=$1
+ mkdir $path/dir{1..1000} 2>/dev/null
+ mv $path/dir1 $path/newdir
+ touch $path/foo.txt
+ echo $?
+}
+
+function ls_fop_on_bad_disk {
+ local path=$1
+ ls $path
+ echo $?
+}
+
+TEST init_n_bricks 6;
+TEST setup_lvm 6;
+
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 replica 3 $H0:$L1 $H0:$L2 $H0:$L3 $H0:$L4 $H0:$L5 $H0:$L6;
+TEST $CLI volume set $V0 health-check-interval 1000;
+
+TEST $CLI volume start $V0;
+
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+#corrupt last disk
+dd if=/dev/urandom of=/dev/mapper/patchy_snap_vg_6-brick_lvm bs=512K count=200 status=progress && sync
+
+
+# Test the disk is now returning EIO for touch and ls
+EXPECT_WITHIN $DISK_FAIL_TIMEOUT "^1$" fop_on_bad_disk "$L6"
+EXPECT_WITHIN $DISK_FAIL_TIMEOUT "^2$" ls_fop_on_bad_disk "$L6"
+
+TEST touch $M0/foo{1..100}
+TEST $CLI volume remove-brick $V0 replica 3 $H0:$L4 $H0:$L5 $H0:$L6 start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0" "$H0:$L4 $H0:$L5 $H0:$L6";
+
+#check that remove-brick status should not have any failed or skipped files
+var=`$CLI volume remove-brick $V0 $H0:$L4 $H0:$L5 $H0:$L6 status | grep completed`
+TEST [ `echo $var | awk '{print $5}'` = "0" ]
+TEST [ `echo $var | awk '{print $6}'` = "0" ]
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1734370-entry-heal-restore-time.t b/tests/bugs/replicate/bug-1734370-entry-heal-restore-time.t
new file mode 100644
index 00000000000..14dfae89135
--- /dev/null
+++ b/tests/bugs/replicate/bug-1734370-entry-heal-restore-time.t
@@ -0,0 +1,102 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+function time_stamps_match {
+ path=$1
+ mtime_source_b0=$(get_mtime $B0/${V0}0/$path)
+ atime_source_b0=$(get_atime $B0/${V0}0/$path)
+ mtime_source_b2=$(get_mtime $B0/${V0}2/$path)
+ atime_source_b2=$(get_atime $B0/${V0}2/$path)
+ mtime_sink_b1=$(get_mtime $B0/${V0}1/$path)
+ atime_sink_b1=$(get_atime $B0/${V0}1/$path)
+
+ #The same brick must be the source of heal for both atime and mtime.
+ if [[ ( $mtime_source_b0 -eq $mtime_sink_b1 && $atime_source_b0 -eq $atime_sink_b1 ) || \
+ ( $mtime_source_b2 -eq $mtime_sink_b1 && $atime_source_b2 -eq $atime_sink_b1 ) ]]
+ then
+ echo "Y"
+ else
+ echo "Mtimes: $mtime_source_b0:$mtime_sink_b1:$mtime_source_b2 Atimes: $atime_source_b0:$atime_sink_b1:$atime_source_b2"
+ fi
+
+}
+
+function mtimes_match {
+ path=$1
+ mtime_source_b0=$(get_mtime $B0/${V0}0/$path)
+ mtime_source_b2=$(get_mtime $B0/${V0}2/$path)
+ mtime_sink_b1=$(get_mtime $B0/${V0}1/$path)
+
+ if [[ ( $mtime_source_b0 -eq $mtime_sink_b1) || \
+ ( $mtime_source_b2 -eq $mtime_sink_b1) ]]
+ then
+ echo "Y"
+ else
+ echo "Mtimes: $mtime_source_b0:$mtime_sink_b1:$mtime_source_b2"
+ fi
+
+}
+
+# Test that the parent dir's timestamps are restored during entry-heal.
+GET_MDATA_PATH=$(dirname $0)/../../utils
+build_tester $GET_MDATA_PATH/get-mdata-xattr.c
+
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
+TEST $CLI volume start $V0;
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+###############################################################################
+TEST mkdir $M0/DIR
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST touch $M0/DIR/FILE
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+EXPECT "Y" time_stamps_match DIR
+ctime_source1=$(get_ctime $B0/${V0}0/$path)
+ctime_source2=$(get_ctime $B0/${V0}2/$path)
+ctime_sink=$(get_ctime $B0/${V0}1/$path)
+TEST [ $ctime_source1 -eq $ctime_sink ]
+TEST [ $ctime_source2 -eq $ctime_sink ]
+
+
+###############################################################################
+# Repeat the test with ctime feature disabled.
+TEST $CLI volume set $V0 features.ctime off
+TEST mkdir $M0/DIR2
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST touch $M0/DIR2/FILE
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+#Executing parallel heal may lead to changing atime after heal. So better
+#to test just the mtime
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+EXPECT "Y" mtimes_match DIR2
+
+TEST rm $GET_MDATA_PATH/get-mdata-xattr
+cleanup;
diff --git a/tests/bugs/replicate/bug-1744548-heal-timeout.t b/tests/bugs/replicate/bug-1744548-heal-timeout.t
new file mode 100644
index 00000000000..011535066f9
--- /dev/null
+++ b/tests/bugs/replicate/bug-1744548-heal-timeout.t
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+function get_cumulative_opendir_count {
+#sed command prints content between Cumulative and Interval, this keeps content from Cumulative stats
+ $CLI volume profile $V0 info |sed -n '/^Cumulative/,/^Interval/p'|grep OPENDIR| awk '{print $8}'|tr -d '\n'
+}
+
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+TEST ! $CLI volume heal $V0
+
+# Enable shd and verify that index crawl is triggered immediately.
+TEST $CLI volume profile $V0 start
+TEST $CLI volume profile $V0 info clear
+TEST $CLI volume heal $V0 enable
+# Each brick does 4 opendirs, corresponding to dirty, xattrop and entry-changes, anonymous-inode
+EXPECT_WITHIN 4 "^444$" get_cumulative_opendir_count
+
+# Check that a change in heal-timeout is honoured immediately.
+TEST $CLI volume set $V0 cluster.heal-timeout 5
+sleep 10
+# Two crawls must have happened.
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^121212$" get_cumulative_opendir_count
+
+# shd must not heal if it is disabled and heal-timeout is changed.
+TEST $CLI volume heal $V0 disable
+#Wait for configuration update and any opendir fops to complete
+sleep 10
+TEST $CLI volume profile $V0 info clear
+TEST $CLI volume set $V0 cluster.heal-timeout 6
+#Better to wait for more than 6 seconds to account for configuration updates
+sleep 10
+COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'`
+TEST [ -z $COUNT ]
+cleanup;
diff --git a/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
new file mode 100644
index 00000000000..96279084065
--- /dev/null
+++ b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
@@ -0,0 +1,89 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup
+
+function check_gfid_and_link_count
+{
+ local file=$1
+
+ file_gfid_b0=$(gf_get_gfid_xattr $B0/${V0}0/$file)
+ TEST [ ! -z $file_gfid_b0 ]
+ file_gfid_b1=$(gf_get_gfid_xattr $B0/${V0}1/$file)
+ file_gfid_b2=$(gf_get_gfid_xattr $B0/${V0}2/$file)
+ EXPECT $file_gfid_b0 echo $file_gfid_b1
+ EXPECT $file_gfid_b0 echo $file_gfid_b2
+
+ EXPECT "2" stat -c %h $B0/${V0}0/$file
+ EXPECT "2" stat -c %h $B0/${V0}1/$file
+ EXPECT "2" stat -c %h $B0/${V0}2/$file
+}
+TESTS_EXPECTED_IN_LOOP=18
+
+################################################################################
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
+TEST $CLI volume start $V0;
+TEST $CLI volume set $V0 cluster.heal-timeout 5
+TEST $CLI volume heal $V0 disable
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST mkdir $M0/dir
+TEST `echo "File 1 " > $M0/dir/file1`
+TEST touch $M0/dir/file{2..4}
+
+# Remove file2 from 1st & 3rd bricks
+TEST rm -f $B0/$V0"0"/dir/file2
+TEST rm -f $B0/$V0"2"/dir/file2
+
+# Remove file3 and the .glusterfs hardlink from 1st & 2nd bricks
+gfid_file3=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file3)
+gfid_str_file3=$(gf_gfid_xattr_to_str $gfid_file3)
+TEST rm $B0/$V0"0"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3
+TEST rm $B0/$V0"1"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3
+TEST rm -f $B0/$V0"0"/dir/file3
+TEST rm -f $B0/$V0"1"/dir/file3
+
+# Remove the .glusterfs hardlink and the gfid xattr of file4 on 3rd brick
+gfid_file4=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file4)
+gfid_str_file4=$(gf_gfid_xattr_to_str $gfid_file4)
+TEST rm $B0/$V0"2"/.glusterfs/${gfid_str_file4:0:2}/${gfid_str_file4:2:2}/$gfid_str_file4
+TEST setfattr -x trusted.gfid $B0/$V0"2"/dir/file4
+
+# B0 and B2 blame each other
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
+
+# Add entry to xattrop dir on first brick.
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
+base_entry_b0=`ls $xattrop_dir0`
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
+TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
+
+EXPECT "^1$" get_pending_heal_count $V0
+
+# Launch heal
+TEST $CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# All the files must be present on all the bricks after conservative merge and
+# should have the gfid xattr and the .glusterfs hardlink.
+check_gfid_and_link_count dir/file1
+check_gfid_and_link_count dir/file2
+check_gfid_and_link_count dir/file3
+check_gfid_and_link_count dir/file4
+
+cleanup
diff --git a/tests/bugs/replicate/bug-1756938-replica-3-sbrain-cli.t b/tests/bugs/replicate/bug-1756938-replica-3-sbrain-cli.t
new file mode 100644
index 00000000000..c1bdf34ee6d
--- /dev/null
+++ b/tests/bugs/replicate/bug-1756938-replica-3-sbrain-cli.t
@@ -0,0 +1,111 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard enable
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+TEST glusterfs --volfile-server=$H0 --volfile-id=/$V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+#Create split-brain by setting afr xattrs/gfids manually.
+#file1 is non-sharded and will be in data split-brain.
+#file2 will have one shard which will be in data split-brain.
+#file3 will have one shard which will be in gfid split-brain.
+#file4 will have one shard which will be in data & metadata split-brain.
+TEST dd if=/dev/zero of=$M0/file1 bs=1024 count=1024 oflag=direct
+TEST dd if=/dev/zero of=$M0/file2 bs=1M count=6 oflag=direct
+TEST dd if=/dev/zero of=$M0/file3 bs=1M count=6 oflag=direct
+TEST dd if=/dev/zero of=$M0/file4 bs=1M count=6 oflag=direct
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+#-------------------------------------------------------------------------------
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000000 $B0/${V0}0/file1
+TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000000 $B0/${V0}0/file1
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000000 $B0/${V0}1/file1
+TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000000 $B0/${V0}1/file1
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000000 $B0/${V0}2/file1
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000000 $B0/${V0}2/file1
+
+#-------------------------------------------------------------------------------
+gfid_f2=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/file2))
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000000 $B0/${V0}0/.shard/$gfid_f2.1
+TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000000 $B0/${V0}0/.shard/$gfid_f2.1
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000000 $B0/${V0}1/.shard/$gfid_f2.1
+TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000000 $B0/${V0}1/.shard/$gfid_f2.1
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000000 $B0/${V0}2/.shard/$gfid_f2.1
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000000 $B0/${V0}2/.shard/$gfid_f2.1
+
+#-------------------------------------------------------------------------------
+TESTS_EXPECTED_IN_LOOP=5
+function assign_new_gfid {
+ brickpath=$1
+ filename=$2
+ gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brickpath/$filename))
+ gfid_shard=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brickpath/.shard/$gfid.1))
+
+ TEST rm $brickpath/.glusterfs/${gfid_shard:0:2}/${gfid_shard:2:2}/$gfid_shard
+ TEST setfattr -x trusted.gfid $brickpath/.shard/$gfid.1
+ new_gfid=$(get_random_gfid)
+ new_gfid_str=$(gf_gfid_xattr_to_str $new_gfid)
+ TEST setfattr -n trusted.gfid -v $new_gfid $brickpath/.shard/$gfid.1
+ TEST mkdir -p $brickpath/.glusterfs/${new_gfid_str:0:2}/${new_gfid_str:2:2}
+ TEST ln $brickpath/.shard/$gfid.1 $brickpath/.glusterfs/${new_gfid_str:0:2}/${new_gfid_str:2:2}/$new_gfid_str
+}
+assign_new_gfid $B0/$V0"1" file3
+assign_new_gfid $B0/$V0"2" file3
+
+#-------------------------------------------------------------------------------
+gfid_f4=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/file4))
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000100000000 $B0/${V0}0/.shard/$gfid_f4.1
+TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000100000000 $B0/${V0}0/.shard/$gfid_f4.1
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000100000000 $B0/${V0}1/.shard/$gfid_f4.1
+TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000100000000 $B0/${V0}1/.shard/$gfid_f4.1
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000100000000 $B0/${V0}2/.shard/$gfid_f4.1
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000100000000 $B0/${V0}2/.shard/$gfid_f4.1
+
+#-------------------------------------------------------------------------------
+#Add entry to xattrop dir on first brick and check for split-brain.
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
+base_entry_b0=`ls $xattrop_dir0`
+
+gfid_f1=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/file1))
+TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_f1
+
+gfid_f2_shard1=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/.shard/$gfid_f2.1))
+TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_f2_shard1
+
+gfid_f3=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/file3))
+gfid_f3_shard1=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/.shard/$gfid_f3.1))
+TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_f3_shard1
+
+gfid_f4_shard1=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/.shard/$gfid_f4.1))
+TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_f4_shard1
+
+#-------------------------------------------------------------------------------
+#gfid split-brain won't show up in split-brain count.
+EXPECT "3" afr_get_split_brain_count $V0
+EXPECT_NOT "^0$" get_pending_heal_count $V0
+
+#Resolve split-brains
+TEST $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 /file1
+GFIDSTR="gfid:$gfid_f2_shard1"
+TEST $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 $GFIDSTR
+TEST $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 /.shard/$gfid_f3.1
+TEST $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 /.shard/$gfid_f4.1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+cleanup;
diff --git a/tests/bugs/replicate/bug-1761531-metadata-heal-restore-time.t b/tests/bugs/replicate/bug-1761531-metadata-heal-restore-time.t
new file mode 100644
index 00000000000..7e24eaec03d
--- /dev/null
+++ b/tests/bugs/replicate/bug-1761531-metadata-heal-restore-time.t
@@ -0,0 +1,74 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup
+
+GET_MDATA_PATH=$(dirname $0)/../../utils
+build_tester $GET_MDATA_PATH/get-mdata-xattr.c
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/brick{0..2}
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+TEST touch $M0/a
+sleep 1
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST touch $M0/a
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+mtime0=$(get_mtime $B0/brick0/a)
+mtime1=$(get_mtime $B0/brick1/a)
+TEST [ $mtime0 -eq $mtime1 ]
+
+ctime0=$(get_ctime $B0/brick0/a)
+ctime1=$(get_ctime $B0/brick1/a)
+TEST [ $ctime0 -eq $ctime1 ]
+
+###############################################################################
+# Repeat the test with ctime feature disabled.
+TEST $CLI volume set $V0 features.ctime off
+
+TEST touch $M0/b
+sleep 1
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST touch $M0/b
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+mtime2=$(get_mtime $B0/brick0/b)
+mtime3=$(get_mtime $B0/brick1/b)
+TEST [ $mtime2 -eq $mtime3 ]
+
+TEST rm $GET_MDATA_PATH/get-mdata-xattr
+
+TEST force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/replicate/bug-1801624-entry-heal.t b/tests/bugs/replicate/bug-1801624-entry-heal.t
new file mode 100644
index 00000000000..94b465181fa
--- /dev/null
+++ b/tests/bugs/replicate/bug-1801624-entry-heal.t
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/brick{0,1,2}
+TEST $CLI volume set $V0 heal-timeout 5
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0 granular-entry-heal enable
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+echo "Data">$M0/FILE
+ret=$?
+TEST [ $ret -eq 0 ]
+
+# Re-create the file when a brick is down.
+TEST kill_brick $V0 $H0 $B0/brick1
+TEST rm $M0/FILE
+echo "New Data">$M0/FILE
+ret=$?
+TEST [ $ret -eq 0 ]
+EXPECT_WITHIN $HEAL_TIMEOUT "4" get_pending_heal_count $V0
+
+# Launching index heal must not reset parent dir afr xattrs or remove granular entry indices.
+$CLI volume heal $V0 # CLI will fail but heal is launched anyway.
+TEST sleep 5 # give index heal a chance to do one run.
+brick0_pending=$(get_hex_xattr trusted.afr.$V0-client-1 $B0/brick0/)
+brick2_pending=$(get_hex_xattr trusted.afr.$V0-client-1 $B0/brick2/)
+TEST [ $brick0_pending -eq "000000000000000000000002" ]
+TEST [ $brick2_pending -eq "000000000000000000000002" ]
+EXPECT "FILE" ls $B0/brick0/.glusterfs/indices/entry-changes/00000000-0000-0000-0000-000000000001/
+EXPECT "FILE" ls $B0/brick2/.glusterfs/indices/entry-changes/00000000-0000-0000-0000-000000000001/
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+$CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+# No gfid-split-brain (i.e. EIO) must be seen. Try on fresh mount to avoid cached values.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+TEST cat $M0/FILE
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+cleanup;
diff --git a/tests/bugs/replicate/bug-830665.t b/tests/bugs/replicate/bug-830665.t
index acebe3ec917..68180424803 100755
--- a/tests/bugs/replicate/bug-830665.t
+++ b/tests/bugs/replicate/bug-830665.t
@@ -4,6 +4,8 @@
. $(dirname $0)/../../nfs.rc
. $(dirname $0)/../../volume.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup;
function recreate {
diff --git a/tests/bugs/replicate/bug-880898.t b/tests/bugs/replicate/bug-880898.t
index 123e7e16425..660d34ca25f 100644
--- a/tests/bugs/replicate/bug-880898.t
+++ b/tests/bugs/replicate/bug-880898.t
@@ -1,12 +1,19 @@
#!/bin/bash
. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
cleanup;
TEST glusterd
TEST $CLI volume create $V0 replica 2 $H0:$B0/brick1 $H0:$B0/brick2
TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
pkill glusterfs
uuid=""
for line in $(cat $GLUSTERD_WORKDIR/glusterd.info)
diff --git a/tests/bugs/replicate/bug-977797.t b/tests/bugs/replicate/bug-977797.t
index fee82054cc3..9a8f36c956c 100755
--- a/tests/bugs/replicate/bug-977797.t
+++ b/tests/bugs/replicate/bug-977797.t
@@ -26,8 +26,11 @@ TEST $CLI volume set $V0 quick-read off
TEST $CLI volume set $V0 read-ahead off
TEST $CLI volume set $V0 write-behind off
TEST $CLI volume set $V0 io-cache off
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
-TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
TEST mkdir -p $M0/a
@@ -74,7 +77,7 @@ afr_get_specific_changelog_xattr $B0/$V0"2"/a/file trusted.afr.$V0-client-1 "dat
EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \
afr_get_specific_changelog_xattr $B0/$V0"1"/a trusted.afr.$V0-client-0 "entry"
-EXPECT_WITHIN HEAL_TIMEOUT "00000000" \
+EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \
afr_get_specific_changelog_xattr $B0/$V0"1"/a trusted.afr.$V0-client-1 "entry"
EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \
diff --git a/tests/bugs/replicate/issue-1254-prioritize-enospc.t b/tests/bugs/replicate/issue-1254-prioritize-enospc.t
new file mode 100644
index 00000000000..fab94b71b27
--- /dev/null
+++ b/tests/bugs/replicate/issue-1254-prioritize-enospc.t
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+function create_bricks {
+ TEST truncate -s 100M $B0/brick0
+ TEST truncate -s 100M $B0/brick1
+ TEST truncate -s 20M $B0/brick2
+ LO1=`SETUP_LOOP $B0/brick0`
+ TEST [ $? -eq 0 ]
+ TEST MKFS_LOOP $LO1
+ LO2=`SETUP_LOOP $B0/brick1`
+ TEST [ $? -eq 0 ]
+ TEST MKFS_LOOP $LO2
+ LO3=`SETUP_LOOP $B0/brick2`
+ TEST [ $? -eq 0 ]
+ TEST MKFS_LOOP $LO3
+ TEST mkdir -p $B0/${V0}0 $B0/${V0}1 $B0/${V0}2
+ TEST MOUNT_LOOP $LO1 $B0/${V0}0
+ TEST MOUNT_LOOP $LO2 $B0/${V0}1
+ TEST MOUNT_LOOP $LO3 $B0/${V0}2
+}
+
+function create_files {
+ local i=1
+ while (true)
+ do
+ touch $M0/file$i
+ if [ -e $B0/${V0}2/file$i ];
+ then
+ ((i++))
+ else
+ break
+ fi
+ done
+}
+
+TESTS_EXPECTED_IN_LOOP=13
+
+#Arbiter volume: Check for ENOSPC when arbiter brick becomes full#
+TEST glusterd
+create_bricks
+TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+
+create_files
+TEST kill_brick $V0 $H0 $B0/${V0}1
+error1=$(touch $M0/file-1 2>&1)
+EXPECT "No space left on device" echo $error1
+error2=$(mkdir $M0/dir-1 2>&1)
+EXPECT "No space left on device" echo $error2
+error3=$((echo "Test" > $M0/file-3) 2>&1)
+EXPECT "No space left on device" echo $error3
+
+cleanup
+
+#Replica-3 volume: Check for ENOSPC when one of the brick becomes full#
+#Keeping the third brick of lower size to simulate disk full scenario#
+TEST glusterd
+create_bricks
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+
+create_files
+TEST kill_brick $V0 $H0 $B0/${V0}1
+error1=$(touch $M0/file-1 2>&1)
+EXPECT "No space left on device" echo $error1
+error2=$(mkdir $M0/dir-1 2>&1)
+EXPECT "No space left on device" echo $error2
+error3=$((cat /dev/zero > $M0/file1) 2>&1)
+EXPECT "No space left on device" echo $error3
+
+cleanup
diff --git a/tests/bugs/replicate/mdata-heal-no-xattrs.t b/tests/bugs/replicate/mdata-heal-no-xattrs.t
new file mode 100644
index 00000000000..d3b0c504c80
--- /dev/null
+++ b/tests/bugs/replicate/mdata-heal-no-xattrs.t
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+echo "Data">$M0/FILE
+ret=$?
+TEST [ $ret -eq 0 ]
+
+# Change permission on brick-0: simulates the case where there is metadata
+# mismatch but no pending xattrs. This brick will become the source for heal.
+TEST chmod +x $B0/$V0"0"/FILE
+
+# Add gfid to xattrop
+xattrop_b0=$(afr_get_index_path $B0/$V0"0")
+base_entry_b0=`ls $xattrop_b0`
+gfid_str_FILE=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/FILE))
+TEST ln $xattrop_b0/$base_entry_b0 $xattrop_b0/$gfid_str_FILE
+EXPECT_WITHIN $HEAL_TIMEOUT "^1$" get_pending_heal_count $V0
+
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# Brick-0 should contain xattrs blaming other 2 bricks.
+# The values will be zero because heal is over.
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/FILE
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0/FILE
+TEST ! getfattr -n trusted.afr.$V0-client-0 $B0/${V0}0/FILE
+
+# Brick-1 and Brick-2 must not contain any afr xattrs.
+TEST ! getfattr -n trusted.afr.$V0-client-0 $B0/${V0}1/FILE
+TEST ! getfattr -n trusted.afr.$V0-client-1 $B0/${V0}1/FILE
+TEST ! getfattr -n trusted.afr.$V0-client-2 $B0/${V0}1/FILE
+TEST ! getfattr -n trusted.afr.$V0-client-0 $B0/${V0}2/FILE
+TEST ! getfattr -n trusted.afr.$V0-client-1 $B0/${V0}2/FILE
+TEST ! getfattr -n trusted.afr.$V0-client-2 $B0/${V0}2/FILE
+
+# check permission bits.
+EXPECT '755' stat -c %a $B0/${V0}0/FILE
+EXPECT '755' stat -c %a $B0/${V0}1/FILE
+EXPECT '755' stat -c %a $B0/${V0}2/FILE
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+cleanup;
diff --git a/tests/bugs/replicate/ta-inode-refresh-read.t b/tests/bugs/replicate/ta-inode-refresh-read.t
new file mode 100644
index 00000000000..6dd6ff7f163
--- /dev/null
+++ b/tests/bugs/replicate/ta-inode-refresh-read.t
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Test read transaction inode refresh logic for thin-arbiter.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../thin-arbiter.rc
+cleanup;
+TEST ta_create_brick_and_volfile brick0
+TEST ta_create_brick_and_volfile brick1
+TEST ta_create_ta_and_volfile ta
+TEST ta_start_brick_process brick0
+TEST ta_start_brick_process brick1
+TEST ta_start_ta_process ta
+
+TEST ta_create_mount_volfile brick0 brick1 ta
+# Set afr xlator options to choose brick0 as read-subvol.
+sed -i '/iam-self-heal-daemon/a \ option read-subvolume-index 0' $B0/mount.vol
+TEST [ $? -eq 0 ]
+sed -i '/iam-self-heal-daemon/a \ option choose-local false' $B0/mount.vol
+TEST [ $? -eq 0 ]
+
+TEST ta_start_mount_process $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "trusted.afr.patchy-ta-2" ls $B0/ta
+
+TEST touch $M0/FILE
+TEST ls $B0/brick0/FILE
+TEST ls $B0/brick1/FILE
+TEST ! ls $B0/ta/FILE
+TEST setfattr -n user.name -v ravi $M0/FILE
+
+# Remove gfid hardlink from brick0 which is the read-subvol for FILE.
+# This triggers inode refresh up on a getfattr and eventually calls
+# afr_ta_read_txn(). Without this patch, afr_ta_read_txn() will again query
+# brick0 causing getfattr to fail.
+TEST rm -f $(gf_get_gfid_backend_file_path $B0/brick0 FILE)
+TEST getfattr -n user.name $M0/FILE
+
+cleanup;
diff --git a/tests/bugs/rpc/bug-1043886.t b/tests/bugs/rpc/bug-1043886.t
index b18680289ae..c1ea7a71e8b 100755
--- a/tests/bugs/rpc/bug-1043886.t
+++ b/tests/bugs/rpc/bug-1043886.t
@@ -3,6 +3,8 @@
. $(dirname $0)/../../include.rc
. $(dirname $0)/../../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup;
TEST glusterd
diff --git a/tests/bugs/rpc/bug-847624.t b/tests/bugs/rpc/bug-847624.t
index 31a63b56a34..fe8fc982887 100755
--- a/tests/bugs/rpc/bug-847624.t
+++ b/tests/bugs/rpc/bug-847624.t
@@ -3,6 +3,9 @@
. $(dirname $0)/../../include.rc
. $(dirname $0)/../../nfs.rc
. $(dirname $0)/../../volume.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup
#1
diff --git a/tests/bugs/rpc/bug-921072.t b/tests/bugs/rpc/bug-921072.t
index 458996b57bf..ae7eb0101bc 100755
--- a/tests/bugs/rpc/bug-921072.t
+++ b/tests/bugs/rpc/bug-921072.t
@@ -4,6 +4,8 @@
. $(dirname $0)/../../nfs.rc
. $(dirname $0)/../../volume.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup;
#1
diff --git a/tests/bugs/rpc/bug-954057.t b/tests/bugs/rpc/bug-954057.t
index 65af274f09d..40acdc2fdc7 100755
--- a/tests/bugs/rpc/bug-954057.t
+++ b/tests/bugs/rpc/bug-954057.t
@@ -25,7 +25,15 @@ TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
TEST mkdir $M0/dir
TEST mkdir $M0/nobody
-TEST chown nfsnobody:nfsnobody $M0/nobody
+grep nfsnobody /etc/passwd > /dev/null
+if [ $? -eq 1 ]; then
+usr=nobody
+grp=nobody
+else
+usr=nfsnobody
+grp=nfsnobody
+fi
+TEST chown $usr:$grp $M0/nobody
TEST `echo "file" >> $M0/file`
TEST cp $M0/file $M0/new
TEST chmod 700 $M0/new
diff --git a/tests/bugs/shard/bug-1272986.t b/tests/bugs/shard/bug-1272986.t
index 762887051fa..66e896ad0c4 100644
--- a/tests/bugs/shard/bug-1272986.t
+++ b/tests/bugs/shard/bug-1272986.t
@@ -16,16 +16,16 @@ TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M1
# Write some data into a file, such that its size crosses the shard block size.
-TEST dd if=/dev/zero of=$M1/file bs=1M count=5 conv=notrunc
+TEST dd if=/dev/urandom of=$M1/file bs=1M count=5 conv=notrunc oflag=direct
md5sum1_reader=$(md5sum $M0/file | awk '{print $1}')
EXPECT "$md5sum1_reader" echo `md5sum $M1/file | awk '{print $1}'`
# Append some more data into the file.
-TEST `echo "abcdefg" >> $M1/file`
+TEST dd if=/dev/urandom of=$M1/file bs=256k count=1 conv=notrunc oflag=direct
-md5sum2_reader=$(md5sum $M0/file | awk '{print $1}')
+md5sum2_reader=$(dd if=$M0/file iflag=direct bs=256k| md5sum | awk '{print $1}')
# Test to see if the reader refreshes its cache correctly as part of the reads
# triggered through md5sum. If it does, then the md5sum on the reader and writer
diff --git a/tests/bugs/shard/bug-1669077.t b/tests/bugs/shard/bug-1669077.t
new file mode 100644
index 00000000000..8d3a67a36be
--- /dev/null
+++ b/tests/bugs/shard/bug-1669077.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+SHARD_COUNT_TIME=5
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume set $V0 features.shard-lru-limit 25
+
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+# If the bug still exists, client should crash during fallocate below
+TEST fallocate -l 200M $M0/foo
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/shard/bug-1696136-lru-limit-equals-deletion-rate.t b/tests/bugs/shard/bug-1696136-lru-limit-equals-deletion-rate.t
new file mode 100644
index 00000000000..3e4a65af19a
--- /dev/null
+++ b/tests/bugs/shard/bug-1696136-lru-limit-equals-deletion-rate.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fallocate.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume set $V0 features.shard-lru-limit 120
+TEST $CLI volume set $V0 features.shard-deletion-rate 120
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST build_tester $(dirname $0)/bug-1696136.c -lgfapi -Wall -O2
+
+# Create a file
+TEST touch $M0/file1
+
+# Fallocate a 500M file. This will make sure number of participant shards are > lru-limit
+TEST $(dirname $0)/bug-1696136 $H0 $V0 "0" "0" "536870912" /file1 `gluster --print-logdir`/glfs-$V0.log
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+rm -f $(dirname $0)/bug-1696136
+
+cleanup
diff --git a/tests/bugs/shard/bug-1696136.c b/tests/bugs/shard/bug-1696136.c
new file mode 100644
index 00000000000..cb650535b09
--- /dev/null
+++ b/tests/bugs/shard/bug-1696136.c
@@ -0,0 +1,122 @@
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+enum fallocate_flag {
+ TEST_FALLOCATE_NONE,
+ TEST_FALLOCATE_KEEP_SIZE,
+ TEST_FALLOCATE_ZERO_RANGE,
+ TEST_FALLOCATE_PUNCH_HOLE,
+ TEST_FALLOCATE_MAX,
+};
+
+int
+get_fallocate_flag(int opcode)
+{
+ int ret = 0;
+
+ switch (opcode) {
+ case TEST_FALLOCATE_NONE:
+ ret = 0;
+ break;
+ case TEST_FALLOCATE_KEEP_SIZE:
+ ret = FALLOC_FL_KEEP_SIZE;
+ break;
+ case TEST_FALLOCATE_ZERO_RANGE:
+ ret = FALLOC_FL_ZERO_RANGE;
+ break;
+ case TEST_FALLOCATE_PUNCH_HOLE:
+ ret = FALLOC_FL_PUNCH_HOLE;
+ break;
+ default:
+ ret = -1;
+ break;
+ }
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 1;
+ int opcode = -1;
+ off_t offset = 0;
+ size_t len = 0;
+ glfs_t *fs = NULL;
+ glfs_fd_t *fd = NULL;
+
+ if (argc != 8) {
+ fprintf(stderr,
+ "Syntax: %s <host> <volname> <opcode> <offset> <len> "
+ "<file-path> <log-file>\n",
+ argv[0]);
+ return 1;
+ }
+
+ fs = glfs_new(argv[2]);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", argv[1], 24007);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_volfile_server: returned %d\n", ret);
+ goto out;
+ }
+
+ ret = glfs_set_logging(fs, argv[7], 7);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_logging: returned %d\n", ret);
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_init: returned %d\n", ret);
+ goto out;
+ }
+
+ opcode = atoi(argv[3]);
+ opcode = get_fallocate_flag(opcode);
+ if (opcode < 0) {
+ fprintf(stderr, "get_fallocate_flag: invalid flag \n");
+ goto out;
+ }
+
+ /* Note that off_t is signed but size_t isn't. */
+ offset = strtol(argv[4], NULL, 10);
+ len = strtoul(argv[5], NULL, 10);
+
+ fd = glfs_open(fs, argv[6], O_RDWR);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_open: returned NULL\n");
+ goto out;
+ }
+
+ ret = glfs_fallocate(fd, opcode, offset, len);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_fallocate: returned %d\n", ret);
+ goto out;
+ }
+
+ ret = glfs_unlink(fs, argv[6]);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_unlink: returned %d\n", ret);
+ goto out;
+ }
+ /* Sleep for 3s to give enough time for background deletion to complete
+ * during which if the bug exists, the process will crash.
+ */
+ sleep(3);
+ ret = 0;
+
+out:
+ if (fd)
+ glfs_close(fd);
+ glfs_fini(fs);
+ return ret;
+}
diff --git a/tests/bugs/shard/bug-1696136.t b/tests/bugs/shard/bug-1696136.t
new file mode 100644
index 00000000000..b6dc858f083
--- /dev/null
+++ b/tests/bugs/shard/bug-1696136.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fallocate.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume set $V0 features.shard-lru-limit 120
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST build_tester $(dirname $0)/bug-1696136.c -lgfapi -Wall -O2
+
+# Create a file
+TEST touch $M0/file1
+
+# Fallocate a 500M file. This will make sure number of participant shards are > lru-limit
+TEST $(dirname $0)/bug-1696136 $H0 $V0 "0" "0" "536870912" /file1 `gluster --print-logdir`/glfs-$V0.log
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+rm -f $(dirname $0)/bug-1696136
+
+cleanup
diff --git a/tests/bugs/shard/bug-1705884.t b/tests/bugs/shard/bug-1705884.t
new file mode 100644
index 00000000000..f6e50376a58
--- /dev/null
+++ b/tests/bugs/shard/bug-1705884.t
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fallocate.rc
+
+cleanup
+
+require_fallocate -l 1m $M0/file
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST fallocate -l 200M $M0/foo
+EXPECT `echo "$(( ( 200 * 1024 * 1024 ) / 512 ))"` stat -c %b $M0/foo
+TEST truncate -s 0 $M0/foo
+EXPECT "0" stat -c %b $M0/foo
+TEST fallocate -l 100M $M0/foo
+EXPECT `echo "$(( ( 100 * 1024 * 1024 ) / 512 ))"` stat -c %b $M0/foo
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/shard/bug-1738419.t b/tests/bugs/shard/bug-1738419.t
new file mode 100644
index 00000000000..8d0a31d9754
--- /dev/null
+++ b/tests/bugs/shard/bug-1738419.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 network.remote-dio off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.strict-o-direct on
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST dd if=/dev/zero of=$M0/metadata bs=501 count=1
+
+EXPECT "501" echo $("dd" if=$M0/metadata bs=4096 count=1 of=/dev/null iflag=direct 2>&1 | awk '/bytes/ {print $1}')
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/shard/bug-shard-discard.c b/tests/bugs/shard/bug-shard-discard.c
index 15dca6c2181..6fa93fb89d1 100644
--- a/tests/bugs/shard/bug-shard-discard.c
+++ b/tests/bugs/shard/bug-shard-discard.c
@@ -50,8 +50,9 @@ main(int argc, char *argv[])
goto out;
}
- off = atoi(argv[4]);
- len = atoi(argv[5]);
+ /* Note that off_t is signed but size_t isn't. */
+ off = strtol(argv[4], NULL, 10);
+ len = strtoul(argv[5], NULL, 10);
ret = glfs_discard(fd, off, len);
if (ret <= 0) {
diff --git a/tests/bugs/shard/issue-1243.t b/tests/bugs/shard/issue-1243.t
new file mode 100644
index 00000000000..ba22d2b74fe
--- /dev/null
+++ b/tests/bugs/shard/issue-1243.t
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.strict-o-direct on
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST $CLI volume set $V0 md-cache-timeout 10
+
+# Write data into a file such that its size crosses shard-block-size
+TEST dd if=/dev/zero of=$M0/foo bs=1048576 count=8 oflag=direct
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+# Execute a setxattr on the file.
+TEST setfattr -n trusted.libvirt -v some-value $M0/foo
+
+# Size of the file should be the aggregated size, not the shard-block-size
+EXPECT '8388608' stat -c %s $M0/foo
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+# Execute a removexattr on the file.
+TEST setfattr -x trusted.libvirt $M0/foo
+
+# Size of the file should be the aggregated size, not the shard-block-size
+EXPECT '8388608' stat -c %s $M0/foo
+cleanup
diff --git a/tests/bugs/shard/issue-1281.t b/tests/bugs/shard/issue-1281.t
new file mode 100644
index 00000000000..9704caa8944
--- /dev/null
+++ b/tests/bugs/shard/issue-1281.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+#Open a file and store descriptor in fd = 5
+exec 5>$M0/foo
+
+#Unlink the same file which is opened in prev step
+TEST unlink $M0/foo
+
+#Write something on the file using the open fd = 5
+echo "issue-1281" >&5
+
+#Write on the descriptor should be succesful
+EXPECT 0 echo $?
+
+#Close the fd = 5
+exec 5>&-
+
+cleanup
diff --git a/tests/bugs/shard/issue-1425.t b/tests/bugs/shard/issue-1425.t
new file mode 100644
index 00000000000..bbe82c0e5b2
--- /dev/null
+++ b/tests/bugs/shard/issue-1425.t
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+FILE_COUNT_TIME=5
+
+function get_file_count {
+ ls $1* | wc -l
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST fallocate -l 20M $M0/foo
+gfid_new=$(get_gfid_string $M0/foo)
+
+# Check for the base shard
+TEST stat $M0/foo
+TEST stat $B0/${V0}0/foo
+
+# There should be 4 associated shards
+EXPECT_WITHIN $FILE_COUNT_TIME 4 get_file_count $B0/${V0}0/.shard/$gfid_new
+
+# There should be 1+4 shards and we expect 4 lookups less than on the build without this patch
+EXPECT "21" echo `$CLI volume profile $V0 info incremental | grep -w LOOKUP | awk '{print $8}'`
+
+# Delete the base shard and check shards get cleaned up
+TEST unlink $M0/foo
+
+TEST ! stat $M0/foo
+TEST ! stat $B0/${V0}0/foo
+
+# There should be no shards now
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_new
+cleanup
diff --git a/tests/bugs/shard/shard-fallocate.c b/tests/bugs/shard/shard-fallocate.c
index 3a784d3c02c..cb0714e8564 100644
--- a/tests/bugs/shard/shard-fallocate.c
+++ b/tests/bugs/shard/shard-fallocate.c
@@ -87,8 +87,9 @@ main(int argc, char *argv[])
goto out;
}
- offset = atoi(argv[4]);
- len = atoi(argv[5]);
+ /* Note that off_t is signed but size_t isn't. */
+ offset = strtol(argv[4], NULL, 10);
+ len = strtoul(argv[5], NULL, 10);
fd = glfs_open(fs, argv[6], O_RDWR);
if (fd == NULL) {
@@ -97,7 +98,7 @@ main(int argc, char *argv[])
}
ret = glfs_fallocate(fd, opcode, offset, len);
- if (ret <= 0) {
+ if (ret < 0) {
fprintf(stderr, "glfs_fallocate: returned %d\n", ret);
goto out;
}
diff --git a/tests/bugs/shard/zero-flag.t b/tests/bugs/shard/zero-flag.t
index 84cb9635a1b..1f39787ab9f 100644
--- a/tests/bugs/shard/zero-flag.t
+++ b/tests/bugs/shard/zero-flag.t
@@ -14,6 +14,7 @@ TEST glusterd
TEST pidof glusterd
TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3}
TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
TEST $CLI volume start $V0
TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
diff --git a/tests/bugs/snapshot/bug-1109889.t b/tests/bugs/snapshot/bug-1109889.t
index 6b29cdd9eb1..5fdc7dc9506 100644
--- a/tests/bugs/snapshot/bug-1109889.t
+++ b/tests/bugs/snapshot/bug-1109889.t
@@ -19,9 +19,9 @@ TEST $CLI volume create $V0 $H0:$L1 $H0:$L2 $H0:$L3;
TEST $CLI volume start $V0;
-TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0;
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
-MOUNT_PID=`ps ax |grep "glusterfs --volfile-sever $H0 --volfile-id=$V0 $M0" | grep -v grep | awk '{print $1}' | head -1`
+MOUNT_PID=$(get_mount_process_pid $V0 $M0)
for i in {1..10} ; do echo "file" > $M0/file$i ; done
diff --git a/tests/bugs/snapshot/bug-1111041.t b/tests/bugs/snapshot/bug-1111041.t
index f771d64f2a3..efda9688d8b 100755
--- a/tests/bugs/snapshot/bug-1111041.t
+++ b/tests/bugs/snapshot/bug-1111041.t
@@ -11,6 +11,10 @@ function is_snapd_running {
$CLI volume status $1 | grep "Snapshot Daemon" | wc -l;
}
+function snapd_pid {
+ $CLI volume status $V0 | grep "Snapshot Daemon" | awk {'print $8'}
+}
+
TEST glusterd;
TEST pidof glusterd;
@@ -25,14 +29,12 @@ TEST $CLI volume set $V0 features.uss enable;
EXPECT "1" is_snapd_running $V0
-SNAPD_PID=$($CLI volume status $V0 | grep "Snapshot Daemon" | awk {'print $8'});
+SNAPD_PID=$(snapd_pid);
TEST [ $SNAPD_PID -gt 0 ]
kill -9 $SNAPD_PID
-SNAPD_PID=$($CLI volume status $V0 | grep "Snapshot Daemon" | awk {'print $8'});
-
-TEST [ $SNAPD_PID = 'N/A' ]
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^N/A$" snapd_pid
cleanup ;
diff --git a/tests/bugs/snapshot/bug-1140162-file-snapshot-features-encrypt-opts-validation.t b/tests/bugs/snapshot/bug-1140162-file-snapshot-features-encrypt-opts-validation.t
deleted file mode 100644
index c536c8261e4..00000000000
--- a/tests/bugs/snapshot/bug-1140162-file-snapshot-features-encrypt-opts-validation.t
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/bin/bash
-
-## Test case for BZ-1140160 Volume option set <vol> <file-snapshot> and
-## <features.encryption> <value> command input should validate correctly.
-
-. $(dirname $0)/../../include.rc
-. $(dirname $0)/../../volume.rc
-
-cleanup;
-
-## Start glusterd
-TEST glusterd;
-TEST pidof glusterd;
-
-## Lets create and start volume
-TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
-TEST $CLI volume start $V0
-
-## Set features.file-snapshot and features.encryption option with non-boolean
-## value. These options should fail.
-TEST ! $CLI volume set $V0 features.file-snapshot abcd
-TEST ! $CLI volume set $V0 features.encryption redhat
-
-## Set other options with valid value. These options should succeed.
-TEST $CLI volume set $V0 barrier enable
-TEST $CLI volume set $V0 ping-timeout 60
-
-## Set features.file-snapshot and features.encryption option with valid boolean
-## value. These options should succeed.
-TEST $CLI volume set $V0 features.file-snapshot on
-
-## Before setting the crypt xlator on, it is required to create master key
-## Otherwise glusterfs client process will fail to start
-echo "0000111122223333444455556666777788889999aaaabbbbccccddddeeeeffff" > $GLUSTERD_WORKDIR/$V0-master-key
-
-## Specify location of master key
-TEST $CLI volume set $V0 encryption.master-key $GLUSTERD_WORKDIR/$V0-master-key
-
-TEST $CLI volume set $V0 features.encryption on
-
-cleanup;
-#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
-#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
diff --git a/tests/bugs/snapshot/bug-1166197.t b/tests/bugs/snapshot/bug-1166197.t
index 7350acfa9ed..b070ae271ba 100755
--- a/tests/bugs/snapshot/bug-1166197.t
+++ b/tests/bugs/snapshot/bug-1166197.t
@@ -5,6 +5,8 @@
. $(dirname $0)/../../volume.rc
. $(dirname $0)/../../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup;
CURDIR=`pwd`
diff --git a/tests/bugs/snapshot/bug-1167580-set-proper-uid-and-gid-during-nfs-access.t b/tests/bugs/snapshot/bug-1167580-set-proper-uid-and-gid-during-nfs-access.t
index 6b069ac2432..52a7a790b97 100644
--- a/tests/bugs/snapshot/bug-1167580-set-proper-uid-and-gid-during-nfs-access.t
+++ b/tests/bugs/snapshot/bug-1167580-set-proper-uid-and-gid-during-nfs-access.t
@@ -4,6 +4,8 @@
. $(dirname $0)/../../volume.rc
. $(dirname $0)/../../snapshot.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
# This function returns a value "Y" if user can execute
# the given command. Else it will return "N"
# @arg-1 : Name of the user
diff --git a/tests/bugs/snapshot/bug-1227646.t b/tests/bugs/snapshot/bug-1227646.t
index a16a8c270da..9b73dfdb32f 100644
--- a/tests/bugs/snapshot/bug-1227646.t
+++ b/tests/bugs/snapshot/bug-1227646.t
@@ -20,7 +20,6 @@ TEST $CLI snapshot create snap1 $V0 no-timestamp;
TEST $CLI volume stop $V0
TEST $CLI snapshot restore snap1;
TEST $CLI volume start $V0
-TEST $CLI volume tier $V0 attach $H0:$L1 $H0:$L2
TEST pkill gluster
TEST glusterd
diff --git a/tests/bugs/snapshot/bug-1260848.t b/tests/bugs/snapshot/bug-1260848.t
index 7eae3982e43..6455d8297b2 100644
--- a/tests/bugs/snapshot/bug-1260848.t
+++ b/tests/bugs/snapshot/bug-1260848.t
@@ -4,6 +4,8 @@
. $(dirname $0)/../../nfs.rc
. $(dirname $0)/../../volume.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup;
## Start and create a volume
diff --git a/tests/bugs/snapshot/bug-1279327.t b/tests/bugs/snapshot/bug-1279327.t
index de1b3885a1c..4e4be6eeea6 100644
--- a/tests/bugs/snapshot/bug-1279327.t
+++ b/tests/bugs/snapshot/bug-1279327.t
@@ -15,7 +15,6 @@ TEST setup_lvm 3
TEST $CLI volume create $V0 $H0:$L1
TEST $CLI volume start $V0
TEST $CLI volume quota $V0 enable
-TEST $CLI volume tier $V0 attach replica 2 $H0:$L2 $H0:$L3
TEST $CLI snapshot create snap1 $V0 no-timestamp
TEST $CLI snapshot activate snap1
diff --git a/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t b/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t
index f30194b6339..04a85db0c1a 100644
--- a/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t
+++ b/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t
@@ -130,3 +130,4 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" mounted_snaps ${V1}
cleanup;
# run first!
+#G_TESTDEF_TEST_STATUS_CENTOS6=BRICK_MUX_BAD_TEST,BUG=1743069
diff --git a/tests/bugs/snapshot/bug-1597662.t b/tests/bugs/snapshot/bug-1597662.t
index dc87d17a0ef..f582930476a 100644
--- a/tests/bugs/snapshot/bug-1597662.t
+++ b/tests/bugs/snapshot/bug-1597662.t
@@ -34,12 +34,13 @@ function is_snap_path
EXPECT "1" is_snap_path
$CLI snapshot deactivate snap1;
-
+EXPECT_WITHIN ${PROCESS_DOWN_TIMEOUT} 'Stopped' snapshot_status snap1
# snap is deactivated so snap_path should not exist
EXPECT "0" is_snap_path
# activate snap again
$CLI snapshot activate snap1;
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} 'Started' snapshot_status snap1
# snap is active so snap_path should exist
EXPECT "1" is_snap_path
diff --git a/tests/bugs/transport/bug-873367.t b/tests/bugs/transport/bug-873367.t
index d4c07024ed0..8070bc1b83c 100755
--- a/tests/bugs/transport/bug-873367.t
+++ b/tests/bugs/transport/bug-873367.t
@@ -13,7 +13,7 @@ rm -f $SSL_BASE/glusterfs.*
mkdir -p $B0/1
mkdir -p $M0
-TEST openssl genrsa -out $SSL_KEY 1024
+TEST openssl genrsa -out $SSL_KEY 2048
TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT
ln $SSL_CERT $SSL_CA
diff --git a/tests/bugs/write-behind/issue-884.c b/tests/bugs/write-behind/issue-884.c
new file mode 100644
index 00000000000..e9c33b351ad
--- /dev/null
+++ b/tests/bugs/write-behind/issue-884.c
@@ -0,0 +1,267 @@
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <assert.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <pthread.h>
+
+#include <glusterfs/api/glfs.h>
+
+/* Based on a reproducer by Stefan Ring. It seems to be quite sensible to any
+ * timing modification, so the code has been maintained as is, only with minor
+ * changes. */
+
+struct glfs *glfs;
+
+pthread_mutex_t the_mutex = PTHREAD_MUTEX_INITIALIZER;
+pthread_cond_t the_cond = PTHREAD_COND_INITIALIZER;
+
+typedef struct _my_aiocb {
+ int64_t size;
+ volatile int64_t seq;
+ int which;
+} my_aiocb;
+
+typedef struct _worker_data {
+ my_aiocb cb;
+ struct iovec iov;
+ int64_t offset;
+} worker_data;
+
+typedef struct {
+ worker_data wdata[2];
+
+ volatile unsigned busy;
+} all_data_t;
+
+all_data_t all_data;
+
+static void
+completion_fnc(struct glfs_fd *fd, ssize_t ret, struct glfs_stat *pre,
+ struct glfs_stat *post, void *arg)
+{
+ void *the_thread;
+ my_aiocb *cb = (my_aiocb *)arg;
+ long seq = cb->seq;
+
+ assert(ret == cb->size);
+
+ pthread_mutex_lock(&the_mutex);
+ pthread_cond_broadcast(&the_cond);
+
+ all_data.busy &= ~(1 << cb->which);
+ cb->seq = -1;
+
+ the_thread = (void *)pthread_self();
+ printf("worker %d is done from thread %p, seq %ld!\n", cb->which,
+ the_thread, seq);
+
+ pthread_mutex_unlock(&the_mutex);
+}
+
+static void
+init_wdata(worker_data *data, int which)
+{
+ data->cb.which = which;
+ data->cb.seq = -1;
+
+ data->iov.iov_base = malloc(1024 * 1024);
+ memset(data->iov.iov_base, 6,
+ 1024 * 1024); /* tail part never overwritten */
+}
+
+static void
+init()
+{
+ all_data.busy = 0;
+
+ init_wdata(&all_data.wdata[0], 0);
+ init_wdata(&all_data.wdata[1], 1);
+}
+
+static void
+do_write(struct glfs_fd *fd, int content, int size, int64_t seq,
+ worker_data *wdata, const char *name)
+{
+ int ret;
+
+ wdata->cb.size = size;
+ wdata->cb.seq = seq;
+
+ if (content >= 0)
+ memset(wdata->iov.iov_base, content, size);
+ wdata->iov.iov_len = size;
+
+ pthread_mutex_lock(&the_mutex);
+ printf("(%d) dispatching write \"%s\", offset %lx, len %x, seq %ld\n",
+ wdata->cb.which, name, (long)wdata->offset, size, (long)seq);
+ pthread_mutex_unlock(&the_mutex);
+ ret = glfs_pwritev_async(fd, &wdata->iov, 1, wdata->offset, 0,
+ completion_fnc, &wdata->cb);
+ assert(ret >= 0);
+}
+
+#define IDLE 0 // both workers must be idle
+#define ANY 1 // use any worker, other one may be busy
+
+int
+get_worker(int waitfor, int64_t excl_seq)
+{
+ int which;
+
+ pthread_mutex_lock(&the_mutex);
+
+ while (waitfor == IDLE && (all_data.busy & 3) != 0 ||
+ waitfor == ANY &&
+ ((all_data.busy & 3) == 3 ||
+ excl_seq >= 0 && (all_data.wdata[0].cb.seq == excl_seq ||
+ all_data.wdata[1].cb.seq == excl_seq)))
+ pthread_cond_wait(&the_cond, &the_mutex);
+
+ if (!(all_data.busy & 1))
+ which = 0;
+ else
+ which = 1;
+
+ all_data.busy |= (1 << which);
+
+ pthread_mutex_unlock(&the_mutex);
+
+ return which;
+}
+
+static int
+doit(struct glfs_fd *fd)
+{
+ int ret;
+ int64_t seq = 0;
+ int64_t offset = 0; // position in file, in blocks
+ int64_t base = 0x1000; // where to place the data, in blocks
+
+ int async_mode = ANY;
+
+ init();
+
+ for (;;) {
+ int which;
+ worker_data *wdata;
+
+ // for growing to the first offset
+ for (;;) {
+ int gap = base + 0x42 - offset;
+ if (!gap)
+ break;
+ if (gap > 80)
+ gap = 80;
+
+ which = get_worker(IDLE, -1);
+ wdata = &all_data.wdata[which];
+
+ wdata->offset = offset << 9;
+ do_write(fd, 0, gap << 9, seq++, wdata, "gap-filling");
+
+ offset += gap;
+ }
+
+ // 8700
+ which = get_worker(IDLE, -1);
+ wdata = &all_data.wdata[which];
+
+ wdata->offset = (base + 0x42) << 9;
+ do_write(fd, 1, 62 << 9, seq++, wdata, "!8700");
+
+ // 8701
+ which = get_worker(IDLE, -1);
+ wdata = &all_data.wdata[which];
+
+ wdata->offset = (base + 0x42) << 9;
+ do_write(fd, 2, 55 << 9, seq++, wdata, "!8701");
+
+ // 8702
+ which = get_worker(async_mode, -1);
+ wdata = &all_data.wdata[which];
+
+ wdata->offset = (base + 0x79) << 9;
+ do_write(fd, 3, 54 << 9, seq++, wdata, "!8702");
+
+ // 8703
+ which = get_worker(async_mode, -1);
+ wdata = &all_data.wdata[which];
+
+ wdata->offset = (base + 0xaf) << 9;
+ do_write(fd, 4, 81 << 9, seq++, wdata, "!8703");
+
+ // 8704
+ // this writes both 5s and 6s
+ // the range of 5s is the one that overwrites 8703
+
+ which = get_worker(async_mode, seq - 1);
+ wdata = &all_data.wdata[which];
+
+ memset(wdata->iov.iov_base, 5, 81 << 9);
+ wdata->offset = (base + 0xaf) << 9;
+ do_write(fd, -1, 1623 << 9, seq++, wdata, "!8704");
+
+ offset = base + 0x706;
+ base += 0x1000;
+ if (base >= 0x100000)
+ break;
+ }
+
+ printf("done!\n");
+ fflush(stdout);
+
+ pthread_mutex_lock(&the_mutex);
+
+ while ((all_data.busy & 3) != 0)
+ pthread_cond_wait(&the_cond, &the_mutex);
+
+ pthread_mutex_unlock(&the_mutex);
+
+ ret = glfs_close(fd);
+ assert(ret >= 0);
+ /*
+ ret = glfs_fini(glfs);
+ assert(ret >= 0);
+ */
+ return 0;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret;
+ int open_flags = O_RDWR | O_DIRECT | O_TRUNC;
+ struct glfs_fd *fd;
+
+ glfs = glfs_new(argv[1]);
+ if (!glfs) {
+ printf("glfs_new!\n");
+ goto out;
+ }
+ ret = glfs_set_volfile_server(glfs, "tcp", "localhost", 24007);
+ if (ret < 0) {
+ printf("set_volfile!\n");
+ goto out;
+ }
+ ret = glfs_init(glfs);
+ if (ret) {
+ printf("init!\n");
+ goto out;
+ }
+ fd = glfs_open(glfs, argv[2], open_flags);
+ if (!fd) {
+ printf("open!\n");
+ goto out;
+ }
+ srand(time(NULL));
+ return doit(fd);
+out:
+ return 1;
+}
diff --git a/tests/bugs/write-behind/issue-884.t b/tests/bugs/write-behind/issue-884.t
new file mode 100755
index 00000000000..2bcf7d15265
--- /dev/null
+++ b/tests/bugs/write-behind/issue-884.t
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test tries to detect a race condition in write-behind. It's based on a
+# reproducer written by Stefan Ring that is able to hit it sometimes. On my
+# system, it happened around 10% of the runs. This means that if this bug
+# appears again, this test will fail once every 10 runs. Most probably this
+# failure will be hidden by the automatic test retry of the testing framework.
+#
+# Please, if this test fails, it needs to be analyzed in detail.
+
+function run() {
+ "${@}" >/dev/null
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+# This makes it easier to hit the issue
+TEST $CLI volume set $V0 client-log-level TRACE
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+
+build_tester $(dirname $0)/issue-884.c -lgfapi
+
+TEST touch $M0/testfile
+
+# This program generates a file of 535694336 bytes with a fixed pattern
+TEST run $(dirname $0)/issue-884 $V0 testfile
+
+# This is the md5sum of the expected pattern without corruption
+EXPECT "ad105f9349345a70fc697632cbb5eec8" echo "$(md5sum $B0/$V0/testfile | awk '{ print $1; }')"
+
+cleanup
diff --git a/tests/cluster.rc b/tests/cluster.rc
index 99be8e79c21..34f5b02398f 100644
--- a/tests/cluster.rc
+++ b/tests/cluster.rc
@@ -11,7 +11,7 @@ function launch_cluster() {
define_backends $count;
define_hosts $count;
define_glusterds $count $2;
- define_clis $count;
+ define_clis $count $3;
start_glusterds;
}
@@ -50,15 +50,16 @@ function define_glusterds() {
sopt="management.glusterd-sockfile=${!b}/glusterd/gd.sock"
#Get the logdir
logdir=`gluster --print-logdir`
+ clopt="management.cluster-test-mode=${logdir}/$i";
#Fetch the testcases name and prefix the glusterd log with it
logfile=`echo ${0##*/}`_glusterd$i.log
- lopt="--log-file=$logdir/$logfile"
+ lopt="--log-file=$logdir/$i/$logfile"
if [ "$2" == "-LDEBUG" ]; then
- eval "glusterd_$i='glusterd -LDEBUG --xlator-option $wopt --xlator-option $bopt --xlator-option $ropt --xlator-option $sopt $lopt $popt'";
- eval "glusterd$i='glusterd -LDEBUG --xlator-option $wopt --xlator-option $bopt --xlator-option $ropt --xlator-option $sopt $lopt $popt'";
+ eval "glusterd_$i='glusterd -LDEBUG --xlator-option $wopt --xlator-option $bopt --xlator-option $ropt --xlator-option $sopt --xlator-option $clopt $lopt $popt'";
+ eval "glusterd$i='glusterd -LDEBUG --xlator-option $wopt --xlator-option $bopt --xlator-option $ropt --xlator-option $sopt --xlator-option $clopt $lopt $popt'";
else
- eval "glusterd_$i='glusterd --xlator-option $wopt --xlator-option $bopt --xlator-option $ropt --xlator-option $sopt $lopt $popt'";
- eval "glusterd$i='glusterd --xlator-option $wopt --xlator-option $bopt --xlator-option $ropt --xlator-option $sopt $lopt $popt'";
+ eval "glusterd_$i='glusterd --xlator-option $wopt --xlator-option $bopt --xlator-option $ropt --xlator-option $sopt --xlator-option $clopt $lopt $popt'";
+ eval "glusterd$i='glusterd --xlator-option $wopt --xlator-option $bopt --xlator-option $ropt --xlator-option $sopt --xlator-option $clopt $lopt $popt'";
fi
done
}
@@ -89,6 +90,20 @@ function kill_glusterd() {
kill `cat $pidfile`;
}
+function restart_glusterd() {
+ local index=$1
+ local b
+ local pidfile
+ local g
+
+ b="B$index"
+ pidfile="${!b}/glusterd.pid"
+
+ kill `cat $pidfile`
+
+ g="glusterd_${index}"
+ ${!g}
+}
function kill_node() {
local index=$1;
@@ -133,8 +148,13 @@ function define_clis() {
lopt1="--log-file=$logdir/$logfile1"
- eval "CLI_$i='$CLI --glusterd-sock=${!b}/glusterd/gd.sock $lopt'";
- eval "CLI$i='$CLI --glusterd-sock=${!b}/glusterd/gd.sock $lopt1'";
+ if [ "$2" == "-NO_FORCE" ]; then
+ eval "CLI_$i='$CLI_NO_FORCE --glusterd-sock=${!b}/glusterd/gd.sock $lopt'";
+ eval "CLI$i='$CLI_NO_FORCE --glusterd-sock=${!b}/glusterd/gd.sock $lopt1'";
+ else
+ eval "CLI_$i='$CLI --glusterd-sock=${!b}/glusterd/gd.sock $lopt'";
+ eval "CLI$i='$CLI --glusterd-sock=${!b}/glusterd/gd.sock $lopt1'";
+ fi
done
}
@@ -191,3 +211,8 @@ function cluster_brick_up_status {
eval \$CLI_$1 volume status $vol $host:$brick --xml | sed -ne 's/.*<status>\([01]\)<\/status>/\1/p'
}
+function cluster_remove_brick_status_completed_field {
+ local vol=$1
+ local brick_list=$2
+ $CLI_1 volume remove-brick $vol $brick_list status | awk '{print $7}' | sed -n 3p
+}
diff --git a/tests/ec.rc b/tests/ec.rc
index 04405ecb829..f18752fc99a 100644
--- a/tests/ec.rc
+++ b/tests/ec.rc
@@ -7,3 +7,12 @@ function ec_up_status()
local ec_id=$3
grep -E "^up =" $m/.meta/graphs/active/${v}-disperse-${ec_id}/private | cut -f2 -d'='
}
+
+function ec_option_value()
+{
+ local v=$1
+ local m=$2
+ local ec_id=$3
+ local opt=$4
+ grep -E "^$opt =" $m/.meta/graphs/active/${v}-disperse-${ec_id}/private | cut -f2 -d'='| awk '{print $1}'
+}
diff --git a/tests/encryption/crypt.t b/tests/encryption/crypt.t
deleted file mode 100755
index 2f965b0e8b7..00000000000
--- a/tests/encryption/crypt.t
+++ /dev/null
@@ -1,90 +0,0 @@
-#!/bin/bash
-
-. $(dirname $0)/../include.rc
-. $(dirname $0)/../volume.rc
-
-cleanup;
-
-TEST glusterd
-TEST pidof glusterd
-
-## Create a volume with one brick
-TEST $CLI volume create $V0 $H0:$B0/${V0}1;
-EXPECT "$V0" volinfo_field $V0 'Volume Name';
-EXPECT 'Created' volinfo_field $V0 'Status';
-EXPECT '1' brick_count $V0
-
-## Turn off performance translators
-
-TEST $CLI volume set $V0 performance.quick-read off
-EXPECT 'off' volinfo_field $V0 'performance.quick-read'
-TEST $CLI volume set $V0 performance.write-behind off
-EXPECT 'off' volinfo_field $V0 'performance.write-behind'
-TEST $CLI volume set $V0 performance.open-behind off
-EXPECT 'off' volinfo_field $V0 'performance.open-behind'
-
-## Create a file with master key
-
-echo "0000111122223333444455556666777788889999aaaabbbbccccddddeeeeffff" > $GLUSTERD_WORKDIR/$V0-master-key
-
-## Specify location of master key
-TEST $CLI volume set $V0 encryption.master-key $GLUSTERD_WORKDIR/$V0-master-key
-
-## Turn on crypt xlator by setting features.encryption to on
-TEST $CLI volume set $V0 encryption on
-EXPECT 'on' volinfo_field $V0 'features.encryption'
-
-## Start the volume
-TEST $CLI volume start $V0;
-EXPECT 'Started' volinfo_field $V0 'Status';
-
-## Mount the volume
-TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
-
-## Testing writev, readv, ftruncate:
-## Create fragmented files and compare them with the reference files
-
-build_tester $(dirname $0)/frag.c
-TEST $(dirname $0)/frag $M0/testfile /tmp/$V0-goodfile 262144 500
-
-## Testing link, unlink, symlink, rename
-
-TEST ln $M0/testfile $M0/testfile-link
-TEST mv $M0/testfile $M0/testfile-renamed
-TEST ln -s $M0/testfile-link $M0/testfile-symlink
-TEST rm -f $M0/testfile-renamed
-
-## Remount the volume
-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
-TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0;
-
-TEST diff -u $M0/testfile-symlink /tmp/$V0-goodfile
-EXPECT ''
-
-TEST rm -f $M0/testfile-symlink
-TEST rm -f $M0/testfile-link
-
-## Cleanup files
-
-TEST rm -f /tmp/$V0-master-key
-TEST rm -f /tmp/$V0-goodfile
-
-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
-
-## Reset crypt options
-TEST $CLI volume reset $V0 encryption.block-size
-TEST $CLI volume reset $V0 encryption.data-key-size
-
-## Stop the volume
-TEST $CLI volume stop $V0;
-EXPECT 'Stopped' volinfo_field $V0 'Status';
-
-## Delete the volume
-TEST $CLI volume delete $V0;
-TEST ! $CLI volume info $V0;
-
-TEST rm -rf $(dirname $0)/frag
-cleanup;
-
-#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
-#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/encryption/frag.c b/tests/encryption/frag.c
deleted file mode 100644
index 7b4510f6a4d..00000000000
--- a/tests/encryption/frag.c
+++ /dev/null
@@ -1,324 +0,0 @@
-/*
- Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <sys/types.h>
-#include <string.h>
-#include <sys/stat.h>
-#include <sys/syscall.h>
-#include <unistd.h>
-#include <fcntl.h>
-
-#define MAX_NUM_OPS (1 << 20)
-#define MAX_FILE_SIZE (1 << 30)
-
-typedef enum { READ_OP, WRITE_OP, TRUNC_OP, LAST_OP } frag_op;
-
-struct frag_ctx {
- int test_fd;
- int good_fd;
- char *test_buf;
- char *good_buf;
- char *content;
- int max_file_size;
-};
-
-typedef int (*frag_op_t)(struct frag_ctx *ctx, off_t offset, size_t count);
-
-static int
-doread(int fd, off_t offset, size_t count, char *buf, int max_file_size)
-{
- int ret = 0;
- int was_read = 0;
-
- if (lseek(fd, offset, SEEK_SET) == -1) {
- perror("lseek failed");
- return -1;
- }
- while (count) {
- ret = read(fd, buf + offset + was_read, count);
- if (ret < 0)
- return -1;
- if (ret == 0)
- break;
- if (ret > count) {
- fprintf(stderr, "READ: read more than asked\n");
- return -1;
- }
- count -= ret;
- was_read += ret;
- }
- return ret;
-}
-
-static int
-dowrite(int fd, off_t offset, size_t count, char *buf)
-{
- int ret;
-
- ret = lseek(fd, offset, SEEK_SET);
- if (ret == -1)
- return ret;
- return write(fd, buf, count);
-}
-
-static int
-dotrunc(int fd, off_t offset)
-{
- int ret;
-
- ret = ftruncate(fd, offset);
- if (ret == -1)
- perror("truncate failed");
- return ret;
-}
-
-static int
-prepare_file(char *filename, int *fd, char **buf, int max_file_size)
-{
- int ret;
-
- *buf = malloc(max_file_size);
- if (*buf == NULL) {
- perror("malloc failed");
- return -1;
- }
- *fd = open(filename, O_CREAT | O_RDWR, S_IRWXU);
- if (*fd == -1) {
- perror("open failed");
- free(*buf);
- *buf = NULL;
- return -1;
- }
- return 0;
-}
-
-/*
- * @offset, @count: random values from [0, max_file_size - 1]
- */
-static int
-frag_write(struct frag_ctx *ctx, off_t offset, size_t count)
-{
- int ret;
- struct stat test_stbuf;
- struct stat good_stbuf;
-
- if (offset + count > ctx->max_file_size)
- offset = offset / 2;
- if (offset + count > ctx->max_file_size)
- count = count / 2;
-
- if (fstat(ctx->test_fd, &test_stbuf)) {
- fprintf(stderr, "WRITE: fstat of test file failed\n");
- return -1;
- }
- if (offset > test_stbuf.st_size)
- printf("writing hole\n");
-
- ret = dowrite(ctx->test_fd, offset, count, ctx->content);
- if (ret < 0 || ret != count) {
- fprintf(stderr, "WRITE: failed to write test file\n");
- return -1;
- }
- ret = dowrite(ctx->good_fd, offset, count, ctx->content);
- if (ret < 0 || ret != count) {
- fprintf(stderr, "WRITE: failed to write test file\n");
- return -1;
- }
- if (fstat(ctx->test_fd, &test_stbuf)) {
- fprintf(stderr, "WRITE: fstat of test file failed\n");
- return -1;
- }
- if (fstat(ctx->good_fd, &good_stbuf)) {
- fprintf(stderr, "WRITE: fstat of good file failed\n");
- return -1;
- }
- if (test_stbuf.st_size != good_stbuf.st_size) {
- fprintf(stderr, "READ: Bad file size %d (expected %d)\n",
- (int)test_stbuf.st_size, (int)good_stbuf.st_size);
- return -1;
- }
- return 0;
-}
-
-/*
- * @offset, @count: random values from [0, max_file_size - 1]
- */
-static int
-frag_read(struct frag_ctx *ctx, off_t offset, size_t count)
-{
- ssize_t test_ret;
- ssize_t good_ret;
-
- test_ret = doread(ctx->test_fd, offset, count, ctx->test_buf,
- ctx->max_file_size);
- if (test_ret < 0) {
- fprintf(stderr, "READ: failed to read test file\n");
- return -1;
- }
- good_ret = doread(ctx->good_fd, offset, count, ctx->good_buf,
- ctx->max_file_size);
- if (good_ret < 0) {
- fprintf(stderr, "READ: failed to read good file\n");
- return -1;
- }
- if (test_ret != good_ret) {
- fprintf(stderr, "READ: Bad return value %d (expected %d\n)", test_ret,
- good_ret);
- return -1;
- }
- if (memcmp(ctx->test_buf + offset, ctx->good_buf + offset, good_ret)) {
- fprintf(stderr, "READ: bad data\n");
- return -1;
- }
- return 0;
-}
-
-/*
- * @offset: random value from [0, max_file_size - 1]
- */
-static int
-frag_truncate(struct frag_ctx *ctx, off_t offset,
- __attribute__((unused)) size_t count)
-{
- int ret;
- struct stat test_stbuf;
- struct stat good_stbuf;
-
- if (fstat(ctx->test_fd, &test_stbuf)) {
- fprintf(stderr, "TRUNCATE: fstat of test file failed\n");
- return -1;
- }
- if (offset > test_stbuf.st_size)
- printf("expanding truncate to %d\n", offset);
- else if (offset < test_stbuf.st_size)
- printf("shrinking truncate to %d\n", offset);
- else
- printf("trivial truncate\n");
-
- ret = dotrunc(ctx->test_fd, offset);
- if (ret == -1) {
- fprintf(stderr, "TRUNCATE: failed for test file\n");
- return -1;
- }
- ret = dotrunc(ctx->good_fd, offset);
- if (ret == -1) {
- fprintf(stderr, "TRUNCATE: failed for good file\n");
- return -1;
- }
- if (fstat(ctx->test_fd, &test_stbuf)) {
- fprintf(stderr, "TRUNCATE: fstat of test file failed\n");
- return -1;
- }
- if (fstat(ctx->good_fd, &good_stbuf)) {
- fprintf(stderr, "TRUNCATE: fstat of good file failed\n");
- return -1;
- }
- if (test_stbuf.st_size != good_stbuf.st_size) {
- fprintf(stderr, "TRUNCATE: bad test file size %d (expected %d)\n",
- test_stbuf.st_size, good_stbuf.st_size);
- return -1;
- }
- return 0;
-}
-
-frag_op_t frag_ops[LAST_OP] = {[READ_OP] = frag_read,
- [WRITE_OP] = frag_write,
- [TRUNC_OP] = frag_truncate};
-
-static void
-put_ctx(struct frag_ctx *ctx)
-{
- if (ctx->test_buf)
- free(ctx->test_buf);
- if (ctx->good_buf)
- free(ctx->good_buf);
- if (ctx->content)
- free(ctx->content);
-}
-
-main(int argc, char *argv[])
-{
- int i;
- int ret = 0;
- struct frag_ctx ctx;
- char *test_filename = NULL;
- char *good_filename = NULL;
- int num_ops;
- int max_file_size;
-
- memset(&ctx, 0, sizeof(ctx));
- if (argc != 5) {
- fprintf(stderr,
- "usage: %s <test-file-name> <good-file-name> <max-file-size> "
- "<number-of-operations>\n",
- argv[0]);
- ret = -1;
- goto exit;
- }
- test_filename = argv[1];
- good_filename = argv[2];
- max_file_size = atoi(argv[3]);
- if (max_file_size > MAX_FILE_SIZE)
- max_file_size = MAX_FILE_SIZE;
- num_ops = atoi(argv[4]);
- if (num_ops > MAX_NUM_OPS)
- num_ops = MAX_NUM_OPS;
-
- ret = prepare_file(test_filename, &ctx.test_fd, &ctx.test_buf,
- max_file_size);
- if (ret)
- goto exit;
- ret = prepare_file(good_filename, &ctx.good_fd, &ctx.good_buf,
- max_file_size);
- if (ret) {
- if (close(ctx.test_fd) == -1)
- perror("close test_buf failed");
- goto exit;
- }
- ctx.content = malloc(max_file_size);
- if (!ctx.content) {
- perror("malloc failed");
- goto close;
- }
- ctx.max_file_size = max_file_size;
- for (i = 0; i < max_file_size; i++)
- ctx.content[i] = random() % 256;
-
- for (i = 0; i < num_ops; i++) {
- ret = frag_ops[random() % LAST_OP](
- &ctx, random() % max_file_size, /* offset */
- random() % max_file_size /* count */);
- if (ret)
- break;
- }
-close:
- if (close(ctx.test_fd) == -1)
- perror("close test_fd failed");
- if (close(ctx.good_fd) == -1)
- perror("close good_fd failed");
-exit:
- put_ctx(&ctx);
- if (ret)
- exit(1);
- exit(0);
-}
-
-/*
- Local variables:
- c-indentation-style: "K&R"
- mode-name: "LC"
- c-basic-offset: 8
- tab-width: 8
- fill-column: 80
- scroll-step: 1
- End:
-*/
diff --git a/tests/env.rc.in b/tests/env.rc.in
index 8d692f7066c..0478d66aec6 100644
--- a/tests/env.rc.in
+++ b/tests/env.rc.in
@@ -2,7 +2,7 @@ prefix=@prefix@
exec_prefix=@exec_prefix@
libdir=@libdir@
-PATH=@sbindir@:$PATH
+PATH=@bindir@:@sbindir@:$PATH
export PATH
GLUSTERD_PIDFILEDIR=@localstatedir@/run/gluster
@@ -37,3 +37,6 @@ export GLUSTER_CMD_DIR
GLUSTER_LIBEXECDIR=@GLUSTERFS_LIBEXECDIR@
export GLUSTER_LIBEXECDIR
+
+RUN_NFS_TESTS=@BUILD_GNFS@
+export RUN_NFS_TESTS
diff --git a/tests/features/delay-gen.t b/tests/features/delay-gen.t
index 712b5b60065..72e6dbb7697 100755
--- a/tests/features/delay-gen.t
+++ b/tests/features/delay-gen.t
@@ -36,5 +36,17 @@ create_max_latency=$($CLI volume profile $V0 info | grep CREATE | awk 'BEGIN {ma
TEST [ ! -z $write_max_latency ];
TEST [ -z $create_max_latency ];
+# Not providing a particular fop will make it test everything
+TEST $CLI volume reset $V0 delay-gen.enable
+TEST $CLI volume set $V0 delay-gen.delay-duration 100
+
+cp $(dirname ${0})/../basic/gfapi/glfsxmp-coverage.c glfsxmp.c
+build_tester ./glfsxmp.c -lgfapi
+./glfsxmp $V0 $H0 >/dev/null
+cleanup_tester ./glfsxmp
+rm ./glfsxmp.c
+
+$(dirname $0)/../basic/rpc-coverage.sh $M0 >/dev/null
+
cleanup;
#G_TESTDEF_TEST_STATUS_NETBSD7=1501397
diff --git a/tests/features/flock_interrupt.t b/tests/features/flock_interrupt.t
new file mode 100644
index 00000000000..b8717e30dfb
--- /dev/null
+++ b/tests/features/flock_interrupt.t
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0;
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST touch $M0/testfile;
+
+echo > got_lock
+flock $M0/testfile sleep 6 & { sleep 0.3; flock -w 2 $M0/testfile true; echo ok > got_lock; } &
+
+EXPECT_WITHIN 4 ok cat got_lock;
+
+## Finish up
+rm -f got_lock;
+cleanup;
diff --git a/tests/features/fuse-lru-limit.t b/tests/features/fuse-lru-limit.t
new file mode 100644
index 00000000000..dd6be2d5397
--- /dev/null
+++ b/tests/features/fuse-lru-limit.t
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "2" online_brick_count
+
+EXPECT "1" get_mount_active_size_value $V0 $M0
+EXPECT "0" get_mount_lru_size_value $V0 $M0
+
+mkdir ${M0}/dir-{1..9}
+for i in {1..9}; do
+ for j in {1..1000}; do
+ echo "Test file" > ${M0}/dir-$i/file-$j;
+ done;
+done
+lc=$(get_mount_lru_size_value $V0 ${M0})
+# ideally it should be 9000+
+TEST [ $lc -ge 9000 ]
+
+TEST umount $M0
+
+TEST glusterfs -s $H0 --volfile-id $V0 --lru-limit 1000 $M0
+
+TEST find $M0
+lc=$(get_mount_lru_size_value $V0 ${M0})
+# ideally it should be <1000
+# Not sure if there are any possibilities of buffer need.
+TEST [ $lc -le 1000 ]
+
+TEST rm -rf $M0/*
+
+EXPECT "1" get_mount_active_size_value $V0 $M0
+EXPECT "0" get_mount_lru_size_value $V0 $M0
+
+cleanup
diff --git a/tests/features/interrupt.t b/tests/features/interrupt.t
index bd70ff87545..067eb1b7486 100644
--- a/tests/features/interrupt.t
+++ b/tests/features/interrupt.t
@@ -43,18 +43,22 @@ function test_interrupt {
# If the test helper fails (which is considered a setup error, not failure of the test
# case itself), kill will be invoked without argument, and that will be the actual
# error which is caught.
- TEST "./$(dirname $0)/open_and_sleep $M0/testfile | { sleep 0.1; xargs -n1 kill -INT; }"
+ TEST "./$(dirname $0)/open_and_sleep $M0/testfile-$handlebool | { sleep 0.1; xargs -n1 kill -INT; }"
TEST "grep -E '$logpattern' $log_file"
# Basic sanity check, making sure filesystem has not crashed.
- TEST test -f $M0/testfile
+ TEST test -f $M0/testfile-$handlebool
}
# Theoretically FLUSH might finish before INTERRUPT is handled,
-# in which case we'd get the "no handler found" message (but it's unlikely).
-test_interrupt yes 'FLUSH.*interrupt handler triggered|INTERRUPT.*no handler found'
+# in which case we'd get the "no handler found" message instead of
+# "interrupt handler triggered" (but it's unlikely).
+# If that's observed, the pattern can be changed to
+# 'FLUSH.*interrupt handler triggered|[I]NTERRUPT.*no handler found'
+# to fix the test.
+test_interrupt yes '[F]LUSH.*interrupt handler triggered'
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
-test_interrupt no 'INTERRUPT.*no handler found'
+test_interrupt no '[I]NTERRUPT.*no handler found'
## Finish up
TEST $CLI volume stop $V0;
diff --git a/tests/features/ssl-authz.t b/tests/features/ssl-authz.t
index 3cb45b5e582..497083e5a3a 100755
--- a/tests/features/ssl-authz.t
+++ b/tests/features/ssl-authz.t
@@ -25,6 +25,7 @@ TEST glusterd
TEST pidof glusterd
TEST $CLI volume info;
+TEST $CLI v set all cluster.brick-multiplex on
# Construct a cipher list that excludes CBC because of POODLE.
# http://web.nvd.nist.gov/view/vuln/detail?vulnId=CVE-2014-3566
#
@@ -41,16 +42,16 @@ function valid_ciphers {
-e '/:$/s///'
}
-TEST openssl genrsa -out $SSL_KEY 1024
+TEST openssl genrsa -out $SSL_KEY 2048
TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT
ln $SSL_CERT $SSL_CA
-TEST $CLI volume create $V0 $H0:$B0/1
+TEST $CLI volume create $V0 replica 3 $H0:$B0/{1,2,3} force
TEST $CLI volume set $V0 server.ssl on
TEST $CLI volume set $V0 client.ssl on
TEST $CLI volume set $V0 ssl.cipher-list $(valid_ciphers)
TEST $CLI volume start $V0
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" online_brick_count
# This mount should SUCCEED because ssl-allow=* by default. This effectively
# disables SSL authorization, though authentication and encryption might still
@@ -59,11 +60,28 @@ TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
TEST ping_file $M0/before
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+glusterfsd_pid=`pgrep glusterfsd`
+TEST [ $glusterfsd_pid != 0 ]
+start=`pmap -x $glusterfsd_pid | grep total | awk -F " " '{print $4}'`
+echo "Memory consumption for glusterfsd process"
+for i in $(seq 1 100); do
+ gluster v heal $V0 info >/dev/null
+done
+#Wait to cleanup memory
+sleep 10
+end=`pmap -x $glusterfsd_pid | grep total | awk -F " " '{print $4}'`
+diff=$((end-start))
+
+# If memory consumption is more than 15M some leak in SSL code path
+
+TEST [ $diff -lt 15000 ]
+
+
# Set ssl-allow to a wildcard that includes our identity.
TEST $CLI volume stop $V0
TEST $CLI volume set $V0 auth.ssl-allow Any*
TEST $CLI volume start $V0
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" online_brick_count
# This mount should SUCCEED because we match the wildcard.
TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
diff --git a/tests/features/ssl-ciphers.t b/tests/features/ssl-ciphers.t
index 563d37c5277..b70fe360e02 100644
--- a/tests/features/ssl-ciphers.t
+++ b/tests/features/ssl-ciphers.t
@@ -33,18 +33,26 @@ wait_mount() {
openssl_connect() {
ssl_opt="-verify 3 -verify_return_error -CAfile $SSL_CA"
ssl_opt="$ssl_opt -crl_check_all -CApath $TMPDIR"
- #echo openssl s_client $ssl_opt $@ > /dev/tty
- #read -p "Continue? " nothing
- CIPHER=`echo "" |
- openssl s_client $ssl_opt $@ 2>/dev/null |
- awk '/^ Cipher/{print $3}'`
- if [ "x${CIPHER}" = "x" -o "x${CIPHER}" = "x0000" ] ; then
+ cmd="echo "" | openssl s_client $ssl_opt $@ 2>/dev/null"
+ CIPHER=$(eval $cmd | awk -F "Cipher is" '{print $2}' | tr -d '[:space:]' | awk -F " " '{print $1}')
+ if [ "x${CIPHER}" = "x" -o "x${CIPHER}" = "x0000" -o "x${CIPHER}" = "x(NONE)" ] ; then
echo "N"
else
echo "Y"
fi
}
+#Validate the cipher to pass EXPECT test case before call openssl_connect
+check_cipher() {
+ cmd="echo "" | openssl s_client $@ 2> /dev/null"
+ cipher=$(eval $cmd |awk -F "Cipher is" '{print $2}' | tr -d '[:space:]' | awk -F " " '{print $1}')
+ if [ "x${cipher}" = "x" -o "x${cipher}" = "x0000" -o "x${cipher}" = "x(NONE)" ] ; then
+ echo "N"
+ else
+ echo "Y"
+ fi
+}
+
cleanup;
mkdir -p $B0
mkdir -p $M0
@@ -65,7 +73,7 @@ TEST glusterd
TEST pidof glusterd
TEST $CLI volume info;
-TEST openssl genrsa -out $SSL_KEY 1024 2>/dev/null
+TEST openssl genrsa -out $SSL_KEY 2048 2>/dev/null
TEST openssl req -config $SSL_CFG -new -key $SSL_KEY -x509 \
-subj /CN=CA -out $SSL_CA
TEST openssl req -config $SSL_CFG -new -key $SSL_KEY \
@@ -102,32 +110,47 @@ EXPECT "N" openssl_connect -ssl2 -connect $H0:$BRICK_PORT
# Test SSLv3 protocol fails
EXPECT "N" openssl_connect -ssl3 -connect $H0:$BRICK_PORT
-# Test TLSv1 protocol fails
-EXPECT "N" openssl_connect -tls1 -connect $H0:$BRICK_PORT
+# Test TLSv1 protocol based on openssl version
+cmd="openssl version"
+ver=$(eval $cmd | awk -F " " '{print $2}' | grep "^1.1")
+if [ "x${ver}" = "x" ]; then
+ supp="N"
+else
+ supp="Y"
+fi
+EXPECT "${supp}" openssl_connect -tls1 -connect $H0:$BRICK_PORT
# Test a HIGH CBC cipher
-EXPECT "Y" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT
+cph=`check_cipher -cipher AES256-SHA -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT
# Test EECDH
-EXPECT "Y" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT
+cph=`check_cipher -cipher EECDH -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT
# test MD5 fails
-EXPECT "N" openssl_connect -cipher DES-CBC3-MD5 -connect $H0:$BRICK_PORT
+cph=`check_cipher -cipher DES-CBC3-MD5 -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher DES-CBC3-MD5 -connect $H0:$BRICK_PORT
# test RC4 fails
-EXPECT "N" openssl_connect -cipher RC4-SHA -connect $H0:$BRICK_PORT
+cph=`check_cipher -cipher RC4-SHA -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher RC4-SHA -connect $H0:$BRICK_PORT
# test eNULL fails
-EXPECT "N" openssl_connect -cipher NULL-SHA256 -connect $H0:$BRICK_PORT
+cph=`check_cipher -cipher NULL-SHA256 -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher NULL-SHA256 -connect $H0:$BRICK_PORT
# test SHA2
-EXPECT "Y" openssl_connect -cipher AES256-SHA256 -connect $H0:$BRICK_PORT
+cph=`check_cipher -cipher AES256-SHA256 -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher AES256-SHA256 -connect $H0:$BRICK_PORT
# test GCM
-EXPECT "Y" openssl_connect -cipher AES256-GCM-SHA384 -connect $H0:$BRICK_PORT
+cph=`check_cipher -cipher AES256-GCM-SHA384 -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher AES256-GCM-SHA384 -connect $H0:$BRICK_PORT
# Test DH fails without DH params
-EXPECT "N" openssl_connect -cipher EDH -connect $H0:$BRICK_PORT
+cph=`check_cipher -cipher EDH -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher EDH -connect $H0:$BRICK_PORT
# Test DH with DH params
TEST $CLI volume set $V0 ssl.dh-param `pwd`/`dirname $0`/dh1024.pem
@@ -145,8 +168,10 @@ TEST $CLI volume stop $V0
TEST $CLI volume start $V0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
BRICK_PORT=`brick_port $V0`
-EXPECT "Y" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT
-EXPECT "N" openssl_connect -cipher AES128-SHA -connect $H0:$BRICK_PORT
+cph=`check_cipher -cipher AES256-SHA -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT
+cph=`check_cipher -cipher AES128-SHA -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher AES128-SHA -connect $H0:$BRICK_PORT
# Test the ec-curve option
TEST $CLI volume set $V0 ssl.cipher-list EECDH:EDH:!TLSv1
@@ -155,8 +180,10 @@ TEST $CLI volume stop $V0
TEST $CLI volume start $V0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
BRICK_PORT=`brick_port $V0`
-EXPECT "N" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT
-EXPECT "Y" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT
+cph=`check_cipher -cipher AES256-SHA -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT
+cph=`check_cipher -cipher EECDH -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT
TEST $CLI volume set $V0 ssl.ec-curve invalid
EXPECT invalid volume_option $V0 ssl.ec-curve
@@ -164,7 +191,8 @@ TEST $CLI volume stop $V0
TEST $CLI volume start $V0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
BRICK_PORT=`brick_port $V0`
-EXPECT "N" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT
+cph=`check_cipher -cipher EECDH -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT
TEST $CLI volume set $V0 ssl.ec-curve secp521r1
EXPECT secp521r1 volume_option $V0 ssl.ec-curve
@@ -175,8 +203,6 @@ BRICK_PORT=`brick_port $V0`
EXPECT "Y" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT
# test revocation
-# no need to restart the volume since the options are used
-# by the client here.
TEST $CLI volume set $V0 ssl.crl-path $TMPDIR
EXPECT $TMPDIR volume_option $V0 ssl.crl-path
$GFS --volfile-id=$V0 --volfile-server=$H0 $M0
@@ -189,14 +215,25 @@ TEST openssl ca -batch -config $SSL_CFG -revoke $SSL_CERT 2>&1
TEST openssl ca -config $SSL_CFG -gencrl -out $SSL_CRL 2>&1
# Failed once revoked
+# Although client fails to mount without restarting the server after crl-path
+# is set when no actual crl file is found on the client, it would also fail
+# when server is restarted for the same reason. Since the socket initialization
+# code is the same for client and server, the crl verification flags need to
+# be turned off for the client to avoid SSL searching for CRLs in the
+# ssl.crl-path. If no CRL files are found in the ssl.crl-path, SSL fails the
+# connect() attempt on the client.
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
$GFS --volfile-id=$V0 --volfile-server=$H0 $M0
EXPECT "N" wait_mount $M0
TEST ! test -f $TEST_FILE
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
# Succeed with CRL disabled
+TEST $CLI volume stop $V0
TEST $CLI volume set $V0 ssl.crl-path NULL
EXPECT NULL volume_option $V0 ssl.crl-path
+TEST $CLI volume start $V0
$GFS --volfile-id=$V0 --volfile-server=$H0 $M0
EXPECT "Y" wait_mount $M0
TEST test -f $TEST_FILE
diff --git a/tests/features/subdir-mount.t b/tests/features/subdir-mount.t
index 8401946fb0f..a02bd6befc4 100644
--- a/tests/features/subdir-mount.t
+++ b/tests/features/subdir-mount.t
@@ -85,12 +85,17 @@ TEST $CLI volume start $V0
TEST $GFS --subdir-mount /subdir1/subdir1.1/subdir1.2 -s $H0 --volfile-id $V0 $M2
TEST stat $M2
+initcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc -l`
# mount shouldn't fail even after add-brick
TEST $CLI volume add-brick $V0 replica 2 $H0:$B0/${V0}{5,6};
-# Give time for client process to get notified and use the new
-# volfile after add-brick
-sleep 1
+# Wait to execute create-subdir-mounts.sh script by glusterd
+newcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc -l`
+while [ $newcnt -eq $initcnt ]
+do
+ newcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc -l`
+ sleep 1
+done
# Existing mount should still be active
mount_inode=$(stat --format "%i" "$M2")
diff --git a/tests/features/trash.t b/tests/features/trash.t
index 472e909e567..da5b50bc85a 100755
--- a/tests/features/trash.t
+++ b/tests/features/trash.t
@@ -94,105 +94,105 @@ wildcard_not_exists() {
if [ $? -eq 0 ]; then echo "Y"; else echo "N"; fi
}
-# testing glusterd [1-3]
+# testing glusterd
TEST glusterd
TEST pidof glusterd
TEST $CLI volume info
-# creating distributed volume [4]
+# creating distributed volume
TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}
-# checking volume status [5-7]
+# checking volume status
EXPECT "$V0" volinfo_field $V0 'Volume Name'
EXPECT 'Created' volinfo_field $V0 'Status'
EXPECT '2' brick_count $V0
-# test without enabling trash translator [8]
+# test without enabling trash translator
TEST start_vol $V0 $M0
-# test on enabling trash translator [9-10]
+# test on enabling trash translator
TEST $CLI volume set $V0 features.trash on
EXPECT 'on' volinfo_field $V0 'features.trash'
-# files directly under mount point [11]
+# files directly under mount point
create_files $M0/file1 $M0/file2
TEST file_exists $V0 file1 file2
-# perform unlink [12]
+# perform unlink
TEST unlink_op file1
-# perform truncate [13]
+# perform truncate
TEST truncate_op file2 4
-# create files directory hierarchy and check [14]
+# create files directory hierarchy and check
mkdir -p $M0/1/2/3
create_files $M0/1/2/3/foo1 $M0/1/2/3/foo2
TEST file_exists $V0 1/2/3/foo1 1/2/3/foo2
-# perform unlink [15]
+# perform unlink
TEST unlink_op 1/2/3/foo1
-# perform truncate [16]
+# perform truncate
TEST truncate_op 1/2/3/foo2 4
# create a directory for eliminate pattern
mkdir $M0/a
-# set the eliminate pattern [17-18]
+# set the eliminate pattern
TEST $CLI volume set $V0 features.trash-eliminate-path /a
EXPECT '/a' volinfo_field $V0 'features.trash-eliminate-path'
-# create two files and check [19]
+# create two files and check
create_files $M0/a/test1 $M0/a/test2
TEST file_exists $V0 a/test1 a/test2
-# remove from eliminate pattern [20]
+# remove from eliminate pattern
rm -f $M0/a/test1
EXPECT "Y" wildcard_not_exists $M0/.trashcan/a/test1*
-# truncate from eliminate path [21-23]
+# truncate from eliminate path
truncate -s 2 $M0/a/test2
TEST [ -e $M0/a/test2 ]
TEST [ `ls -l $M0/a/test2 | awk '{print $5}'` -eq 2 ]
EXPECT "Y" wildcard_not_exists $M0/.trashcan/a/test2*
-# set internal op on [24-25]
+# set internal op on
TEST $CLI volume set $V0 features.trash-internal-op on
EXPECT 'on' volinfo_field $V0 'features.trash-internal-op'
-# again create two files and check [26]
+# again create two files and check
create_files $M0/inop1 $M0/inop2
TEST file_exists $V0 inop1 inop2
-# perform unlink [27]
+# perform unlink
TEST unlink_op inop1
-# perform truncate [28]
+# perform truncate
TEST truncate_op inop2 4
-# remove one brick and restart the volume [28-31]
+# remove one brick and restart the volume
TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 force
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
TEST $CLI volume stop $V0
TEST start_vol $V0 $M0 $M0/.trashcan
-# again create two files and check [33]
+# again create two files and check
create_files $M0/rebal1 $M0/rebal2
TEST file_exists $V0 rebal1 rebal2
-# add one brick [34-35]
+# add one brick
TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3
TEST [ -d $B0/${V0}3 ]
-# perform rebalance [36]
+# perform rebalance
TEST $CLI volume rebalance $V0 start force
EXPECT_WITHIN $REBALANCE_TIMEOUT "0" rebalance_completed
#Find out which file was migrated to the new brick
file_name=$(ls $B0/${V0}3/rebal*| xargs basename)
-# check whether rebalance was succesful [37-40]
+# check whether rebalance was succesful
EXPECT "Y" wildcard_exists $B0/${V0}3/$file_name*
EXPECT "Y" wildcard_exists $B0/${V0}1/.trashcan/internal_op/$file_name*
@@ -201,52 +201,42 @@ EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
# force required in case rebalance is not over
TEST $CLI volume stop $V0 force
-# create a replicated volume [41]
+# create a replicated volume
TEST $CLI volume create $V1 replica 2 $H0:$B0/${V1}{1,2}
-# checking volume status [42-45]
+# checking volume status
EXPECT "$V1" volinfo_field $V1 'Volume Name'
EXPECT 'Replicate' volinfo_field $V1 'Type'
EXPECT 'Created' volinfo_field $V1 'Status'
EXPECT '2' brick_count $V1
-# enable trash with options and start the replicate volume by disabling automatic self-heal [46-50]
+# enable trash with options and start the replicate volume by disabling automatic self-heal
TEST $CLI volume set $V1 features.trash on
TEST $CLI volume set $V1 features.trash-internal-op on
EXPECT 'on' volinfo_field $V1 'features.trash'
EXPECT 'on' volinfo_field $V1 'features.trash-internal-op'
TEST start_vol $V1 $M1 $M1/.trashcan
-# mount and check for trash directory [51]
+# mount and check for trash directory
TEST [ -d $M1/.trashcan/internal_op ]
-# create a file and check [52]
+# create a file and check
touch $M1/self
TEST [ -e $B0/${V1}1/self -a -e $B0/${V1}2/self ]
-# kill one brick and delete the file from mount point [53-54]
+# kill one brick and delete the file from mount point
kill_brick $V1 $H0 $B0/${V1}1
EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "1" online_brick_count
rm -f $M1/self
EXPECT "Y" wildcard_exists $B0/${V1}2/.trashcan/self*
-# force start the volume and trigger the self-heal manually [55-57]
-TEST $CLI volume start $V1 force
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" online_brick_count
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
-# Since we created the file under root of the volume, it will be
-# healed automatically
-
-# check for the removed file in trashcan [58]
-EXPECT_WITHIN $HEAL_TIMEOUT "Y" wildcard_exists $B0/${V1}1/.trashcan/internal_op/self*
-
-# check renaming of trash directory through cli [59-62]
+# check renaming of trash directory through cli
TEST $CLI volume set $V0 trash-dir abc
TEST start_vol $V0 $M0 $M0/abc
TEST [ -e $M0/abc -a ! -e $M0/.trashcan ]
EXPECT "Y" wildcard_exists $B0/${V0}1/abc/internal_op/rebal*
-# ensure that rename and delete operation on trash directory fails [63-65]
+# ensure that rename and delete operation on trash directory fails
rm -rf $M0/abc/internal_op
TEST [ -e $M0/abc/internal_op ]
rm -rf $M0/abc/
diff --git a/tests/features/worm.t b/tests/features/worm.t
index 9766dc1ad2b..40b08cdee02 100755
--- a/tests/features/worm.t
+++ b/tests/features/worm.t
@@ -72,6 +72,45 @@ sleep 10
TEST `echo "worm 1" >> $M0/file4`
TEST ! rm -f $M0/file4
+## Test for state transition if auto-commit-period is 0
+TEST $CLI volume set $V0 features.auto-commit-period 0
+TEST `echo "worm 1" > $M0/file5`
+EXPECT '3/10/0' echo $(getfattr -e text --absolute-names --only-value -n "trusted.reten_state" $B0/${V0}1/file5)
+EXPECT 'worm 1' cat $M0/file5
+TEST ! rm -f $M0/file5
+TEST $CLI volume set $V0 features.auto-commit-period 5
+
+## Test for checking if retention-period is updated on increasing the access time of a WORM-RETAINED file.
+TEST $CLI volume set $V0 features.worm-files-deletable 1
+TEST `echo "worm 1" >> $M0/file1`
+initial_timestamp=$(date +%s)
+current_time_seconds=$(date +%S | sed 's/^0*//' );
+TEST chmod 0444 $M0/file1
+EXPECT '3/10/5' echo $(getfattr -e text --absolute-names --only-value -n "trusted.reten_state" $B0/${V0}1/file1)
+changed_timestamp=$(date +%Y%m%d%H%M --date '60 seconds');
+seconds_diff=`expr 60 - $((current_time_seconds))`
+TEST `touch -a -t "${changed_timestamp}" $M0/file1`
+EXPECT "3/$seconds_diff/5" echo $(getfattr -e text --absolute-names --only-value -n "trusted.reten_state" $B0/${V0}1/file1)
+sleep $seconds_diff
+TEST `echo "worm 2" >> $M0/file1`
+EXPECT "$initial_timestamp" echo $(stat --printf %X $M0/file1)
+
+
+## Test for checking if retention-period is updated on decreasing the access time of a WORM-RETAINED file
+TEST $CLI volume set $V0 features.default-retention-period 120
+initial_timestamp=$(date +%s)
+current_time_seconds=$(date +%S | sed 's/^0*//' );
+TEST chmod 0444 $M0/file1
+EXPECT '3/120/5' echo $(getfattr -e text --absolute-names --only-value -n "trusted.reten_state" $B0/${V0}1/file1)
+changed_timestamp=$(date +%Y%m%d%H%M --date '60 seconds');
+seconds_diff=`expr 60 - $((current_time_seconds))`
+TEST `touch -a -t "${changed_timestamp}" $M0/file1`
+EXPECT "3/$seconds_diff/5" echo $(getfattr -e text --absolute-names --only-value -n "trusted.reten_state" $B0/${V0}1/file1)
+sleep $seconds_diff
+TEST `echo "worm 4" >> $M0/file1`
+EXPECT "$initial_timestamp" echo $(stat --printf %X $M0/file1)
+TEST rm -f $M0/file1
+
TEST $CLI volume stop $V0
EXPECT 'Stopped' volinfo_field $V0 'Status'
diff --git a/tests/geo-rep.rc b/tests/geo-rep.rc
index c33ceaaa64e..9ba4262730e 100644
--- a/tests/geo-rep.rc
+++ b/tests/geo-rep.rc
@@ -1,4 +1,9 @@
GEO_REP_TIMEOUT=120
+CHECK_MOUNT_TIMEOUT=50
+#check for mount point
+function check_mounted () {
+ df | grep $1 | wc -l
+}
function check_status_num_rows()
{
@@ -6,6 +11,37 @@ function check_status_num_rows()
$GEOREP_CLI $master $slave status | grep -F "$search_key" | wc -l
}
+function check_fanout_status_num_rows()
+{
+ local search_key=$1
+ $GEOREP_CLI $master status | grep -F "$search_key" | wc -l
+}
+
+function check_fanout_status_detail_num_rows()
+{
+ local search_key=$1
+ $GEOREP_CLI $master status detail | grep -F "$search_key" | wc -l
+}
+
+function check_all_status_num_rows()
+{
+ local search_key=$1
+ $GEOREP_CLI status | grep -F "$search_key" | wc -l
+}
+
+function check_all_status_detail_num_rows()
+{
+ local search_key=$1
+ $GEOREP_CLI status detail | grep -F "$search_key" | wc -l
+}
+
+function verify_checkpoint_met()
+{
+ local master=$1
+ local slave=$2
+ $GEOREP_CLI $master $slave status detail| grep -F "Yes" | wc -l
+}
+
function check_keys_distributed()
{
local search_key=$(cat /var/lib/glusterd/geo-replication/master_slave_common_secret.pem.pub)
@@ -19,6 +55,18 @@ function check_common_secret_file()
echo $?
}
+function create_rename_symlink_case()
+{
+ mkdir ${mastermnt}/MUL_REN_SYMLINK
+ cd ${mastermnt}/MUL_REN_SYMLINK
+ mkdir sym_dir1
+ ln -s "sym_dir1" sym1
+ mv sym1 sym2
+ mv sym2 sym3
+ mv sym3 sym4
+ cd -
+}
+
function create_data()
{
prefix=$1
@@ -40,34 +88,70 @@ function create_data()
touch ${master_mnt}/${prefix}_f2
touch ${master_mnt}/${prefix}_f3
+ # non-ascii filename test
+ echo "Hello non-ascii" > ${master_mnt}/${prefix}_f1_ಸಂತಸ
+ touch ${master_mnt}/${prefix}_f2_ಸಂತಸ
+ touch ${master_mnt}/${prefix}_f3_ಸಂತಸ
+
# dir
mkdir ${master_mnt}/${prefix}_d1
mkdir ${master_mnt}/${prefix}_d2
mkdir ${master_mnt}/${prefix}_d3
- # Hardlink
+ # non-ascii dir and filename test
+ mkdir ${master_mnt}/${prefix}_d1_ನನ್ನ
+ mkdir ${master_mnt}/${prefix}_d2_ಸಂತಸ
+ mkdir ${master_mnt}/${prefix}_d3_ಸಂತಸ
+ echo "Hello non-ascii" > ${master_mnt}/${prefix}_d1_ನನ್ನ/ಸಂತಸ
+
+ # Hardlink + non-ascii name
ln ${master_mnt}/${prefix}_f1 ${master_mnt}/${prefix}_hl1
+ ln ${master_mnt}/${prefix}_f1 ${master_mnt}/${prefix}_hl1_ಸಂತಸ
# Symlink
cd ${master_mnt}
ln -s ${prefix}_f1 ${prefix}_sl1
+ ln -s ${prefix}_f1 ${prefix}_sl1_ಸಂತಸ
cd -
# UNLINK
rm ${master_mnt}/${prefix}_f2
+ rm ${master_mnt}/${prefix}_f2_ಸಂತಸ
# RMDIR
rmdir ${master_mnt}/${prefix}_d2
+ rmdir ${master_mnt}/${prefix}_d2_ಸಂತಸ
# Rename - File
mv ${master_mnt}/${prefix}_f3 ${master_mnt}/${prefix}_f4
+ mv ${master_mnt}/${prefix}_f3_ಸಂತಸ ${master_mnt}/${prefix}_f4_ಸಂತಸ
# Rename - Dir
mv ${master_mnt}/${prefix}_d3 ${master_mnt}/${prefix}_d4
+ mv ${master_mnt}/${prefix}_d3_ಸಂತಸ ${master_mnt}/${prefix}_d4_ಸಂತಸ
# chown
touch ${master_mnt}/${prefix}_chown_f1
chown 1000:1000 ${master_mnt}/${prefix}_chown_f1
+ touch ${master_mnt}/${prefix}_chown_f1_ಸಂತಸ
+ chown 1000:1000 ${master_mnt}/${prefix}_chown_f1_ಸಂತಸ
+}
+
+function create_data_hang()
+{
+ prefix=$1
+ mkdir ${master_mnt}/${prefix}
+ cd ${master_mnt}/${prefix}
+ # ~1k files is required with 1 sync-job and hang happens if
+ # stderr buffer of tar/ssh executed with Popen is full (i.e., 64k).
+ # 64k is hit when ~800 files were not found while syncing data
+ # from master. So around 1k files is required to hit the condition.
+ for i in {1..1000}
+ do
+ echo "test data" > file$i
+ mv -f file$i file
+ done
+ cd -
}
function chown_file_ok()
@@ -137,7 +221,8 @@ function arequal_checksum()
{
master=$1
slave=$2
- diff <(arequal-checksum -p $master) <(arequal-checksum -p $slave) | wc -l
+ ret=$(diff <(arequal-checksum -p $master) <(arequal-checksum -p $slave) | wc -l)
+ echo x$ret
}
function symlink_ok()
@@ -151,7 +236,7 @@ function symlink_ok()
else
local fname=$(readlink $symlink_file)
if test "X$fname" != "X$orig_file_name"; then
- echo 1
+ echo 2
else
echo 0
fi
@@ -167,7 +252,7 @@ function rename_file_ok()
if [ -f $old_name ]; then
echo 1
elif [ ! -f $new_name ]; then
- echo 1
+ echo 2
else
echo 0
fi
@@ -181,7 +266,7 @@ function rename_dir_ok()
if [ -d $old_name ]; then
echo 1
elif [ ! -d $new_name ]; then
- echo 1
+ echo 2
else
echo 0
fi
@@ -272,9 +357,9 @@ function hardlink_rename_ok()
if [ ! -f "$file_name" ]; then
echo 1
elif [ ! -f "$file_name.hl1" ]; then
- echo 1
+ echo 2
elif [ -f "$file_name.hl" ]; then
- echo 1
+ echo 3
else
echo 0
fi
@@ -294,9 +379,9 @@ function verify_symlink_rename_mkdir_data()
if [ ! -f $sym_dir/file1 ]; then
echo 1
elif [ ! -h $sym_dir/rn_sym_link ]; then
- echo 1
+ echo 2
elif [ ! -d $sym_dir/sym_link ]; then
- echo 1
+ echo 3
else
echo 0
fi
@@ -322,17 +407,17 @@ function verify_rsnapshot_data()
if [ ! -d "$dir" ]; then
echo 1
elif [ ! -h $dir/a_symlink ]; then
- echo 1
+ echo 2
elif test "X$(readlink $dir/a_symlink)" != "X/does/not/exist2"; then
- echo 1
+ echo 3
elif [ ! -h $dir0/a_symlink ]; then
- echo 1
+ echo 4
elif test "X$(readlink $dir0/a_symlink)" != "X/does/not/exist"; then
- echo 1
+ echo 5
elif [ ! -h $dir1/a_symlink ]; then
- echo 1
+ echo 6
elif test "X$(readlink $dir1/a_symlink)" != "X/does/not/exist2"; then
- echo 1
+ echo 7
else
echo 0
fi
@@ -354,13 +439,56 @@ function verify_hardlink_rename_data()
if [ ! -d $dir ]; then
echo 1
elif [ -f $dir/f1 ]; then
- echo 1
+ echo 2
elif [ -f $dir/f2 ]; then
- echo 1
+ echo 3
elif [ ! -f $dir/f3 ]; then
- echo 1
+ echo 4
elif test "Xtest_data" != "X$(cat $dir/f3)"; then
+ echo 5
+ else
+ echo 0
+ fi
+}
+
+function check_slave_read_only()
+{
+ volum=$1
+ gluster volume info $1 | grep 'features.read-only: on'
+ echo $?
+}
+
+function create_rename_with_existing_destination()
+{
+ dir=$1/rename_with_existing_destination
+ mkdir $dir
+ for i in {1..5}
+ do
+ echo "Data_set$i" > $dir/data_set$i
+ mv $dir/data_set$i $dir/data_set -f
+ done
+}
+
+function verify_rename_with_existing_destination()
+{
+ dir=$1/rename_with_existing_destination
+
+ if [ ! -d $dir ]; then
echo 1
+ elif [ ! -f $dir/data_set ]; then
+ echo 2
+ elif [ -f $dir/data_set1 ]; then
+ echo 3
+ elif [ -f $dir/data_set2 ]; then
+ echo 4
+ elif [ -f $dir/data_set3 ]; then
+ echo 5
+ elif [ -f $dir/data_set4 ]; then
+ echo 6
+ elif [ -f $dir/data_set5 ]; then
+ echo 7
+ elif test "XData_set5" != "X$(cat $dir/data_set)"; then
+ echo 8
else
echo 0
fi
diff --git a/tests/gfid2path/gfid2path_nfs.t b/tests/gfid2path/gfid2path_nfs.t
index 969aa484b82..d1ea7df2f4d 100644
--- a/tests/gfid2path/gfid2path_nfs.t
+++ b/tests/gfid2path/gfid2path_nfs.t
@@ -4,6 +4,8 @@
. $(dirname $0)/../volume.rc
. $(dirname $0)/../nfs.rc
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
cleanup;
XXHSUM_SOURCE="$(dirname $0)/../../contrib/xxhash/xxhsum.c $(dirname $0)/../../contrib/xxhash/xxhash.c"
diff --git a/tests/glusterfind/glusterfind-basic.t b/tests/glusterfind/glusterfind-basic.t
new file mode 100644
index 00000000000..ccb33fb1fc8
--- /dev/null
+++ b/tests/glusterfind/glusterfind-basic.t
@@ -0,0 +1,84 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../env.rc
+
+SCRIPT_TIMEOUT=300
+
+##Cleanup and start glusterd
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+##create .keys
+mkdir -p /var/lib/glusterd/glusterfind/.keys
+
+#create_and_start test_volume
+TEST $CLI volume create test-vol $H0:$B0/b1 $H0:$B0/b2 $H0:$B0/b3
+TEST gluster volume start test-vol
+
+##Mount test-vol
+TEST glusterfs -s $H0 --volfile-id test-vol $M0
+
+TEST timestamp1=$(date +'%s')
+
+##Create files and dirs inside the mount point
+TEST mkdir -p $M0/dir1
+TEST touch $M0/file1
+
+##Glusterfind Create
+TEST glusterfind create sess_vol1 test-vol --force
+
+##################################################################################
+#Incremental crawl
+##################################################################################
+##Glusterfind Pre
+TEST glusterfind pre sess_vol1 test-vol output_file.txt
+
+#Glusterfind Post
+TEST glusterfind post sess_vol1 test-vol
+
+##Glusterfind List
+EXPECT '1' echo $(glusterfind list | grep sess_vol1 | wc -l)
+
+TEST timestamp2=$(date +'%s')
+
+##Glusterfind Query
+TEST glusterfind query test-vol --since-time $timestamp1 --end-time $timestamp2 output_file.txt
+
+#################################################################################
+#Full Crawl
+#################################################################################
+##Glusterfind Pre
+TEST glusterfind pre sess_vol1 test-vol output_file.txt --full --regenerate-outfile
+EXPECT '1' echo $(grep 'NEW dir1' output_file.txt | wc -l)
+EXPECT '1' echo $(grep 'NEW file1' output_file.txt | wc -l)
+
+##Glusterfind Query commands
+TEST glusterfind query test-vol --full output_file.txt
+EXPECT '1' echo $(grep 'NEW dir1' output_file.txt | wc -l)
+EXPECT '1' echo $(grep 'NEW file1' output_file.txt | wc -l)
+
+##using tag, full crawl
+TEST glusterfind query test-vol --full --tag-for-full-find NEW output_file.txt
+EXPECT '1' echo $(grep 'NEW dir1' output_file.txt | wc -l)
+EXPECT '1' echo $(grep 'NEW file1' output_file.txt | wc -l)
+
+##using -field-separator option, full crawl
+glusterfind query test-vol --full output_file.txt --field-separator "=="
+EXPECT '1' echo $(grep 'NEW==dir1' output_file.txt | wc -l)
+EXPECT '1' echo $(grep 'NEW==file1' output_file.txt | wc -l)
+
+##Adding or Replacing a Brick from an Existing Glusterfind Session
+TEST gluster volume add-brick test-vol $H0:$B0/b4 force
+
+##To make existing session work after brick add
+TEST glusterfind create sess_vol test-vol --force
+EXPECT '1' echo $(glusterfind list | grep sess_vol1 | wc -l)
+
+##glusterfind delete
+TEST glusterfind delete sess_vol test-vol
+
+rm -rf output_file.txt
+cleanup;
diff --git a/tests/include.rc b/tests/include.rc
index d76f13b8d07..0dc7d830449 100644
--- a/tests/include.rc
+++ b/tests/include.rc
@@ -1,3 +1,6 @@
+
+checkpoint_time="$(date +%s%N)"
+
M0=${M0:=/mnt/glusterfs/0}; # 0th mount point for FUSE
M1=${M1:=/mnt/glusterfs/1}; # 1st mount point for FUSE
M2=${M2:=/mnt/glusterfs/2}; # 2nd mount point for FUSE
@@ -8,6 +11,7 @@ V0=${V0:=patchy}; # volume name to use in tests
V1=${V1:=patchy1}; # volume name to use in tests
GMV0=${GMV0:=master}; # master volume name to use in geo-rep tests
GSV0=${GSV0:=slave}; # slave volume name to use in geo-rep tests
+GSV1=${GSV1:=slave1}; # slave volume name to use in geo-rep tests
B0=${B0:=/d/backends}; # top level of brick directories
WORKDIRS="$B0 $M0 $M1 $M2 $M3 $N0 $N1"
@@ -76,7 +80,7 @@ NFS_EXPORT_TIMEOUT=20
CHILD_UP_TIMEOUT=20
PROBE_TIMEOUT=60
PEER_SYNC_TIMEOUT=20
-REBALANCE_TIMEOUT=360
+REBALANCE_TIMEOUT=600
REOPEN_TIMEOUT=20
HEAL_TIMEOUT=80
IO_HEAL_TIMEOUT=120
@@ -89,6 +93,7 @@ GRAPH_SWITCH_TIMEOUT=10
UNLINK_TIMEOUT=5
MDC_TIMEOUT=5
IO_WAIT_TIMEOUT=5
+DISK_FAIL_TIMEOUT=80
LOGDIR=$(gluster --print-logdir)
@@ -130,8 +135,8 @@ _GFS () {
while true; do
touch $mount_point/xy_zzy 2> /dev/null && break
i=$((i+1))
- [ $i -lt 10 ] || break
- sleep 1
+ [ $i -lt 100 ] || break
+ sleep 0.1
done
rm -f $mount_point/xy_zzy
return $mount_ret
@@ -207,6 +212,7 @@ function test_header()
dbg "=========================";
dbg "TEST $t (line $TESTLINE): $*";
saved_cmd="$*"
+ start_time="$(date +%s%N)"
}
@@ -215,15 +221,18 @@ function test_footer()
RET=$?
local lineno=$1
local err=$2
-
+ local end_time
+ local elapsed1
+ local elapsed2
+
+ end_time="$(date +%s%N)"
+ elapsed1="$(((start_time - checkpoint_time) / 1000000))"
+ elapsed2="$(((end_time - start_time) / 1000000))"
+ checkpoint_time="$end_time"
if [ $RET -eq 0 ]; then
- echo "ok $t, LINENUM:$lineno";
+ printf "ok %3d [%7d/%7d] <%4d> '%s'\n" "$t" "$elapsed1" "$elapsed2" "$lineno" "$saved_cmd";
else
- echo "not ok $t $err, LINENUM:$lineno";
- # With DEBUG, this was already printed out, so skip it.
- if [ x"$DEBUG" = x"0" ]; then
- echo "FAILED COMMAND: $saved_cmd"
- fi
+ printf "not ok %3d [%7d/%7d] <%4d> '%s' -> '%s'\n" "$t" "$elapsed1" "$elapsed2" "$lineno" "$saved_cmd" "$err"
if [ "$EXIT_EARLY" = "1" ]; then
cleanup
exit $RET
@@ -356,12 +365,12 @@ function _EXPECT_WITHIN()
a="";
shift;
- local endtime=$(( ${timeout}+`date +%s` ))
+ local endtime="$(( ${timeout}000000000 + $(date +%s%N) ))"
# We *want* this to be globally visible.
EW_RETRIES=0
- while [ `date +%s` -lt $endtime ]; do
+ while [[ "$(date +%s%N)" < "$endtime" ]]; do
a=$("$@" | tail -1 ; exit ${PIPESTATUS[0]})
## Check command success
if [ $? -ne 0 ]; then
@@ -371,7 +380,7 @@ function _EXPECT_WITHIN()
if [[ "$a" =~ $e ]]; then
break;
fi
- sleep 1;
+ sleep 0.25;
EW_RETRIES=$((EW_RETRIES+1))
done
@@ -544,8 +553,41 @@ function process_pids() {
echo "${pids[@]}"
}
+## Lock files should get automatically removed once "usradd" or "groupadd"
+## command finishes. But sometimes we encounter situations (bugs) where
+## some of these files may not get properly unlocked after the execution of
+## the command. In that case, when we execute useradd next time, it may show
+## the error “cannot lock /etc/password” or “unable to lock group file”.
+## So, to avoid any such errors, check for any lock files under /etc.
+## and remove those.
+
+function remove_lock_files()
+{
+ if [ ! -f /etc/passwd.lock ];
+ then
+ rm -rf /etc/passwd.lock;
+ fi
+
+ if [ ! -f /etc/group.lock ];
+ then
+ rm -rf /etc/group.lock;
+ fi
+
+ if [ ! -f /etc/shadow.lock ];
+ then
+ rm -rf /etc/shadow.lock;
+ fi
+
+ if [ ! -f /etc/gshadow.lock ];
+ then
+ rm -rf /etc/gshadow.lock;
+ fi
+}
+
+
function cleanup()
{
+ local end_time
# Prepare flags for umount
case `uname -s` in
@@ -563,6 +605,9 @@ function cleanup()
;;
esac
+ # Clean up lock files.
+ remove_lock_files
+
# Clean up all client mounts
for m in `mount | grep fuse.glusterfs | awk '{print $3}'`; do
umount $flag $m
diff --git a/tests/line-coverage/afr-heal-info.t b/tests/line-coverage/afr-heal-info.t
new file mode 100644
index 00000000000..182665917c4
--- /dev/null
+++ b/tests/line-coverage/afr-heal-info.t
@@ -0,0 +1,43 @@
+#!/bin/bash
+#Test that parallel heal-info command execution doesn't result in spurious
+#entries with locking-scheme granular
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+
+function write_and_del_file {
+ dd of=$M0/a.txt if=/dev/zero bs=1024k count=100
+ rm -f $M0/b.txt
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
+TEST $CLI volume set $V0 locking-scheme granular
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST touch $M0/a.txt $M0/b.txt
+write_and_del_file &
+touch $B0/f1 $B0/f2
+
+# All above is similar to basic/afr/heal-info.t
+
+TEST $CLI volume heal $V0 enable
+TEST $CLI volume heal $V0 info --xml
+TEST $CLI volume heal $V0 info summary
+TEST $CLI volume heal $V0 info summary --xml
+TEST $CLI volume heal $V0 info split-brain
+TEST $CLI volume heal $V0 info split-brain --xml
+
+TEST $CLI volume heal $V0 statistics heal-count
+
+# It may fail as the file is not in splitbrain
+$CLI volume heal $V0 split-brain latest-mtime /a.txt
+
+TEST $CLI volume heal $V0 disable
+
+TEST $CLI volume stop $V0
+cleanup;
diff --git a/tests/line-coverage/arbiter-coverage.t b/tests/line-coverage/arbiter-coverage.t
new file mode 100755
index 00000000000..82b470141b5
--- /dev/null
+++ b/tests/line-coverage/arbiter-coverage.t
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 arbiter 1 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+
+TEST $GFS -s $H0 --volfile-id $V0 $M1;
+
+cp $(dirname ${0})/../basic/gfapi/glfsxmp-coverage.c glfsxmp.c
+build_tester ./glfsxmp.c -lgfapi
+$(dirname $0)/../basic/rpc-coverage.sh $M1 >/dev/null
+./glfsxmp $V0 $H0 >/dev/null
+
+TEST cleanup_tester ./glfsxmp
+TEST rm ./glfsxmp.c
+
+## Finish up
+TEST $CLI volume stop $V0;
+
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/line-coverage/cli-peer-and-volume-operations.t b/tests/line-coverage/cli-peer-and-volume-operations.t
new file mode 100644
index 00000000000..0cf8dbe81f9
--- /dev/null
+++ b/tests/line-coverage/cli-peer-and-volume-operations.t
@@ -0,0 +1,135 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../cluster.rc
+. $(dirname $0)/../volume.rc
+
+function peer_count {
+eval \$CLI_$1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+cleanup
+
+TEST launch_cluster 3
+
+TEST $CLI_1 system uuid reset
+
+## basic peer commands
+TEST $CLI_1 peer probe $H2
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count 1
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count 2
+
+#probe a unreachable node
+TEST kill_glusterd 3
+TEST ! $CLI_1 peer probe $H3
+
+#detach a node which is not a part of cluster
+TEST ! $CLI_1 peer detach $H3
+TEST ! $CLI_1 peer detach $H3 force
+
+TEST start_glusterd 3
+TEST $CLI_1 peer probe $H3
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 1
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 2
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 3
+
+# probe a node which is already part of cluster
+TEST $CLI_1 peer probe $H3
+
+#probe an invalid address
+TEST ! $CLI_1 peer probe 1024.1024.1024.1024
+
+TEST $CLI_1 pool list
+
+TEST $CLI_1 --help
+TEST $CLI_1 --version
+TEST $CLI_1 --print-logdir
+TEST $CLI_1 --print-statedumpdir
+
+# try unrecognised command
+TEST ! $CLI_1 volume
+TEST pidof glusterd
+
+## all help commands
+TEST $CLI_1 global help
+TEST $CLI_1 help
+
+TEST $CLI_1 peer help
+TEST $CLI_1 volume help
+TEST $CLI_1 volume bitrot help
+TEST $CLI_1 volume quota help
+TEST $CLI_1 snapshot help
+
+## volume operations
+TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0 $H3:$B3/$V0
+# create a volume with already existing volume name
+TEST ! $CLI_1 volume create $V0 $H1:$B1/$V1 $H2:$B2/$V1
+TEST $CLI_1 volume start $V0
+EXPECT 'Started' cluster_volinfo_field 1 $V0 'Status';
+
+# Mount the volume and create files
+TEST glusterfs -s $H1 --volfile-id $V0 $M1
+TEST touch $M1/file{1..100}
+
+#fails because $V0 is not shd compatible
+TEST ! $CLI_1 volume status $V0 shd
+
+#test explicitly provided options
+TEST $CLI_1 --timeout=120 --log-level=INFO volume status
+
+#changing timezone to a different one, to check localtime logging feature
+TEST export TZ='Asia/Kolkata'
+TEST restart_glusterd 1
+
+#localtime logging enable
+TEST $CLI_1 volume set all cluster.localtime-logging enable
+EXPECT '1' logging_time_check $LOGDIR
+
+#localtime logging disable
+TEST $CLI_1 volume set all cluster.localtime-logging disable
+EXPECT '0' logging_time_check $LOGDIR
+
+#changing timezone back to original timezone
+TEST export TZ='UTC'
+
+#negative tests for volume options
+#'set' option to enable quota/inode-quota is now depreciated
+TEST ! $CLI_1 volume set $V0 quota enable
+TEST ! $CLI_1 volume set $V0 inode-quota enable
+
+#invalid transport type 'rcp'
+TEST ! $CLI_1 volume set $V0 config.transport rcp
+
+#'op-version' option is not valid for a single volume
+TEST ! $CLI_1 volume set $V0 cluster.op-version 72000
+
+#'op-version' option can't be used with any other option
+TEST ! $CLI_1 volume set all cluster.localtime-logging disable cluster.op-version 72000
+
+#invalid format of 'op-version'
+TEST ! $CLI_1 volume set all cluster.op-version 72-000
+
+#provided 'op-version' value is greater than max allowed op-version
+op_version=$($CLI_1 volume get all cluster.max-op-version | awk 'NR==3 {print$2}')
+op_version=$((op_version+1000)) #this can be any number greater than 0
+TEST ! $CLI_1 volume set all cluster.op-version $op_version
+
+#provided 'op-verison' value cannot be less than the current cluster op-version value
+TEST ! $CLI_1 volume set all cluster.op-version 00000
+
+# system commnds
+TEST $CLI_1 system help
+TEST $CLI_1 system uuid get
+TEST $CLI_1 system getspec $V0
+TEST $CLI_1 system getwd
+TEST $CLI_1 system fsm log
+
+# Both these may fail, but it covers xdr functions and some
+# more code in cli/glusterd
+$CLI_1 system:: mount test local:/$V0
+$CLI_1 system:: umount $M0 lazy
+$CLI_1 system:: copy file options
+$CLI_1 system:: portmap brick2port $H0:$B0/brick
+$CLI_1 system:: uuid reset
+
+cleanup
diff --git a/tests/line-coverage/cli-volume-top-profile-coverage.t b/tests/line-coverage/cli-volume-top-profile-coverage.t
new file mode 100644
index 00000000000..35713c26faa
--- /dev/null
+++ b/tests/line-coverage/cli-volume-top-profile-coverage.t
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../cluster.rc
+. $(dirname $0)/../volume.rc
+
+cleanup
+
+# Creating cluster
+TEST launch_cluster 3
+
+# Probing peers
+TEST $CLI_1 peer probe $H2
+TEST $CLI_1 peer probe $H3
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 3
+
+# Creating a volume and starting it.
+TEST $CLI_1 volume create $V0 replica 3 $H1:$B1/$V0 $H2:$B2/$V0 $H3:$B3/$V0
+TEST $CLI_1 volume start $V0
+EXPECT 'Started' cluster_volinfo_field 1 $V0 'Status';
+
+TEST glusterfs -s $H1 --volfile-id $V0 $M1
+TEST touch $M1/file{1..100}
+
+# Testing volume top command with and without xml output
+function test_volume_top_cmds () {
+ local ret=0
+ declare -a top_cmds=("read" "open" "write" "opendir" "readdir")
+ for cmd in ${top_cmds[@]}; do
+ $CLI_1 volume top $V0 $cmd
+ (( ret += $? ))
+ $CLI_1 volume top $V0 clear
+ (( ret += $? ))
+ $CLI_1 volume top $V0 $cmd --xml
+ (( ret += $? ))
+ $CLI_1 volume top $V0 $cmd brick $H1:$B1/$V0
+ (( ret += $? ))
+ $CLI_1 volume top $V0 clear brick $H1:$B1/$V0
+ (( ret += $? ))
+ $CLI_1 volume top $V0 $cmd brick $H1:$B1/$V0 --xml
+ (( ret += $? ))
+ done
+ return $ret
+}
+
+# Testing volume profile command with and without xml
+function test_volume_profile_cmds () {
+ local ret=0
+ declare -a profile_cmds=("start" "info" "info peek" "info cumulative" "info clear" "info incremental peek" "stop")
+ for cmd in "${profile_cmds[@]}"; do
+ $CLI_1 volume profile $V0 $cmd
+ (( ret += $? ))
+ $CLI_1 volume profile $V0 $cmd --xml
+ (( ret += $? ))
+ done
+ return $ret
+}
+
+TEST test_volume_top_cmds;
+TEST test_volume_profile_cmds;
+
+cleanup
diff --git a/tests/line-coverage/errorgen-coverage.t b/tests/line-coverage/errorgen-coverage.t
new file mode 100755
index 00000000000..f4622428d79
--- /dev/null
+++ b/tests/line-coverage/errorgen-coverage.t
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+# Because I have added 10 iterations of rpc-coverage and glfsxmp for errorgen
+SCRIPT_TIMEOUT=600
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+TEST $CLI volume set $V0 error-gen posix;
+TEST $CLI volume set $V0 debug.error-failure 3%;
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+
+TEST $GFS -s $H0 --volfile-id $V0 $M1;
+
+cp $(dirname ${0})/../basic/gfapi/glfsxmp-coverage.c glfsxmp.c
+build_tester ./glfsxmp.c -lgfapi
+for i in $(seq 1 10); do
+ # as there is error-gen, there can be errors, so no
+ # need to test for success of below two commands
+ $(dirname $0)/../basic/rpc-coverage.sh $M1 >/dev/null
+ ./glfsxmp $V0 $H0 >/dev/null
+done
+
+TEST cleanup_tester ./glfsxmp
+TEST rm ./glfsxmp.c
+
+## Finish up
+TEST $CLI volume stop $V0;
+
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/line-coverage/log-and-brick-ops-negative-case.t b/tests/line-coverage/log-and-brick-ops-negative-case.t
new file mode 100644
index 00000000000..d86cb452282
--- /dev/null
+++ b/tests/line-coverage/log-and-brick-ops-negative-case.t
@@ -0,0 +1,82 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+
+#create volumes
+TEST $CLI volume create ${V0}_1 $H0:$B0/v{1..2}
+
+TEST $CLI volume create ${V0}_2 replica 3 arbiter 1 $H0:$B0/v{3..5}
+
+TEST $CLI volume create ${V0}_3 disperse 3 redundancy 1 $H0:$B0/v{6..8}
+TEST $CLI volume start ${V0}_3
+EXPECT 'Started' volinfo_field ${V0}_3 'Status'
+
+TEST $CLI volume create ${V0}_4 replica 3 $H0:$B0/v{9..14}
+TEST $CLI volume start ${V0}_4
+EXPECT 'Started' volinfo_field ${V0}_4 'Status'
+
+#log rotate option
+#provided volume does not exist
+TEST ! $CLI volume log ${V0}_5 rotate
+
+#volume must be started before using log rotate option
+TEST ! $CLI volume log ${V0}_1 rotate
+TEST $CLI volume start ${V0}_1
+EXPECT 'Started' volinfo_field ${V0}_1 'Status'
+
+#incorrect brick provided for the volume
+TEST ! $CLI volume log ${V0}_1 rotate $H0:$B0/v15
+
+#add-brick operations
+#volume must be in started to state to increase replica count
+TEST ! $CLI volume add-brick ${V0}_2 replica 4 $H0:$B0/v15
+TEST $CLI volume start ${V0}_2
+EXPECT 'Started' volinfo_field ${V0}_2 'Status'
+
+#incorrect number of bricks for a replica 4 volume
+TEST ! $CLI volume add-brick ${V0}_1 replica 4 $H0:$B0/v15
+
+#replica count provided is less than the current replica count
+TEST ! $CLI volume add-brick ${V0}_2 replica 2 $H0:$B0/v15
+
+#dispersed to replicated dispersed not possible
+TEST ! $CLI volume add-brick ${V0}_3 replica 2 $H0:$B0/v15
+
+#remove-brick operations
+#replica count option provided for dispersed vol
+TEST ! $CLI volume remove-brick ${V0}_3 replica 2 $H0:$B0/v8 start
+
+#given replica count is greater than the current replica count
+TEST ! $CLI volume remove-brick ${V0}_2 replica 4 $H0:$B0/v5 start
+
+#number of bricks to be removed, must be a multiple of replica count
+TEST ! $CLI volume remove-brick ${V0}_2 replica 3 $H0:$B0/v{3..4} start
+
+#less number of bricks given to reduce the replica count
+TEST ! $CLI volume remove-brick ${V0}_2 replica 1 $H0:$B0/v3 start
+
+#bricks should be from different subvol
+TEST ! $CLI volume remove-brick ${V0}_4 replica 2 $H0:$B0/v{13..14} start
+
+#arbiter must be removed to reduce replica count
+TEST ! $CLI volume remove-brick ${V0}_2 replica 1 $H0:$B0/v{3..4} start
+
+#removal of bricks is not allowed without reducing the replica count explicitly
+TEST ! $CLI volume remove-brick ${V0}_2 replica 3 $H0:$B0/v{3..5} start
+
+#incorrect brick for given vol
+TEST ! $CLI volume remove-brick ${V0}_1 $H0:$B0/v15 start
+
+#removing all the bricks are not allowed
+TEST ! $CLI volume remove-brick ${V0}_1 $H0:$B0/v{1..2} start
+
+#volume must not be stopped state while removing bricks
+TEST $CLI volume stop ${V0}_1
+TEST ! $CLI volume remove-brick ${V0}_1 $H0:$B0/v1 start
+
+cleanup \ No newline at end of file
diff --git a/tests/line-coverage/meta-max-coverage.t b/tests/line-coverage/meta-max-coverage.t
new file mode 100755
index 00000000000..1cc07610aa7
--- /dev/null
+++ b/tests/line-coverage/meta-max-coverage.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}
+TEST $CLI volume start $V0;
+
+## Mount FUSE
+TEST $GFS -s $H0 --volfile-id $V0 $M1
+
+TEST stat $M1/.meta/
+
+# expect failures in rpc-coverage.sh execution.
+res=$($(dirname $0)/../basic/rpc-coverage.sh $M1/.meta)
+
+
+# Expect errors here, hence no need to 'check for success'
+for file in $(find $M1/.meta type f -print); do
+ cat $file >/dev/null
+ echo 1>$file
+ echo hello>$file
+done
+
+TEST umount $M1
+
+cleanup;
diff --git a/tests/line-coverage/namespace-linecoverage.t b/tests/line-coverage/namespace-linecoverage.t
new file mode 100644
index 00000000000..8de6a0f279b
--- /dev/null
+++ b/tests/line-coverage/namespace-linecoverage.t
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+cleanup;
+
+TEST glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5,6,7,8}
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 cluster.read-subvolume-index 0
+TEST $CLI volume set $V0 features.tag-namespaces on
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 storage.build-pgfid on
+
+sleep 2
+
+## Mount FUSE
+TEST $GFS -s $H0 --volfile-id $V0 $M1;
+
+
+mkdir -p $M1/namespace
+
+# subvol_1 = bar, subvol_2 = foo, subvol_3 = hey
+# Test create, write (tagged by loc, fd respectively).
+touch $M1/namespace/{bar,foo,hey}
+
+open $M1/namespace/hey
+
+## TODO: best way to increase coverage is to have a gfapi program
+## which covers maximum fops
+TEST $(dirname $0)/../basic/rpc-coverage.sh $M1
+
+TEST cp $(dirname ${0})/../basic/gfapi/glfsxmp-coverage.c glfsxmp.c
+TEST build_tester ./glfsxmp.c -lgfapi
+TEST ./glfsxmp $V0 $H0
+TEST cleanup_tester ./glfsxmp
+TEST rm ./glfsxmp.c
+
+cleanup;
diff --git a/tests/line-coverage/old-protocol.t b/tests/line-coverage/old-protocol.t
new file mode 100755
index 00000000000..5676e5636db
--- /dev/null
+++ b/tests/line-coverage/old-protocol.t
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '6' brick_count $V0
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+file="/var/lib/glusterd/vols/$V0/trusted-$V0.tcp-fuse.vol"
+sed -i -e 's$send-gids true$send-gids true\n option testing.old-protocol true$g' $file
+
+## Mount FUSE
+TEST $GFS -s $H0 --volfile-id $V0 $M1;
+
+## TODO: best way to increase coverage is to have a gfapi program
+## which covers maximum fops
+TEST $(dirname $0)/../basic/rpc-coverage.sh $M1
+
+TEST cp $(dirname ${0})/../basic/gfapi/glfsxmp-coverage.c glfsxmp.c
+TEST build_tester ./glfsxmp.c -lgfapi
+TEST ./glfsxmp $V0 $H0
+TEST cleanup_tester ./glfsxmp
+TEST rm ./glfsxmp.c
+
+cleanup;
diff --git a/tests/line-coverage/quiesce-coverage.t b/tests/line-coverage/quiesce-coverage.t
new file mode 100755
index 00000000000..ca29343451e
--- /dev/null
+++ b/tests/line-coverage/quiesce-coverage.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '6' brick_count $V0
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+file="/var/lib/glusterd/vols/$V0/trusted-$V0.tcp-fuse.vol"
+
+cat >> ${file} <<EOF
+
+volume quiesce
+ type features/quiesce
+ subvolumes ${V0}
+end-volume
+EOF
+
+## Mount FUSE
+TEST $GFS -s $H0 --volfile-id $V0 $M1;
+
+## TODO: best way to increase coverage is to have a gfapi program
+## which covers maximum fops
+TEST $(dirname $0)/../basic/rpc-coverage.sh $M1
+
+TEST cp $(dirname ${0})/../basic/gfapi/glfsxmp-coverage.c glfsxmp.c
+TEST build_tester ./glfsxmp.c -lgfapi
+TEST ./glfsxmp $V0 $H0
+TEST cleanup_tester ./glfsxmp
+TEST rm ./glfsxmp.c
+
+cleanup;
diff --git a/tests/line-coverage/shard-coverage.t b/tests/line-coverage/shard-coverage.t
new file mode 100644
index 00000000000..1797999c146
--- /dev/null
+++ b/tests/line-coverage/shard-coverage.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick
+TEST $CLI volume set $V0 features.shard on
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+# It is good to copy the file locally and build it, so the scope remains
+# inside tests directory.
+TEST cp $(dirname ${0})/../basic/gfapi/glfsxmp-coverage.c glfsxmp.c
+TEST build_tester ./glfsxmp.c -lgfapi
+TEST ./glfsxmp $V0 $H0
+TEST cleanup_tester ./glfsxmp
+TEST rm ./glfsxmp.c
+
+TEST $GFS -s $H0 --volfile-id $V0 $M1;
+
+TEST $(dirname $0)/../basic/rpc-coverage.sh $M1
+
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/line-coverage/some-features-in-libglusterfs.t b/tests/line-coverage/some-features-in-libglusterfs.t
new file mode 100644
index 00000000000..5719c4e039c
--- /dev/null
+++ b/tests/line-coverage/some-features-in-libglusterfs.t
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+function grep_string {
+ local f=$1
+ local string=$2
+ # The output of test script also shows up in log. Ignore them.
+ echo $(grep ${string} ${f} | grep -v "++++++" | wc -l)
+}
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}
+TEST $CLI volume set $V0 client-log-level TRACE
+TEST $CLI volume start $V0;
+
+log_file="$(gluster --print-logdir)/gluster.log"
+## Mount FUSE
+TEST $GFS -s $H0 --log-file $log_file --volfile-id $V0 $M1
+
+## Cover 'monitoring.c' here
+pgrep 'glusterfs' | xargs kill -USR2
+
+EXPECT_WITHIN 2 1 grep_string $log_file 'sig:USR2'
+
+## Also cover statedump
+pgrep 'glusterfs' | xargs kill -USR1
+
+EXPECT_WITHIN 2 1 grep_string $log_file 'sig:USR1'
+
+## Also cover SIGHUP
+pgrep 'glusterfs' | xargs kill -HUP
+
+EXPECT_WITHIN 2 1 grep_string $log_file 'sig:HUP'
+
+## Also cover SIGTERM
+pgrep 'glusterfs' | xargs kill -TERM
+
+EXPECT_WITHIN 2 1 grep_string $log_file 'cleanup_and_exit'
+
+# Previous call should make umount of the process.
+# force_umount $M1
+
+# TODO: below section is commented out, mainly as our regression treats the test
+# as failure because sending ABRT signal will cause the process to dump core.
+# Our regression treats the test as failure, if there is a core.
+# FIXME: figure out a way to run this test, because this part of the code gets
+# executed only when there is coredump, and it is critical for debugging, to
+# keep it working always.
+
+# # Restart client
+# TEST $GFS -s $H0 --log-file $log_file --volfile-id $V0 $M1
+#
+# ## Also cover SIGABRT
+# pgrep 'glusterfs ' | xargs kill -ABRT
+#
+# TEST [ 1 -eq $(grep 'pending frames' $log_file | wc -l) ]
+
+TEST rm $log_file
+
+cleanup;
diff --git a/tests/line-coverage/volfile-with-all-graph-syntax.t b/tests/line-coverage/volfile-with-all-graph-syntax.t
new file mode 100644
index 00000000000..b137432cceb
--- /dev/null
+++ b/tests/line-coverage/volfile-with-all-graph-syntax.t
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST mkdir -p $B0/test
+cat > $B0/test.vol <<EOF
+volume test
+ type storage/posix
+ option directory $B0/test
+ option multiple-line-string "I am
+ testing a feature of volfile graph.l"
+ option single-line-string "this is running on $H0"
+ option option-with-back-tick `date +%Y%M%d`
+end-volume
+EOF
+
+# This should succeed, but it will have some unknown options, which is OK.
+TEST glusterfs -f $B0/test.vol $M0;
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0;
+
+# This should not succeed
+cat > $B0/test.vol <<EOF
+volume test
+ type storage/posix
+EOF
+TEST ! glusterfs -f $B0/test.vol $M0;
+
+
+# This should not succeed
+cat > $B0/test.vol <<EOF
+ type storage/posix
+end-volume
+EOF
+TEST ! glusterfs -f $B0/test.vol $M0;
+
+# This should not succeed
+cat > $B0/test.vol <<EOF
+volume test
+end-volume
+EOF
+TEST ! glusterfs -f $B0/test.vol $M0;
+
+# This should not succeed
+cat > $B0/test.vol <<EOF
+volume test
+ option test and test
+end-volume
+EOF
+TEST ! glusterfs -f $B0/test.vol $M0;
+
+# This should not succeed
+cat > $B0/test.vol <<EOF
+volume test
+ subvolumes
+end-volume
+EOF
+TEST ! glusterfs -f $B0/test.vol $M0;
+
+# This should not succeed
+cat > $B0/test.vol <<EOF
+volume test
+ type storage/posix
+ new-option key value
+ option directory $B0/test
+end-volume
+EOF
+TEST ! glusterfs -f $B0/test.vol $M0;
+
+cleanup;
diff --git a/tests/ssl.rc b/tests/ssl.rc
index 127f83f7577..b1ccc4c8d38 100644
--- a/tests/ssl.rc
+++ b/tests/ssl.rc
@@ -20,7 +20,7 @@ SSL_CA=$SSL_BASE/glusterfs.ca
# Create self-signed certificates
function create_self_signed_certs (){
- openssl genrsa -out $SSL_KEY 1024
+ openssl genrsa -out $SSL_KEY 2048
openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT
ln $SSL_CERT $SSL_CA
return $?
diff --git a/tests/thin-arbiter.rc b/tests/thin-arbiter.rc
index c5ac00baaaf..e26d91b1907 100644
--- a/tests/thin-arbiter.rc
+++ b/tests/thin-arbiter.rc
@@ -106,7 +106,7 @@ volume ${V0}-index
subvolumes ${V0}-io-threads
end-volume
-volume ${V0}-io-stats
+volume $b
type debug/io-stats
option count-fop-hits off
option latency-measurement off
@@ -115,11 +115,6 @@ volume ${V0}-io-stats
subvolumes ${V0}-index
end-volume
-volume $b
- type performance/decompounder
- subvolumes ${V0}-io-stats
-end-volume
-
volume ${V0}-server
type protocol/server
option transport.listen-backlog 1024
@@ -178,7 +173,7 @@ function ta_start_mount_process()
{
mkdir -p $1
identifier=$(echo $1 | tr / .)
- if glusterfs -p $B0/${identifier}.pid --volfile=$B0/mount.vol $1
+ if glusterfs --entry-timeout=0 --attribute-timeout=0 -p $B0/${identifier}.pid --volfile=$B0/mount.vol $1
then
cat $B0/$identifier.pid
else
@@ -187,6 +182,13 @@ function ta_start_mount_process()
fi
}
+function ta_get_mount_pid()
+{
+ local mount_path=$1
+ identifier=$(echo $mount_path | tr / .)
+ cat $B0/${identifier}.pid
+}
+
function ta_create_mount_volfile()
{
local b0=$B0/$1
@@ -259,33 +261,20 @@ end-volume
volume ${V0}-distribute
type cluster/distribute
- option tier-hot-compact-frequency 604800
option rebal-throttle normal
option force-migration off
option lookup-optimize on
option weighted-rebalance on
option write-freq-threshold 0
option assert-no-child-down off
- option tier-pause off
- option watermark-low 75
- option tier-compact off
option lock-migration off
option lookup-unhashed on
- option tier-demote-frequency 3600
- option watermark-hi 90
- option tier-cold-compact-frequency 604800
option randomize-hash-range-by-gfid off
option unhashed-sticky-bit off
option use-readdirp on
option readdir-optimize off
option xattr-name trusted.glusterfs.dht
- option tier-max-mb 4000
- option tier-max-files 10000
- option tier-query-limit 100
option read-freq-threshold 0
- option tier-mode test
- option tier-max-promote-file-size 0
- option tier-promote-frequency 120
option min-free-disk 10%
option min-free-inodes 5%
option rebalance-stats off
@@ -444,7 +433,6 @@ cat > $B0/glustershd.vol <<EOF
volume ${V0}-replicate-0-client-0
type protocol/client
option send-gids on
- option transport.socket.lowlat off
option transport.socket.keepalive-interval 2
option remote-host $H0
option remote-subvolume $b0
@@ -490,7 +478,6 @@ volume ${V0}-replicate-0-client-1
option send-gids on
option non-blocking-io off
option transport.listen-backlog 1024
- option transport.socket.lowlat off
option transport.socket.keepalive-interval 2
option password a0ad63dd-8314-4f97-9160-1b93e3cb1f0b
option username 459d48e8-2a92-4f11-89f2-077b29f6f86d
@@ -509,7 +496,6 @@ volume ${V0}-replicate-0-thin-arbiter-client
option remote-subvolume $ta
option filter-O_DIRECT disable
option non-blocking-io off
- option transport.socket.lowlat off
option transport.socket.keepalive-interval 2
option transport.socket.read-fail-log off
option remote-host $H0
@@ -612,3 +598,16 @@ function ta_start_shd_process()
return 1
fi
}
+
+function ta_mount_child_up_status()
+{
+ local mount_path=$1
+ #brick_id is (brick-num in volume info - 1)
+ local vol=$2
+ local brick_id=$3
+ local pid=$(ta_get_mount_pid $mount_path)
+ local fpath=$(generate_statedump $pid)
+ up=$(grep -a -B1 trusted.afr.$vol-client-$brick_id $fpath | head -1 | cut -f2 -d'=')
+ rm -f $fpath
+ echo "$up"
+}
diff --git a/tests/tier.rc b/tests/tier.rc
deleted file mode 100644
index 88acdc791eb..00000000000
--- a/tests/tier.rc
+++ /dev/null
@@ -1,159 +0,0 @@
-#!/bin/bash
-
-# Common tier functions
-
-# Check if a file is being migrated
-# by checking for the presence of
-# the sticky bit
-# Args: $1 : path to file
-
-function is_sticky_set () {
- echo $1
- if [ -k $1 ];
- then
- echo "yes"
- else
- echo "no"
- fi
-}
-
-
-function exists_and_regular_file () {
- filepath=$1
- if [ -n "$filepath" ];
- then
- if [ -k "$filepath" ]
- then
- echo "no"
- else
- echo "yes"
- fi
- else
- echo "no"
- fi
-}
-
-
-function check_counters {
- index=0
- ret=0
- rm -f /tmp/tc*.txt
- echo "0" > /tmp/tc2.txt
- $CLI volume tier $V0 status | grep localhost > /tmp/tc.txt
-
- promote=`cat /tmp/tc.txt |awk '{print $2}'`
- demote=`cat /tmp/tc.txt |awk '{print $3}'`
- if [ "${promote}" != "${1}" ]; then
- echo "1" > /tmp/tc2.txt
-
- elif [ "${demote}" != "${2}" ]; then
- echo "2" > /tmp/tc2.txt
- fi
-
- # temporarily disable non-Linux tests.
- case $OSTYPE in
- NetBSD | FreeBSD | Darwin)
- echo "0" > /tmp/tc2.txt
- ;;
- esac
- cat /tmp/tc2.txt
-}
-
-
-function detach_start {
- $CLI volume tier $1 detach start
- echo $?;
-}
-
-
-# Grab md5sum without file path (failed attempt notifications are discarded)
-function fingerprint {
- md5sum $1 2> /dev/null | grep --only-matching -m 1 '^[0-9a-f]*'
-}
-
-
-
-# Create a large number of files in the current directory.
-# $1 : file name prefix. Will create files $2-1 to $2-$3
-# $2 : number of files
-
-function create_many_files {
- filename=$1
- num=$2
-
- for i in `seq 1 $num`; do
- dd if=/dev/urandom of=./${dirname}/${filename}$i bs=104857 count=1;
- done
-}
-
-
-function confirm_tier_removed {
- $CLI system getspec $V0 | grep $1
- if [ $? == 0 ]; then
- echo "1"
- else
- echo "0"
- fi
-}
-
-function confirm_vol_stopped {
- $CLI volume stop $1
- if [ $? == 0 ]; then
- echo "0"
- else
- echo "1"
- fi
-}
-
-
-function sleep_first_cycle {
- startTime=$(date +%s)
- mod=$(( ( $startTime % $1 ) + 1 ))
- sleep $mod
-}
-
-function sleep_until_mid_cycle {
- startTime=$(date +%s)
- mod=$(( ( $startTime % $1 ) + 1 ))
- mod=$(( $1 - $mod ))
- mod=$(( $mod + $1 / 2 ))
- sleep $mod
-}
-
-function tier_daemon_check () {
- pgrep -f "tierd/$V0"
- echo "$?"
-}
-
-function rebalance_run_time () {
- local time=$($CLI volume tier $1 status | awk '{print $6}' | sed -n 3p);
- local hh=$(echo $time | cut -d ':' -f1);
- local mm=$(echo $time | cut -d ':' -f2);
- local ss=$(echo $time | cut -d ':' -f3);
- local total=$(($hh * 3600 + $mm * 60 + $ss));
- echo $total;
-}
-
-function tier_detach_commit () {
- $CLI_1 volume tier $V0 detach commit | grep "success" | wc -l
-}
-
-function tier_detach_status_node_down () {
- $CLI_1 volume tier $V0 detach status | grep "WARNING" | wc -l
-}
-
-function tier_status_node_down () {
- $CLI_1 volume tier $V0 status | grep "WARNING" | wc -l
-}
-
-function tier_detach_status () {
- $CLI_1 volume tier $V0 detach status | grep "success" | wc -l
-}
-
-function wait_for_tier_start () {
- sleep 5
-}
-
-function tier_detach_commit_for_single_node () {
- $CLI volume tier $V0 detach commit | grep "success" | wc -l
-}
diff --git a/tests/utils/changelog/changelog.h b/tests/utils/changelog/changelog.h
index 969a1f370c2..1502b689eb4 100644
--- a/tests/utils/changelog/changelog.h
+++ b/tests/utils/changelog/changelog.h
@@ -116,4 +116,10 @@ int
gf_history_changelog(char *changelog_dir, unsigned long start,
unsigned long end, int n_parallel,
unsigned long *actual_end);
+int
+gf_history_changelog_scan();
+ssize_t
+gf_history_changelog_next_change(char *bufptr, size_t maxlen);
+int
+gf_history_changelog_done(char *file);
#endif
diff --git a/tests/utils/changelog/test-changelog-api.c b/tests/utils/changelog/test-changelog-api.c
new file mode 100644
index 00000000000..f4eb066b630
--- /dev/null
+++ b/tests/utils/changelog/test-changelog-api.c
@@ -0,0 +1,98 @@
+/*
+ Copyright (c) 2019 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/**
+ * get set of new changes every 5 seconds (just print the file names)
+ *
+ * Compile it using:
+ * gcc -o getchanges `pkg-config --cflags libgfchangelog` get-changes.c \
+ * `pkg-config --libs libgfchangelog`
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/un.h>
+#include <limits.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <errno.h>
+
+#include "changelog.h"
+
+int
+main(int argc, char **argv)
+{
+ int i = 0;
+ int ret = 0;
+ ssize_t nr_changes = 0;
+ ssize_t changes = 0;
+ char fbuf[PATH_MAX] = {
+ 0,
+ };
+
+ ret = gf_changelog_init(NULL);
+ if (ret) {
+ printf("-1");
+ fflush(stdout);
+ return -1;
+ }
+
+ /* get changes for brick "/d/backends/patchy0" */
+ ret = gf_changelog_register("/d/backends/patchy0", "/tmp/scratch_v1",
+ "/var/log/glusterfs/changes.log", 9, 5);
+ if (ret) {
+ printf("-2");
+ fflush(stdout);
+ return -1;
+ }
+
+ while (1) {
+ i = 0;
+ nr_changes = gf_changelog_scan();
+ if (nr_changes < 0) {
+ printf("-4");
+ fflush(stdout);
+ return -1;
+ }
+
+ if (nr_changes == 0)
+ goto next;
+
+ while ((changes = gf_changelog_next_change(fbuf, PATH_MAX)) > 0) {
+ /* process changelog */
+ /* ... */
+ /* ... */
+ /* ... */
+ /* done processing */
+
+ ret = gf_changelog_done(fbuf);
+ if (ret) {
+ printf("-5");
+ fflush(stdout);
+ return -1;
+ }
+ }
+
+ if (changes == -1) {
+ printf("-6");
+ fflush(stdout);
+ return -1;
+ }
+
+ next:
+ sleep(2);
+ }
+
+out:
+ printf("0");
+ fflush(stdout);
+ return ret;
+}
diff --git a/tests/utils/changelog/test-history-api.c b/tests/utils/changelog/test-history-api.c
new file mode 100644
index 00000000000..d78e387df10
--- /dev/null
+++ b/tests/utils/changelog/test-history-api.c
@@ -0,0 +1,111 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/**
+ * get set of new changes every 10 seconds (just print the file names)
+ *
+ * Compile it using:
+ * gcc -o gethistory `pkg-config --cflags libgfchangelog` get-history.c \
+ * `pkg-config --libs libgfchangelog`
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/un.h>
+#include <limits.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "changelog.h"
+
+int
+main(int argc, char **argv)
+{
+ int ret = 0;
+ int i = 0;
+ unsigned long end_ts = 0;
+ ssize_t nr_changes = 0;
+ ssize_t changes = 0;
+ int start = 0;
+ int end = 0;
+ char fbuf[PATH_MAX] = {
+ 0,
+ };
+
+ ret = gf_changelog_init(NULL);
+ if (ret) {
+ printf("-1");
+ fflush(stdout);
+ return -1;
+ }
+
+ ret = gf_changelog_register("/d/backends/patchy0", "/tmp/scratch_v1",
+ "/var/log/glusterfs/changes.log", 9, 5);
+ if (ret) {
+ printf("-2");
+ fflush(stdout);
+ return -1;
+ }
+
+ start = atoi(argv[1]);
+ end = atoi(argv[2]);
+
+ ret = gf_history_changelog("/d/backends/patchy0/.glusterfs/changelogs",
+ start, end, 3, &end_ts);
+ if (ret < 0) {
+ printf("-3");
+ fflush(stdout);
+ return -1;
+ } else if (ret == 1) {
+ printf("1");
+ fflush(stdout);
+ return 0;
+ }
+
+ while (1) {
+ nr_changes = gf_history_changelog_scan();
+ if (nr_changes < 0) {
+ printf("-4");
+ fflush(stdout);
+ return -1;
+ }
+
+ if (nr_changes == 0) {
+ goto out;
+ }
+
+ while ((changes = gf_history_changelog_next_change(fbuf, PATH_MAX)) >
+ 0) {
+ /* process changelog */
+ /* ... */
+ /* ... */
+ /* ... */
+ /* done processing */
+
+ ret = gf_history_changelog_done(fbuf);
+ if (ret) {
+ printf("-5");
+ fflush(stdout);
+ return -1;
+ }
+ }
+ if (changes == -1) {
+ printf("-6");
+ fflush(stdout);
+ return -1;
+ }
+ }
+
+out:
+ printf("0");
+ fflush(stdout);
+ return 0;
+}
diff --git a/tests/utils/changelogparser.py b/tests/utils/changelogparser.py
index e8e252d195f..3b8f81d1bad 100644
--- a/tests/utils/changelogparser.py
+++ b/tests/utils/changelogparser.py
@@ -125,7 +125,10 @@ class Record(object):
return repr(self.__dict__)
def __str__(self):
- return unicode(self).encode('utf-8')
+ if sys.version_info >= (3,):
+ return self.__unicode__()
+ else:
+ return unicode(self).encode('utf-8')
def get_num_tokens(data, tokens, version=Version.V11):
diff --git a/tests/utils/create-files.py b/tests/utils/create-files.py
index b2a19610d63..04736e9c73b 100755
--- a/tests/utils/create-files.py
+++ b/tests/utils/create-files.py
@@ -19,6 +19,11 @@ import argparse
datsiz = 0
timr = 0
+def get_ascii_upper_alpha_digits():
+ if sys.version_info > (3,0):
+ return string.ascii_uppercase+string.digits
+ else:
+ return string.uppercase+string.digits
def setLogger(filename):
global logger
@@ -111,7 +116,7 @@ def create_tar_file(fil, size, mins, maxs, rand):
def get_filename(flen):
size = flen
- char = string.uppercase+string.digits
+ char = get_ascii_upper_alpha_digits()
st = ''.join(random.choice(char) for i in range(size))
ti = str((hex(int(str(time.time()).split('.')[0])))[2:])
return ti+"%%"+st
@@ -175,7 +180,7 @@ def tar_files(files, file_count, inter, size, mins, maxs,
def setxattr_files(files, randname, dir_path):
- char = string.uppercase+string.digits
+ char = get_ascii_upper_alpha_digits()
if not randname:
for k in range(files):
v = ''.join(random.choice(char) for i in range(10))
diff --git a/tests/utils/get-mdata-xattr.c b/tests/utils/get-mdata-xattr.c
new file mode 100644
index 00000000000..e9f54717263
--- /dev/null
+++ b/tests/utils/get-mdata-xattr.c
@@ -0,0 +1,152 @@
+/*
+ Copyright (c) 2019 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <stdlib.h>
+#include <endian.h>
+#include <stdio.h>
+#include <time.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/xattr.h>
+#include <errno.h>
+
+typedef struct gf_timespec_disk {
+ uint64_t tv_sec;
+ uint64_t tv_nsec;
+} gf_timespec_disk_t;
+
+/* posix_mdata_t on disk structure */
+typedef struct __attribute__((__packed__)) posix_mdata_disk {
+ /* version of structure, bumped up if any new member is added */
+ uint8_t version;
+ /* flags indicates valid fields in the structure */
+ uint64_t flags;
+ gf_timespec_disk_t ctime;
+ gf_timespec_disk_t mtime;
+ gf_timespec_disk_t atime;
+} posix_mdata_disk_t;
+
+/* In memory representation posix metadata xattr */
+typedef struct {
+ /* version of structure, bumped up if any new member is added */
+ uint8_t version;
+ /* flags indicates valid fields in the structure */
+ uint64_t flags;
+ struct timespec ctime;
+ struct timespec mtime;
+ struct timespec atime;
+} posix_mdata_t;
+
+#define GF_XATTR_MDATA_KEY "trusted.glusterfs.mdata"
+
+/* posix_mdata_from_disk converts posix_mdata_disk_t into host byte order
+ */
+static inline void
+posix_mdata_from_disk(posix_mdata_t *out, posix_mdata_disk_t *in)
+{
+ out->version = in->version;
+ out->flags = be64toh(in->flags);
+
+ out->ctime.tv_sec = be64toh(in->ctime.tv_sec);
+ out->ctime.tv_nsec = be64toh(in->ctime.tv_nsec);
+
+ out->mtime.tv_sec = be64toh(in->mtime.tv_sec);
+ out->mtime.tv_nsec = be64toh(in->mtime.tv_nsec);
+
+ out->atime.tv_sec = be64toh(in->atime.tv_sec);
+ out->atime.tv_nsec = be64toh(in->atime.tv_nsec);
+}
+
+/* posix_fetch_mdata_xattr fetches the posix_mdata_t from disk */
+static int
+posix_fetch_mdata_xattr(const char *real_path, posix_mdata_t *metadata)
+{
+ size_t size = -1;
+ char *value = NULL;
+ char gfid_str[64] = {0};
+
+ char *key = GF_XATTR_MDATA_KEY;
+
+ if (!metadata || !real_path) {
+ goto err;
+ }
+
+ /* Get size */
+ size = lgetxattr(real_path, key, NULL, 0);
+ if (size == -1) {
+ goto err;
+ }
+
+ value = calloc(size + 1, sizeof(char));
+ if (!value) {
+ goto err;
+ }
+
+ /* Get xattr value */
+ size = lgetxattr(real_path, key, value, size);
+ if (size == -1) {
+ goto err;
+ }
+ posix_mdata_from_disk(metadata, (posix_mdata_disk_t *)value);
+
+out:
+ if (value)
+ free(value);
+ return 0;
+err:
+ if (value)
+ free(value);
+ return -1;
+}
+
+int
+main(int argc, char *argv[])
+{
+ posix_mdata_t metadata;
+ uint64_t result;
+
+ if (argc != 3) {
+ /*
+ Usage: get_mdata_xattr -c|-m|-a <file-name>
+ where -c --> ctime
+ -m --> mtime
+ -a --> atime
+ */
+ printf("-1");
+ goto err;
+ }
+
+ if (posix_fetch_mdata_xattr(argv[2], &metadata)) {
+ printf("-1");
+ goto err;
+ }
+
+ switch (argv[1][1]) {
+ case 'c':
+ result = metadata.ctime.tv_sec;
+ break;
+ case 'm':
+ result = metadata.mtime.tv_sec;
+ break;
+ case 'a':
+ result = metadata.atime.tv_sec;
+ break;
+ default:
+ printf("-1");
+ goto err;
+ }
+ printf("%" PRIu64, result);
+ fflush(stdout);
+ return 0;
+err:
+ fflush(stdout);
+ return -1;
+}
diff --git a/tests/utils/gfid-access.py b/tests/utils/gfid-access.py
index 556d2b4c65b..c35c1223df6 100755
--- a/tests/utils/gfid-access.py
+++ b/tests/utils/gfid-access.py
@@ -33,23 +33,51 @@ def _fmt_mkdir(l):
def _fmt_symlink(l1, l2):
return "!II%dsI%ds%ds" % (37, l1+1, l2+1)
-def entry_pack_reg(gf, bn, mo, uid, gid):
- blen = len(bn)
- return struct.pack(_fmt_mknod(blen),
- uid, gid, gf, mo, bn,
- stat.S_IMODE(mo), 0, umask())
-
-def entry_pack_dir(gf, bn, mo, uid, gid):
- blen = len(bn)
- return struct.pack(_fmt_mkdir(blen),
- uid, gid, gf, mo, bn,
- stat.S_IMODE(mo), umask())
-
-def entry_pack_symlink(gf, bn, lnk, mo, uid, gid):
- blen = len(bn)
- llen = len(lnk)
- return struct.pack(_fmt_symlink(blen, llen),
- uid, gid, gf, mo, bn, lnk)
+
+if sys.version_info > (3,):
+ def entry_pack_reg(gf, bn, mo, uid, gid):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ return struct.pack(_fmt_mknod(blen),
+ uid, gid, gf.encode(), mo, bn_encoded,
+ stat.S_IMODE(mo), 0, umask())
+
+ # mkdir
+ def entry_pack_dir(gf, bn, mo, uid, gid):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ return struct.pack(_fmt_mkdir(blen),
+ uid, gid, gf.encode(), mo, bn_encoded,
+ stat.S_IMODE(mo), umask())
+ # symlink
+ def entry_pack_symlink(gf, bn, lnk, st):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ lnk_encoded = lnk.encode()
+ llen = len(lnk_encoded)
+ return struct.pack(_fmt_symlink(blen, llen),
+ st['uid'], st['gid'],
+ gf.encode(), st['mode'], bn_encoded,
+ lnk_encoded)
+
+else:
+ def entry_pack_reg(gf, bn, mo, uid, gid):
+ blen = len(bn)
+ return struct.pack(_fmt_mknod(blen),
+ uid, gid, gf, mo, bn,
+ stat.S_IMODE(mo), 0, umask())
+
+ def entry_pack_dir(gf, bn, mo, uid, gid):
+ blen = len(bn)
+ return struct.pack(_fmt_mkdir(blen),
+ uid, gid, gf, mo, bn,
+ stat.S_IMODE(mo), umask())
+
+ def entry_pack_symlink(gf, bn, lnk, mo, uid, gid):
+ blen = len(bn)
+ llen = len(lnk)
+ return struct.pack(_fmt_symlink(blen, llen),
+ uid, gid, gf, mo, bn, lnk)
if __name__ == '__main__':
if len(sys.argv) < 9:
diff --git a/tests/utils/libcxattr.py b/tests/utils/libcxattr.py
index fd0b08378fc..3f3ed1fffbb 100644
--- a/tests/utils/libcxattr.py
+++ b/tests/utils/libcxattr.py
@@ -10,7 +10,9 @@
import os
import sys
-from ctypes import CDLL, c_int, create_string_buffer
+from ctypes import CDLL, c_int
+from py2py3 import bytearray_to_str, gr_create_string_buffer
+from py2py3 import gr_query_xattr, gr_lsetxattr, gr_lremovexattr
class Xattr(object):
@@ -47,20 +49,23 @@ class Xattr(object):
@classmethod
def _query_xattr(cls, path, siz, syscall, *a):
if siz:
- buf = create_string_buffer('\0' * siz)
+ buf = gr_create_string_buffer(siz)
else:
buf = None
ret = getattr(cls.libc, syscall)(*((path,) + a + (buf, siz)))
if ret == -1:
cls.raise_oserr()
if siz:
- return buf.raw[:ret]
+ # py2 and py3 compatibility. Convert bytes array
+ # to string
+ result = bytearray_to_str(buf.raw)
+ return result[:ret]
else:
return ret
@classmethod
def lgetxattr(cls, path, attr, siz=0):
- return cls._query_xattr(path, siz, 'lgetxattr', attr)
+ return gr_query_xattr(cls, path, siz, 'lgetxattr', attr)
@classmethod
def lgetxattr_buf(cls, path, attr):
@@ -74,20 +79,21 @@ class Xattr(object):
@classmethod
def llistxattr(cls, path, siz=0):
- ret = cls._query_xattr(path, siz, 'llistxattr')
+ ret = gr_query_xattr(cls, path, siz, 'llistxattr')
if isinstance(ret, str):
- ret = ret.split('\0')
+ ret = ret.strip('\0')
+ ret = ret.split('\0') if ret else []
return ret
@classmethod
def lsetxattr(cls, path, attr, val):
- ret = cls.libc.lsetxattr(path, attr, val, len(val), 0)
+ ret = gr_lsetxattr(cls, path, attr, val)
if ret == -1:
cls.raise_oserr()
@classmethod
def lremovexattr(cls, path, attr):
- ret = cls.libc.lremovexattr(path, attr)
+ ret = gr_lremovexattr(cls, path, attr)
if ret == -1:
cls.raise_oserr()
diff --git a/tests/utils/py2py3.py b/tests/utils/py2py3.py
new file mode 100644
index 00000000000..63aca10fd26
--- /dev/null
+++ b/tests/utils/py2py3.py
@@ -0,0 +1,186 @@
+#
+# Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+# All python2/python3 compatibility routines
+
+import sys
+import os
+import stat
+import struct
+from ctypes import create_string_buffer
+
+def umask():
+ return os.umask(0)
+
+if sys.version_info >= (3,):
+ def pipe():
+ (r, w) = os.pipe()
+ os.set_inheritable(r, True)
+ os.set_inheritable(w, True)
+ return (r, w)
+
+ # Raw conversion of bytearray to string. Used in the cases where
+ # buffer is created by create_string_buffer which is a 8-bit char
+ # array and passed to syscalls to fetch results. Using encode/decode
+ # doesn't work as it converts to string altering the size.
+ def bytearray_to_str(byte_arr):
+ return ''.join([chr(b) for b in byte_arr])
+
+ # Raw conversion of string to bytes. This is required to convert
+ # back the string into bytearray(c char array) to use in struc
+ # pack/unpacking. Again encode/decode can't be used as it
+ # converts it alters size.
+ def str_to_bytearray(string):
+ return bytes([ord(c) for c in string])
+
+ def gr_create_string_buffer(size):
+ return create_string_buffer(b'\0', size)
+
+ def gr_query_xattr(cls, path, size, syscall, attr=None):
+ if attr:
+ return cls._query_xattr(path.encode(), size, syscall,
+ attr.encode())
+ else:
+ return cls._query_xattr(path.encode(), size, syscall)
+
+ def gr_lsetxattr(cls, path, attr, val):
+ return cls.libc.lsetxattr(path.encode(), attr.encode(), val,
+ len(val), 0)
+
+ def gr_lremovexattr(cls, path, attr):
+ return cls.libc.lremovexattr(path.encode(), attr.encode())
+
+ def gr_cl_register(cls, brick, path, log_file, log_level, retries):
+ return cls._get_api('gf_changelog_register')(brick.encode(),
+ path.encode(),
+ log_file.encode(),
+ log_level, retries)
+
+ def gr_cl_done(cls, clfile):
+ return cls._get_api('gf_changelog_done')(clfile.encode())
+
+ def gr_cl_history_changelog(cls, changelog_path, start, end, num_parallel,
+ actual_end):
+ return cls._get_api('gf_history_changelog')(changelog_path.encode(),
+ start, end, num_parallel,
+ actual_end)
+
+ def gr_cl_history_done(cls, clfile):
+ return cls._get_api('gf_history_changelog_done')(clfile.encode())
+
+ # regular file
+
+ def entry_pack_reg(cls, gf, bn, mo, uid, gid):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ return struct.pack(cls._fmt_mknod(blen),
+ uid, gid, gf.encode(), mo, bn_encoded,
+ stat.S_IMODE(mo), 0, umask())
+
+ def entry_pack_reg_stat(cls, gf, bn, st):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ mo = st['mode']
+ return struct.pack(cls._fmt_mknod(blen),
+ st['uid'], st['gid'],
+ gf.encode(), mo, bn_encoded,
+ stat.S_IMODE(mo), 0, umask())
+ # mkdir
+
+ def entry_pack_mkdir(cls, gf, bn, mo, uid, gid):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ return struct.pack(cls._fmt_mkdir(blen),
+ uid, gid, gf.encode(), mo, bn_encoded,
+ stat.S_IMODE(mo), umask())
+ # symlink
+
+ def entry_pack_symlink(cls, gf, bn, lnk, st):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ lnk_encoded = lnk.encode()
+ llen = len(lnk_encoded)
+ return struct.pack(cls._fmt_symlink(blen, llen),
+ st['uid'], st['gid'],
+ gf.encode(), st['mode'], bn_encoded,
+ lnk_encoded)
+else:
+ def pipe():
+ (r, w) = os.pipe()
+ return (r, w)
+
+ # Raw conversion of bytearray to string
+ def bytearray_to_str(byte_arr):
+ return byte_arr
+
+ # Raw conversion of string to bytearray
+ def str_to_bytearray(string):
+ return string
+
+ def gr_create_string_buffer(size):
+ return create_string_buffer('\0', size)
+
+ def gr_query_xattr(cls, path, size, syscall, attr=None):
+ if attr:
+ return cls._query_xattr(path, size, syscall, attr)
+ else:
+ return cls._query_xattr(path, size, syscall)
+
+ def gr_lsetxattr(cls, path, attr, val):
+ return cls.libc.lsetxattr(path, attr, val, len(val), 0)
+
+ def gr_lremovexattr(cls, path, attr):
+ return cls.libc.lremovexattr(path, attr)
+
+ def gr_cl_register(cls, brick, path, log_file, log_level, retries):
+ return cls._get_api('gf_changelog_register')(brick, path, log_file,
+ log_level, retries)
+
+ def gr_cl_done(cls, clfile):
+ return cls._get_api('gf_changelog_done')(clfile)
+
+ def gr_cl_history_changelog(cls, changelog_path, start, end, num_parallel,
+ actual_end):
+ return cls._get_api('gf_history_changelog')(changelog_path, start, end,
+ num_parallel, actual_end)
+
+ def gr_cl_history_done(cls, clfile):
+ return cls._get_api('gf_history_changelog_done')(clfile)
+
+ # regular file
+
+ def entry_pack_reg(cls, gf, bn, mo, uid, gid):
+ blen = len(bn)
+ return struct.pack(cls._fmt_mknod(blen),
+ uid, gid, gf, mo, bn,
+ stat.S_IMODE(mo), 0, umask())
+
+ def entry_pack_reg_stat(cls, gf, bn, st):
+ blen = len(bn)
+ mo = st['mode']
+ return struct.pack(cls._fmt_mknod(blen),
+ st['uid'], st['gid'],
+ gf, mo, bn,
+ stat.S_IMODE(mo), 0, umask())
+ # mkdir
+
+ def entry_pack_mkdir(cls, gf, bn, mo, uid, gid):
+ blen = len(bn)
+ return struct.pack(cls._fmt_mkdir(blen),
+ uid, gid, gf, mo, bn,
+ stat.S_IMODE(mo), umask())
+ # symlink
+
+ def entry_pack_symlink(cls, gf, bn, lnk, st):
+ blen = len(bn)
+ llen = len(lnk)
+ return struct.pack(cls._fmt_symlink(blen, llen),
+ st['uid'], st['gid'],
+ gf, st['mode'], bn, lnk)
diff --git a/tests/volume.rc b/tests/volume.rc
index 261c6554d46..b38848c0e52 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -84,10 +84,6 @@ function fix-layout_status_field {
$CLI volume rebalance $1 status | awk '{print $2,$3,$4}' |sed -n 3p |tr -d '[^0-9+\.]'|sed 's/ *$//g'
}
-function detach_tier_status_field_complete {
- $CLI volume tier $1 detach status | awk '{print $7}' |sed -n 4p
-}
-
function remove_brick_status_completed_field {
local vol=$1
local brick_list=$2
@@ -116,16 +112,33 @@ function cleanup_statedump {
#.vimrc friendly comment */
}
+function wait_statedump_ready {
+ local maxtime="${1}000000000"
+ local pid="$2"
+ local deadline="$(($(date +%s%N) + maxtime))"
+ local fname
+
+ while [[ "$(date +%s%N)" < "$deadline" ]]; do
+ fname="$statedumpdir/$(ls $statedumpdir | grep -E "\.$pid\.dump\.")"
+ if [[ -f "$fname" ]]; then
+ grep "^DUMP-END-TIME" "$fname" >/dev/null
+ if [[ $? -eq 0 ]]; then
+ echo $fname
+ return
+ fi
+ fi
+ sleep 0.1
+ done
+
+ echo "nostatedump"
+}
+
function generate_statedump {
- local fpath=""
pid=$1
#remove old stale statedumps
cleanup_statedump $pid
kill -USR1 $pid
- #Wait till the statedump is generated
- sleep 1
- fname=$(ls $statedumpdir | grep -E "\.$pid\.dump\.")
- echo $statedumpdir/$fname
+ wait_statedump_ready 3 $pid
}
function generate_mount_statedump {
@@ -180,7 +193,7 @@ function afr_child_up_status_meta {
local mnt=$1
local repl=$2
local child=$3
- grep "child_up\[$child\]" $mnt/.meta/graphs/active/$repl/private | awk '{print $3}'
+ grep -E "^child_up\[$child\]" $mnt/.meta/graphs/active/$repl/private | awk '{print $3}'
}
function client_connected_status_meta {
@@ -237,11 +250,13 @@ function ec_child_up_count_shd {
}
function get_shd_process_pid {
- ps auxww | grep glusterfs | grep -E "glustershd/glustershd.pid" | awk '{print $2}' | head -1
+ local vol=$1
+ ps auxww | grep "process-name\ glustershd" | awk '{print $2}' | head -1
}
function generate_shd_statedump {
- generate_statedump $(get_shd_process_pid)
+ local vol=$1
+ generate_statedump $(get_shd_process_pid $vol)
}
function generate_nfs_statedump {
@@ -281,6 +296,10 @@ function quotad_up_status {
gluster volume status | grep "Quota Daemon" | awk '{print $7}'
}
+function get_glusterd_pid {
+ pgrep '^glusterd$' | head -1
+}
+
function get_brick_pidfile {
local vol=$1
local host=$2
@@ -303,15 +322,12 @@ function kill_brick {
local socket=$(cat $cmdline | tr '\0' '\n' | grep '\.socket$')
gf_attach -d $socket $brick
- cnt=1
- while [ "$cnt" -le "$PROCESS_UP_TIMEOUT" ] ;
- do
- online=`$CLI volume status $vol $host:$brick --xml | sed -ne 's/.*<status>\([01]\)<\/status>/\1/p'`
- if [ $online -eq 0 ] ; then
- break;
- fi
- cnt=$(( $cnt + 1 ))
- sleep 1
+
+ local deadline="$(($(date +%s%N) + ${PROCESS_UP_TIMEOUT}000000000))"
+ while [[ "$(date +%s%N)" < "$deadline" ]]; do
+ if [[ "$(brick_up_status $vol $host $brick)" == "0" ]]; then
+ break
+ fi
done
}
@@ -371,6 +387,19 @@ function get_gfid2path {
getfattr -h --only-values -n glusterfs.gfidtopath $path 2>/dev/null
}
+function get_mdata {
+ local path=$1
+ getfattr -h -e hex -n trusted.glusterfs.mdata $path 2>/dev/null | grep "trusted.glusterfs.mdata" | cut -f2 -d'='
+}
+
+function get_mdata_count {
+ getfattr -d -m . -e hex $@ 2>/dev/null | grep mdata | wc -l
+}
+
+function get_mdata_uniq_count {
+ getfattr -d -m . -e hex $@ 2>/dev/null | grep mdata | uniq | wc -l
+}
+
function get_xattr_key {
local key=$1
local path=$2
@@ -540,9 +569,8 @@ function volume_exists() {
}
function killall_gluster() {
- pkill gluster
+ terminate_pids $(process_pids gluster)
find $GLUSTERD_PIDFILEDIR -name '*.pid' | xargs rm -f
- sleep 1
}
function afr_get_index_count {
@@ -833,6 +861,9 @@ function get_fd_count {
else
count=$(grep "${brick}.active.1" -A3 $statedump | grep "gfid=$gfid_str" -A2 | grep fd-count | cut -f2 -d'=' | tail -1)
fi
+# If no information is found for a given gfid, it means it has not been
+# accessed, so it doesn't have any open fd. In this case we return 0.
+ count="${count:-0}"
rm -f $statedump
echo $count
}
@@ -859,7 +890,6 @@ function get_mount_active_size_value {
local vol=$1
local mount=$2
local statedump=$(generate_mount_statedump $vol $mount)
- sleep 1
local val=$(grep "active_size" $statedump | cut -f2 -d'=' | tail -1)
rm -f $statedump
echo $val
@@ -869,7 +899,6 @@ function get_mount_lru_size_value {
local vol=$1
local mount=$2
local statedump=$(generate_mount_statedump $vol $mount)
- sleep 1
local val=$(grep "lru_size" $statedump | cut -f2 -d'=' | tail -1)
rm -f $statedump
echo $val
@@ -879,5 +908,95 @@ function check_changelog_op {
local clog_path=$1
local op=$2
- $PYTHON $(dirname $0)/../../utils/changelogparser.py ${clog_path}/CHANGELOG | grep $op | wc -l
+ $PYTHON $(dirname $0)/../../utils/changelogparser.py ${clog_path}/CHANGELOG | grep "$op" | wc -l
+}
+
+function processed_changelogs {
+ local processed_dir=$1
+ count=$(ls -l $processed_dir | grep CHANGELOG | wc -l)
+ if [ $count -gt 0 ];
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+function volgen_check_ancestry {
+ #Returns Y if ancestor_xl is an ancestor of $child_xl according to the volfile
+ local volfile="$1"
+
+ local child_xl_type="$2"
+ local child_xl="$3"
+
+ local ancestor_xl_type="$4"
+ local ancestor_xl="$5"
+
+ child_linenum=$(awk '/type $child_xl_type\/$child_xl/ {print FNR}' $volfile)
+ ancestor_linenum=$(awk '/type $ancestor_xl_type\/$ancestor_xl/ {print FNR}' $volfile)
+
+ if [ $child_linenum -lt $ancestor_linenum ];
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+function get_shd_mux_pid {
+ local volume=$1
+ pid=`$CLI volume status $volume shd | awk '/Self-heal/{print $8}'`
+ echo $pid
+}
+
+function shd_count {
+ ps aux | grep "glustershd" | grep -v grep | wc -l
+}
+
+function number_healer_threads_shd {
+ local pid=$(get_shd_mux_pid $1)
+ pstack $pid | grep $2 | wc -l
+}
+
+function get_mtime {
+ local time=$(get-mdata-xattr -m $1)
+ if [ $time == "-1" ];
+ then
+ echo $(stat -c %Y $1)
+ else
+ echo $time
+ fi
+}
+
+function get_ctime {
+ local time=$(get-mdata-xattr -c $1)
+ if [ $time == "-1" ];
+ then
+ echo $(stat -c %Z $1)
+ else
+ echo $time
+ fi
+}
+
+function get_atime {
+ local time=$(get-mdata-xattr -a $1)
+ if [ $time == "-1" ];
+ then
+ echo $(stat -c %X $1)
+ else
+ echo $time
+ fi
+}
+
+function get-xml()
+{
+ $CLI $1 --xml | xmllint --format - | grep $2 | sed 's/\(<"$2">\|<\/"$2">\)//g'
+}
+
+function logging_time_check()
+{
+ local logdir=$1
+ local logfile=`echo ${0##*/}`_glusterd1.log
+
+ cat $logdir/1/$logfile | tail -n 2 | head -n 1 | grep $(date +%H:%M) | wc -l
}
diff --git a/tools/gfind_missing_files/gcrawler.c b/tools/gfind_missing_files/gcrawler.c
index 9b3d243841c..4acbe92bc8f 100644
--- a/tools/gfind_missing_files/gcrawler.c
+++ b/tools/gfind_missing_files/gcrawler.c
@@ -16,11 +16,11 @@
#include <string.h>
#include <dirent.h>
#include <assert.h>
-#include "locking.h"
+#include <glusterfs/locking.h>
-#include "compat.h"
-#include "list.h"
-#include "syscall.h"
+#include <glusterfs/compat.h>
+#include <glusterfs/list.h>
+#include <glusterfs/syscall.h>
#define THREAD_MAX 32
#define BUMP(name) INC(name, 1)
diff --git a/tools/gfind_missing_files/gfind_missing_files.sh b/tools/gfind_missing_files/gfind_missing_files.sh
index f42fe7b05af..e7aaa0b5dd4 100644
--- a/tools/gfind_missing_files/gfind_missing_files.sh
+++ b/tools/gfind_missing_files/gfind_missing_files.sh
@@ -61,7 +61,7 @@ mount_slave()
parse_cli()
{
- if [[ $# -ne 4 ]]; then
+ if [ "$#" -ne 4 ]; then
echo "Usage: gfind_missing_files <brick-path> <slave-host> <slave-vol> <OUTFILE>"
exit 1
else
diff --git a/tools/glusterfind/S57glusterfind-delete-post.py b/tools/glusterfind/S57glusterfind-delete-post.py
index 5b5142d775a..5beece220f0 100755
--- a/tools/glusterfind/S57glusterfind-delete-post.py
+++ b/tools/glusterfind/S57glusterfind-delete-post.py
@@ -18,7 +18,7 @@ def handle_rm_error(func, path, exc_info):
def get_glusterd_workdir():
p = Popen(["gluster", "system::", "getwd"],
- stdout=PIPE, stderr=PIPE)
+ stdout=PIPE, stderr=PIPE, universal_newlines=True)
out, _ = p.communicate()
diff --git a/tools/glusterfind/glusterfind.in b/tools/glusterfind/glusterfind.in
index 79f06faeae0..ca154b625dd 100644
--- a/tools/glusterfind/glusterfind.in
+++ b/tools/glusterfind/glusterfind.in
@@ -10,6 +10,7 @@
import sys
sys.path.insert(1, '@GLUSTERFS_LIBEXECDIR@/')
+sys.path.insert(1, '@GLUSTERFS_LIBEXECDIR@/glusterfind')
from glusterfind.main import main
diff --git a/tools/glusterfind/src/Makefile.am b/tools/glusterfind/src/Makefile.am
index c180f051933..43b6141b01c 100644
--- a/tools/glusterfind/src/Makefile.am
+++ b/tools/glusterfind/src/Makefile.am
@@ -2,7 +2,7 @@ glusterfinddir = $(GLUSTERFS_LIBEXECDIR)/glusterfind
if WITH_SERVER
glusterfind_PYTHON = conf.py utils.py __init__.py \
- main.py libgfchangelog.py changelogdata.py
+ main.py libgfchangelog.py changelogdata.py gfind_py2py3.py
glusterfind_SCRIPTS = changelog.py nodeagent.py \
brickfind.py
diff --git a/tools/glusterfind/src/brickfind.py b/tools/glusterfind/src/brickfind.py
index 6597aa1ba7c..73b6350188d 100644
--- a/tools/glusterfind/src/brickfind.py
+++ b/tools/glusterfind/src/brickfind.py
@@ -41,12 +41,20 @@ def brickfind_crawl(brick, args):
with open(args.outfile, "a+") as fout:
brick_path_len = len(brick)
- def output_callback(path, filter_result):
+ def output_callback(path, filter_result, is_dir):
path = path.strip()
path = path[brick_path_len+1:]
- output_write(fout, path, args.output_prefix,
- encode=(not args.no_encode), tag=args.tag,
- field_separator=args.field_separator)
+
+ if args.type == "both":
+ output_write(fout, path, args.output_prefix,
+ encode=(not args.no_encode), tag=args.tag,
+ field_separator=args.field_separator)
+ else:
+ if (is_dir and args.type == "d") or (
+ (not is_dir) and args.type == "f"):
+ output_write(fout, path, args.output_prefix,
+ encode=(not args.no_encode), tag=args.tag,
+ field_separator=args.field_separator)
ignore_dirs = [os.path.join(brick, dirname)
for dirname in
@@ -77,6 +85,9 @@ def _get_args():
action="store_true")
parser.add_argument("--output-prefix", help="File prefix in output",
default=".")
+ parser.add_argument('--type', help="type: f, f-files only"
+ " d, d-directories only, by default = both",
+ default='both')
parser.add_argument("--field-separator", help="Field separator",
default=" ")
@@ -102,6 +113,6 @@ if __name__ == "__main__":
time_to_update = int(time.time())
brickfind_crawl(args.brick, args)
if not args.only_query:
- with open(status_file_pre, "w", buffering=0) as f:
+ with open(status_file_pre, "w") as f:
f.write(str(time_to_update))
sys.exit(0)
diff --git a/tools/glusterfind/src/changelog.py b/tools/glusterfind/src/changelog.py
index feef2aebdef..a5e9ea4288f 100644
--- a/tools/glusterfind/src/changelog.py
+++ b/tools/glusterfind/src/changelog.py
@@ -14,6 +14,7 @@ import sys
import time
import xattr
import logging
+from gfind_py2py3 import bytearray_to_str
from argparse import ArgumentParser, RawDescriptionHelpFormatter
import hashlib
try:
@@ -50,7 +51,7 @@ def pgfid_to_path(brick, changelog_data):
"""
# pgfid1 to path1 in case of CREATE/MKNOD/MKDIR/LINK/SYMLINK
for row in changelog_data.gfidpath_get_distinct("pgfid1", {"path1": ""}):
- # In case of Data/Metadata only, pgfid1 will not be their
+ # In case of Data/Metadata only, pgfid1 will not be there
if row[0] == "":
continue
@@ -105,15 +106,55 @@ def populate_pgfid_and_inodegfid(brick, changelog_data):
changelog_data.inodegfid_add(os.stat(p).st_ino, gfid)
file_xattrs = xattr.list(p)
for x in file_xattrs:
- if x.startswith("trusted.pgfid."):
+ x_str = bytearray_to_str(x)
+ if x_str.startswith("trusted.pgfid."):
# PGFID in pgfid table
- changelog_data.pgfid_add(x.split(".")[-1])
+ changelog_data.pgfid_add(x_str.split(".")[-1])
except (IOError, OSError):
# All OS Errors ignored, since failures will be logged
# in End. All GFIDs present in gfidpath table
continue
+def enum_hard_links_using_gfid2path(brick, gfid, args):
+ hardlinks = []
+ p = os.path.join(brick, ".glusterfs", gfid[0:2], gfid[2:4], gfid)
+ if not os.path.isdir(p):
+ # we have a symlink or a normal file
+ try:
+ file_xattrs = xattr.list(p)
+ for x in file_xattrs:
+ x_str = bytearray_to_str(x)
+ if x_str.startswith("trusted.gfid2path."):
+ # get the value for the xattr i.e. <PGFID>/<BN>
+ v = xattr.getxattr(p, x_str)
+ v_str = bytearray_to_str(v)
+ pgfid, bn = v_str.split(os.sep)
+ try:
+ path = symlink_gfid_to_path(brick, pgfid)
+ fullpath = os.path.join(path, bn)
+ fullpath = output_path_prepare(fullpath, args)
+ hardlinks.append(fullpath)
+ except (IOError, OSError) as e:
+ logger.warn("Error converting to path: %s" % e)
+ continue
+ except (IOError, OSError):
+ pass
+ return hardlinks
+
+
+def gfid_to_all_paths_using_gfid2path(brick, changelog_data, args):
+ path = ""
+ for row in changelog_data.gfidpath_get({"path1": "", "type": "MODIFY"}):
+ gfid = row[3].strip()
+ logger.debug("Processing gfid %s" % gfid)
+ hardlinks = enum_hard_links_using_gfid2path(brick, gfid, args)
+
+ path = ",".join(hardlinks)
+
+ changelog_data.gfidpath_update({"path1": path}, {"gfid": gfid})
+
+
def gfid_to_path_using_pgfid(brick, changelog_data, args):
"""
For all the pgfids collected, Converts to Path and
@@ -313,11 +354,11 @@ def get_changes(brick, hash_dir, log_file, start, end, args):
changelog_data.commit()
logger.info("[2/4] Finished 'pgfid to path' conversions.")
- # Convert all GFIDs for which no other additional details available
- logger.info("[3/4] Starting 'gfid to path using pgfid' conversions ...")
- gfid_to_path_using_pgfid(brick, changelog_data, args)
+ # Convert all gfids recorded for data and metadata to all hardlink paths
+ logger.info("[3/4] Starting 'gfid2path' conversions ...")
+ gfid_to_all_paths_using_gfid2path(brick, changelog_data, args)
changelog_data.commit()
- logger.info("[3/4] Finished 'gfid to path using pgfid' conversions.")
+ logger.info("[3/4] Finished 'gfid2path' conversions.")
# If some GFIDs fail to get converted from previous step,
# convert using find
@@ -339,7 +380,7 @@ def changelog_crawl(brick, start, end, args):
# WORKING_DIR/BRICKHASH/OUTFILE
working_dir = os.path.dirname(args.outfile)
- brickhash = hashlib.sha1(brick)
+ brickhash = hashlib.sha1(brick.encode())
brickhash = str(brickhash.hexdigest())
working_dir = os.path.join(working_dir, brickhash)
@@ -374,6 +415,7 @@ def _get_args():
action="store_true")
parser.add_argument("--output-prefix", help="File prefix in output",
default=".")
+ parser.add_argument("--type",default="both")
parser.add_argument("-N", "--only-namespace-changes",
help="List only namespace changes",
action="store_true")
@@ -419,7 +461,7 @@ if __name__ == "__main__":
end))
actual_end = changelog_crawl(args.brick, start, end, args)
if not args.only_query:
- with open(status_file_pre, "w", buffering=0) as f:
+ with open(status_file_pre, "w") as f:
f.write(str(actual_end))
logger.info("%s Finished Changelog Crawl - End: %s" % (args.brick,
diff --git a/tools/glusterfind/src/conf.py b/tools/glusterfind/src/conf.py
index ab766f85c0f..3849ba5dd1f 100644
--- a/tools/glusterfind/src/conf.py
+++ b/tools/glusterfind/src/conf.py
@@ -10,11 +10,11 @@
import os
try:
- from configparser import ConfigParser
+ from ConfigParser import ConfigParser
except ImportError:
- import ConfigParser
+ from configparser import ConfigParser
-config = ConfigParser.ConfigParser()
+config = ConfigParser()
config.read(os.path.join(os.path.dirname(os.path.abspath(__file__)),
"tool.conf"))
diff --git a/tools/glusterfind/src/gfind_py2py3.py b/tools/glusterfind/src/gfind_py2py3.py
new file mode 100644
index 00000000000..87324fbf350
--- /dev/null
+++ b/tools/glusterfind/src/gfind_py2py3.py
@@ -0,0 +1,88 @@
+#
+# Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+# All python2/python3 compatibility routines
+
+import os
+import sys
+from ctypes import create_string_buffer
+
+if sys.version_info >= (3,):
+
+ # Raw conversion of bytearray to string. Used in the cases where
+ # buffer is created by create_string_buffer which is a 8-bit char
+ # array and passed to syscalls to fetch results. Using encode/decode
+ # doesn't work as it converts to string altering the size.
+ # def bytearray_to_str(byte_arr):
+ def bytearray_to_str(byte_arr):
+ return ''.join([chr(b) for b in byte_arr])
+
+ def gf_create_string_buffer(size):
+ return create_string_buffer(b'\0', size)
+
+ def gfind_history_changelog(libgfc, changelog_path, start, end, num_parallel,
+ actual_end):
+ return libgfc.gf_history_changelog(changelog_path.encode(), start, end, num_parallel,
+ actual_end)
+
+ def gfind_changelog_register(libgfc, brick, path, log_file, log_level,
+ retries):
+ return libgfc.gf_changelog_register(brick.encode(), path.encode(), log_file.encode(),
+ log_level, retries)
+
+ def gfind_history_changelog_done(libgfc, clfile):
+ return libgfc.gf_history_changelog_done(clfile.encode())
+
+ def gfind_write_row(f, row, field_separator, p_rep, row_2_rep):
+ f.write(u"{0}{1}{2}{3}{4}\n".format(row,
+ field_separator,
+ p_rep,
+ field_separator,
+ row_2_rep))
+
+ def gfind_write(f, row, field_separator, p_rep):
+ f.write(u"{0}{1}{2}\n".format(row,
+ field_separator,
+ p_rep))
+
+
+else:
+
+ # Raw conversion of bytearray to string
+ def bytearray_to_str(byte_arr):
+ return byte_arr
+
+ def gf_create_string_buffer(size):
+ return create_string_buffer('\0', size)
+
+ def gfind_history_changelog(libgfc, changelog_path, start, end, num_parallel,
+ actual_end):
+ return libgfc.gf_history_changelog(changelog_path, start, end,
+ num_parallel, actual_end)
+
+ def gfind_changelog_register(libgfc, brick, path, log_file, log_level,
+ retries):
+ return libgfc.gf_changelog_register(brick, path, log_file,
+ log_level, retries)
+
+ def gfind_history_changelog_done(libgfc, clfile):
+ return libgfc.gf_history_changelog_done(clfile)
+
+ def gfind_write_row(f, row, field_separator, p_rep, row_2_rep):
+ f.write(u"{0}{1}{2}{3}{4}\n".format(row,
+ field_separator,
+ p_rep,
+ field_separator,
+ row_2_rep).encode())
+
+ def gfind_write(f, row, field_separator, p_rep):
+ f.write(u"{0}{1}{2}\n".format(row,
+ field_separator,
+ p_rep).encode())
diff --git a/tools/glusterfind/src/libgfchangelog.py b/tools/glusterfind/src/libgfchangelog.py
index afb3387db2d..513bb101e93 100644
--- a/tools/glusterfind/src/libgfchangelog.py
+++ b/tools/glusterfind/src/libgfchangelog.py
@@ -9,15 +9,17 @@
# cases as published by the Free Software Foundation.
import os
-from ctypes import CDLL, get_errno, create_string_buffer, c_ulong, byref
-from ctypes import RTLD_GLOBAL
+from ctypes import CDLL, RTLD_GLOBAL, get_errno, create_string_buffer, c_ulong, byref
+from ctypes.util import find_library
+from gfind_py2py3 import bytearray_to_str, gf_create_string_buffer
+from gfind_py2py3 import gfind_history_changelog, gfind_changelog_register
+from gfind_py2py3 import gfind_history_changelog_done
class ChangelogException(OSError):
pass
-
-libgfc = CDLL("libgfchangelog.so", use_errno=True, mode=RTLD_GLOBAL)
+libgfc = CDLL(find_library("gfchangelog"), mode=RTLD_GLOBAL, use_errno=True)
def raise_oserr(prefix=None):
@@ -33,8 +35,7 @@ def cl_init():
def cl_register(brick, path, log_file, log_level, retries=0):
- ret = libgfc.gf_changelog_register(brick, path, log_file,
- log_level, retries)
+ ret = gfind_changelog_register(libgfc, brick, path, log_file,log_level, retries)
if ret == -1:
raise_oserr(prefix="gf_changelog_register")
@@ -49,7 +50,7 @@ def cl_history_scan():
def cl_history_changelog(changelog_path, start, end, num_parallel):
actual_end = c_ulong()
- ret = libgfc.gf_history_changelog(changelog_path, start, end,
+ ret = gfind_history_changelog(libgfc,changelog_path, start, end,
num_parallel,
byref(actual_end))
if ret == -1:
@@ -70,13 +71,15 @@ def cl_history_getchanges():
return f.split('.')[-1]
changes = []
- buf = create_string_buffer('\0', 4096)
+ buf = gf_create_string_buffer(4096)
while True:
ret = libgfc.gf_history_changelog_next_change(buf, 4096)
if ret in (0, -1):
break
- changes.append(buf.raw[:ret - 1])
+ # py2 and py3 compatibility
+ result = bytearray_to_str(buf.raw[:ret - 1])
+ changes.append(result)
if ret == -1:
raise_oserr(prefix="gf_history_changelog_next_change")
@@ -84,6 +87,6 @@ def cl_history_getchanges():
def cl_history_done(clfile):
- ret = libgfc.gf_history_changelog_done(clfile)
+ ret = gfind_history_changelog_done(libgfc, clfile)
if ret == -1:
raise_oserr(prefix="gf_history_changelog_done")
diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py
index dd44cfbfb74..4b5466d0114 100644
--- a/tools/glusterfind/src/main.py
+++ b/tools/glusterfind/src/main.py
@@ -16,12 +16,14 @@ from multiprocessing import Process
import os
import xml.etree.cElementTree as etree
from argparse import ArgumentParser, RawDescriptionHelpFormatter, Action
+from gfind_py2py3 import gfind_write_row, gfind_write
import logging
import shutil
import tempfile
import signal
from datetime import datetime
import codecs
+import re
from utils import execute, is_host_local, mkdirp, fail
from utils import setup_logger, human_time, handle_rm_error
@@ -35,9 +37,9 @@ GlusterFS Incremental API
ParseError = etree.ParseError if hasattr(etree, 'ParseError') else SyntaxError
logger = logging.getLogger()
-node_outfiles = []
vol_statusStr = ""
gtmpfilename = None
+g_pid_nodefile_map = {}
class StoreAbsPath(Action):
@@ -111,7 +113,7 @@ def node_cmd(host, host_uuid, task, cmd, args, opts):
def run_cmd_nodes(task, args, **kwargs):
- global node_outfiles
+ global g_pid_nodefile_map
nodes = get_nodes(args.volume)
pool = []
for num, node in enumerate(nodes):
@@ -142,7 +144,6 @@ def run_cmd_nodes(task, args, **kwargs):
if tag == "":
tag = '""' if not is_host_local(host_uuid) else ""
- node_outfiles.append(node_outfile)
# remote file will be copied into this directory
mkdirp(os.path.dirname(node_outfile),
exit_on_err=True, logger=logger)
@@ -165,6 +166,7 @@ def run_cmd_nodes(task, args, **kwargs):
(["--no-encode"] if args.no_encode else []) + \
(["--only-namespace-changes"] if args.only_namespace_changes
else []) + \
+ (["--type", args.type]) + \
(["--field-separator", FS] if args.full else [])
opts["node_outfile"] = node_outfile
@@ -179,7 +181,6 @@ def run_cmd_nodes(task, args, **kwargs):
if tag == "":
tag = '""' if not is_host_local(host_uuid) else ""
- node_outfiles.append(node_outfile)
# remote file will be copied into this directory
mkdirp(os.path.dirname(node_outfile),
exit_on_err=True, logger=logger)
@@ -203,6 +204,7 @@ def run_cmd_nodes(task, args, **kwargs):
(["--no-encode"] if args.no_encode else []) + \
(["--only-namespace-changes"]
if args.only_namespace_changes else []) + \
+ (["--type", args.type]) + \
(["--field-separator", FS] if args.full else [])
opts["node_outfile"] = node_outfile
@@ -262,6 +264,7 @@ def run_cmd_nodes(task, args, **kwargs):
args=(host, host_uuid, task, cmd, args, opts))
p.start()
pool.append(p)
+ g_pid_nodefile_map[p.pid] = node_outfile
for num, p in enumerate(pool):
p.join()
@@ -269,8 +272,11 @@ def run_cmd_nodes(task, args, **kwargs):
logger.warn("Command %s failed in %s" % (task, nodes[num][1]))
if task in ["create", "delete"]:
fail("Command %s failed in %s" % (task, nodes[num][1]))
- elif task == "pre" and args.disable_partial:
- sys.exit(1)
+ elif task == "pre" or task == "query":
+ if args.disable_partial:
+ sys.exit(1)
+ else:
+ del g_pid_nodefile_map[p.pid]
@cache_output
@@ -320,6 +326,7 @@ def _get_args():
parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
description=PROG_DESCRIPTION)
subparsers = parser.add_subparsers(dest="mode")
+ subparsers.required = True
# create <SESSION> <VOLUME> [--debug] [--force]
parser_create = subparsers.add_parser('create')
@@ -370,6 +377,9 @@ def _get_args():
help="Tag prefix for file names emitted during"
" a full find operation; default: \"NEW\"",
default="NEW")
+ parser_pre.add_argument('--type', help="type: f, f-files only"
+ " d, d-directories only, by default = both",
+ default='both', choices=["f", "d", "both"])
parser_pre.add_argument("--field-separator", help="Field separator string",
default=" ")
@@ -399,6 +409,9 @@ def _get_args():
help="Tag prefix for file names emitted during"
" a full find operation; default: \"NEW\"",
default="NEW")
+ parser_query.add_argument('--type', help="type: f, f-files only"
+ " d, d-directories only, by default = both",
+ default='both', choices=["f", "d", "both"])
parser_query.add_argument("--field-separator",
help="Field separator string",
default=" ")
@@ -503,22 +516,13 @@ def write_output(outfile, outfilemerger, field_separator):
continue
if row_2_rep and row_2_rep != "":
- f.write("{0}{1}{2}{3}{4}\n".format(row[0],
- field_separator,
- p_rep,
- field_separator,
- row_2_rep))
- else:
- f.write("{0}{1}{2}\n".format(row[0],
- field_separator,
- p_rep))
-
+ gfind_write_row(f, row[0], field_separator, p_rep, row_2_rep)
-def mode_create(session_dir, args):
- logger.debug("Init is called - Session: %s, Volume: %s"
- % (args.session, args.volume))
+ else:
+ gfind_write(f, row[0], field_separator, p_rep)
- cmd = ["gluster", 'volume', 'info', args.volume, "--xml"]
+def validate_volume(volume):
+ cmd = ["gluster", 'volume', 'info', volume, "--xml"]
_, data, _ = execute(cmd,
exit_msg="Failed to Run Gluster Volume Info",
logger=logger)
@@ -526,11 +530,42 @@ def mode_create(session_dir, args):
tree = etree.fromstring(data)
statusStr = tree.find('volInfo/volumes/volume/statusStr').text
except (ParseError, AttributeError) as e:
- fail("Invalid Volume: %s" % e, logger=logger)
-
+ fail("Invalid Volume: Check the Volume name! %s" % e)
if statusStr != "Started":
- fail("Volume %s is not online" % args.volume, logger=logger)
+ fail("Volume %s is not online" % volume)
+
+# The rules for a valid session name.
+SESSION_NAME_RULES = {
+ 'min_length': 2,
+ 'max_length': 256, # same as maximum volume length
+ # Specifies all alphanumeric characters, underscore, hyphen.
+ 'valid_chars': r'0-9a-zA-Z_-',
+}
+
+
+# checks valid session name, fail otherwise
+def validate_session_name(session):
+ # Check for minimum length
+ if len(session) < SESSION_NAME_RULES['min_length']:
+ fail('session_name must be at least ' +
+ str(SESSION_NAME_RULES['min_length']) + ' characters long.')
+ # Check for maximum length
+ if len(session) > SESSION_NAME_RULES['max_length']:
+ fail('session_name must not exceed ' +
+ str(SESSION_NAME_RULES['max_length']) + ' characters length.')
+
+ # Matches strings composed entirely of characters specified within
+ if not re.match(r'^[' + SESSION_NAME_RULES['valid_chars'] +
+ ']+$', session):
+ fail('Session name can only contain these characters: ' +
+ SESSION_NAME_RULES['valid_chars'])
+
+def mode_create(session_dir, args):
+ validate_session_name(args.session)
+
+ logger.debug("Init is called - Session: %s, Volume: %s"
+ % (args.session, args.volume))
mkdirp(session_dir, exit_on_err=True, logger=logger)
mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
logger=logger)
@@ -551,7 +586,7 @@ def mode_create(session_dir, args):
run_cmd_nodes("create", args, time_to_update=str(time_to_update))
if not os.path.exists(status_file) or args.reset_session_time:
- with open(status_file, "w", buffering=0) as f:
+ with open(status_file, "w") as f:
f.write(str(time_to_update))
sys.stdout.write("Session %s created with volume %s\n" %
@@ -562,6 +597,7 @@ def mode_create(session_dir, args):
def mode_query(session_dir, args):
global gtmpfilename
+ global g_pid_nodefile_map
# Verify volume status
cmd = ["gluster", 'volume', 'info', args.volume, "--xml"]
@@ -589,6 +625,8 @@ def mode_query(session_dir, args):
enable_volume_options(args)
# Test options
+ if not args.full and args.type in ["f", "d"]:
+ fail("--type can only be used with --full")
if not args.since_time and not args.end_time and not args.full:
fail("Please specify either {--since-time and optionally --end-time} "
"or --full", logger=logger)
@@ -623,14 +661,20 @@ def mode_query(session_dir, args):
# Merger
if args.full:
- cmd = ["sort", "-u"] + node_outfiles + ["-o", args.outfile]
- execute(cmd,
- exit_msg="Failed to merge output files "
- "collected from nodes", logger=logger)
+ if len(g_pid_nodefile_map) > 0:
+ cmd = ["sort", "-u"] + list(g_pid_nodefile_map.values()) + \
+ ["-o", args.outfile]
+ execute(cmd,
+ exit_msg="Failed to merge output files "
+ "collected from nodes", logger=logger)
+ else:
+ fail("Failed to collect any output files from peers. "
+ "Looks like all bricks are offline.", logger=logger)
else:
# Read each Changelogs db and generate finaldb
create_file(args.outfile, exit_on_err=True, logger=logger)
- outfilemerger = OutputMerger(args.outfile + ".db", node_outfiles)
+ outfilemerger = OutputMerger(args.outfile + ".db",
+ list(g_pid_nodefile_map.values()))
write_output(args.outfile, outfilemerger, args.field_separator)
try:
@@ -645,6 +689,7 @@ def mode_query(session_dir, args):
def mode_pre(session_dir, args):
global gtmpfilename
+ global g_pid_nodefile_map
"""
Read from Session file and write to session.pre file
@@ -656,6 +701,9 @@ def mode_pre(session_dir, args):
mkdirp(os.path.dirname(args.outfile), exit_on_err=True, logger=logger)
+ if not args.full and args.type in ["f", "d"]:
+ fail("--type can only be used with --full")
+
# If Pre status file exists and running pre command again
if os.path.exists(status_file_pre) and not args.regenerate_outfile:
fail("Post command is not run after last pre, "
@@ -682,14 +730,20 @@ def mode_pre(session_dir, args):
# Merger
if args.full:
- cmd = ["sort", "-u"] + node_outfiles + ["-o", args.outfile]
- execute(cmd,
- exit_msg="Failed to merge output files "
- "collected from nodes", logger=logger)
+ if len(g_pid_nodefile_map) > 0:
+ cmd = ["sort", "-u"] + list(g_pid_nodefile_map.values()) + \
+ ["-o", args.outfile]
+ execute(cmd,
+ exit_msg="Failed to merge output files "
+ "collected from nodes", logger=logger)
+ else:
+ fail("Failed to collect any output files from peers. "
+ "Looks like all bricks are offline.", logger=logger)
else:
# Read each Changelogs db and generate finaldb
create_file(args.outfile, exit_on_err=True, logger=logger)
- outfilemerger = OutputMerger(args.outfile + ".db", node_outfiles)
+ outfilemerger = OutputMerger(args.outfile + ".db",
+ list(g_pid_nodefile_map.values()))
write_output(args.outfile, outfilemerger, args.field_separator)
try:
@@ -699,7 +753,7 @@ def mode_pre(session_dir, args):
run_cmd_nodes("cleanup", args, tmpfilename=gtmpfilename)
- with open(status_file_pre, "w", buffering=0) as f:
+ with open(status_file_pre, "w") as f:
f.write(str(endtime_to_update))
sys.stdout.write("Generated output file %s\n" % args.outfile)
@@ -825,6 +879,11 @@ def main():
args.mode not in ["create", "list", "query"]:
fail("Invalid session %s" % args.session)
+ # volume involved, validate the volume first
+ if args.mode not in ["list"]:
+ validate_volume(args.volume)
+
+
# "default" is a system defined session name
if args.mode in ["create", "post", "pre", "delete"] and \
args.session == "default":
diff --git a/tools/glusterfind/src/nodeagent.py b/tools/glusterfind/src/nodeagent.py
index c36341f216e..679daa6fa76 100644
--- a/tools/glusterfind/src/nodeagent.py
+++ b/tools/glusterfind/src/nodeagent.py
@@ -58,7 +58,7 @@ def mode_create(args):
logger=logger)
if not os.path.exists(status_file) or args.reset_session_time:
- with open(status_file, "w", buffering=0) as f:
+ with open(status_file, "w") as f:
f.write(args.time_to_update)
sys.exit(0)
diff --git a/tools/glusterfind/src/utils.py b/tools/glusterfind/src/utils.py
index b376241820b..906ebd8f252 100644
--- a/tools/glusterfind/src/utils.py
+++ b/tools/glusterfind/src/utils.py
@@ -58,12 +58,13 @@ def find(path, callback_func=lambda x: True, filter_func=lambda x: True,
# Capture filter_func output and pass it to callback function
filter_result = filter_func(path)
if filter_result is not None:
- callback_func(path, filter_result)
+ callback_func(path, filter_result, os.path.isdir(path))
for p in os.listdir(path):
full_path = os.path.join(path, p)
- if os.path.isdir(full_path):
+ is_dir = os.path.isdir(full_path)
+ if is_dir:
if subdirs_crawl:
find(full_path, callback_func, filter_func, ignore_dirs)
else:
@@ -73,7 +74,7 @@ def find(path, callback_func=lambda x: True, filter_func=lambda x: True,
else:
filter_result = filter_func(full_path)
if filter_result is not None:
- callback_func(full_path, filter_result)
+ callback_func(full_path, filter_result, is_dir)
def output_write(f, path, prefix=".", encode=False, tag="",
@@ -229,7 +230,11 @@ def get_changelog_rollover_time(volumename):
try:
tree = etree.fromstring(out)
- return int(tree.find('volGetopts/Opt/Value').text)
+ val = tree.find('volGetopts/Opt/Value').text
+ if val is not None:
+ # Filter the value by split, as it may be 'X (DEFAULT)'
+ # and we only need 'X'
+ return int(val.split(' ', 1)[0])
except ParseError:
return DEFAULT_CHANGELOG_INTERVAL
diff --git a/tools/setgfid2path/src/main.c b/tools/setgfid2path/src/main.c
index c9e71c12815..4320a7b2481 100644
--- a/tools/setgfid2path/src/main.c
+++ b/tools/setgfid2path/src/main.c
@@ -10,8 +10,8 @@
#include <stdio.h>
#include <libgen.h>
-#include "common-utils.h"
-#include "syscall.h"
+#include <glusterfs/common-utils.h>
+#include <glusterfs/syscall.h>
#define MAX_GFID2PATH_LINK_SUP 500
#define GFID_SIZE 16
@@ -28,7 +28,7 @@ main(int argc, char **argv)
uuid_t pgfid_raw = {
0,
};
- char pgfid[36] = "";
+ char pgfid[36 + 1] = "";
char xxh64[GF_XXH64_DIGEST_LENGTH * 2 + 1] = {
0,
};
diff --git a/xlators/Makefile.am b/xlators/Makefile.am
index 29549db724e..ef20cbb64fa 100644
--- a/xlators/Makefile.am
+++ b/xlators/Makefile.am
@@ -2,10 +2,10 @@ if BUILD_GNFS
GNFS_DIR = nfs
endif
-DIST_SUBDIRS = cluster storage protocol performance debug features encryption \
+DIST_SUBDIRS = cluster storage protocol performance debug features \
mount nfs mgmt system playground meta
-SUBDIRS = cluster storage protocol performance debug features encryption \
+SUBDIRS = cluster storage protocol performance debug features \
mount ${GNFS_DIR} mgmt system playground meta
EXTRA_DIST = xlator.sym
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 231a7970928..032ab5c8001 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -15,22 +15,20 @@
#include <stdlib.h>
#include <signal.h>
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
-#include "statedump.h"
-#include "events.h"
-#include "upcall-utils.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/hashfn.h>
+#include <glusterfs/list.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/statedump.h>
+#include <glusterfs/events.h>
+#include <glusterfs/upcall-utils.h>
#include "afr-inode-read.h"
#include "afr-inode-write.h"
@@ -47,6 +45,56 @@ afr_quorum_errno(afr_private_t *priv)
return ENOTCONN;
}
+gf_boolean_t
+afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name,
+ pid_t pid)
+{
+ if (!__is_root_gfid(pargfid)) {
+ return _gf_false;
+ }
+
+ if (strcmp(name, GF_REPLICATE_TRASH_DIR) == 0) {
+ /*For backward compatibility /.landfill is private*/
+ return _gf_true;
+ }
+
+ if (pid == GF_CLIENT_PID_GSYNCD) {
+ /*geo-rep needs to create/sync private directory on slave because
+ * it appears in changelog*/
+ return _gf_false;
+ }
+
+ if (pid == GF_CLIENT_PID_GLFS_HEAL || pid == GF_CLIENT_PID_SELF_HEALD) {
+ if (strcmp(name, priv->anon_inode_name) == 0) {
+ /* anonymous-inode dir is private*/
+ return _gf_true;
+ }
+ } else {
+ if (strncmp(name, AFR_ANON_DIR_PREFIX, strlen(AFR_ANON_DIR_PREFIX)) ==
+ 0) {
+ /* anonymous-inode dir prefix is private for geo-rep to work*/
+ return _gf_true;
+ }
+ }
+
+ return _gf_false;
+}
+
+void
+afr_fill_success_replies(afr_local_t *local, afr_private_t *priv,
+ unsigned char *replies)
+{
+ int i = 0;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].valid && local->replies[i].op_ret == 0) {
+ replies[i] = 1;
+ } else {
+ replies[i] = 0;
+ }
+ }
+}
+
int
afr_fav_child_reset_sink_xattrs(void *opaque);
@@ -56,6 +104,581 @@ afr_fav_child_reset_sink_xattrs_cbk(int ret, call_frame_t *frame, void *opaque);
static void
afr_discover_done(call_frame_t *frame, xlator_t *this);
+int
+afr_dom_lock_acquire_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ int i = (long)cookie;
+
+ local->cont.lk.dom_lock_op_ret[i] = op_ret;
+ local->cont.lk.dom_lock_op_errno[i] = op_errno;
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_LK_HEAL_DOM,
+ "%s: Failed to acquire %s on %s",
+ uuid_utoa(local->fd->inode->gfid), AFR_LK_HEAL_DOM,
+ priv->children[i]->name);
+ } else {
+ local->cont.lk.dom_locked_nodes[i] = 1;
+ }
+
+ syncbarrier_wake(&local->barrier);
+
+ return 0;
+}
+
+int
+afr_dom_lock_acquire(call_frame_t *frame)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ struct gf_flock flock = {
+ 0,
+ };
+ int i = 0;
+
+ priv = frame->this->private;
+ local = frame->local;
+ local->cont.lk.dom_locked_nodes = GF_CALLOC(
+ priv->child_count, sizeof(*local->cont.lk.locked_nodes),
+ gf_afr_mt_char);
+ if (!local->cont.lk.dom_locked_nodes) {
+ return -ENOMEM;
+ }
+ local->cont.lk.dom_lock_op_ret = GF_CALLOC(
+ priv->child_count, sizeof(*local->cont.lk.dom_lock_op_ret),
+ gf_afr_mt_int32_t);
+ if (!local->cont.lk.dom_lock_op_ret) {
+ return -ENOMEM; /* CALLOC'd members are freed in afr_local_cleanup. */
+ }
+ local->cont.lk.dom_lock_op_errno = GF_CALLOC(
+ priv->child_count, sizeof(*local->cont.lk.dom_lock_op_errno),
+ gf_afr_mt_int32_t);
+ if (!local->cont.lk.dom_lock_op_errno) {
+ return -ENOMEM; /* CALLOC'd members are freed in afr_local_cleanup. */
+ }
+ flock.l_type = F_WRLCK;
+
+ AFR_ONALL(frame, afr_dom_lock_acquire_cbk, finodelk, AFR_LK_HEAL_DOM,
+ local->fd, F_SETLK, &flock, NULL);
+
+ if (!afr_has_quorum(local->cont.lk.dom_locked_nodes, frame->this, NULL))
+ goto blocking_lock;
+
+ /*If any of the bricks returned EAGAIN, we still need blocking locks.*/
+ if (AFR_COUNT(local->cont.lk.dom_locked_nodes, priv->child_count) !=
+ priv->child_count) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->cont.lk.dom_lock_op_ret[i] == -1 &&
+ local->cont.lk.dom_lock_op_errno[i] == EAGAIN)
+ goto blocking_lock;
+ }
+ }
+
+ return 0;
+
+blocking_lock:
+ afr_dom_lock_release(frame);
+ AFR_ONALL(frame, afr_dom_lock_acquire_cbk, finodelk, AFR_LK_HEAL_DOM,
+ local->fd, F_SETLKW, &flock, NULL);
+ if (!afr_has_quorum(local->cont.lk.dom_locked_nodes, frame->this, NULL)) {
+ afr_dom_lock_release(frame);
+ return -afr_quorum_errno(priv);
+ }
+
+ return 0;
+}
+
+int
+afr_dom_lock_release_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ int i = (long)cookie;
+
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_LK_HEAL_DOM,
+ "%s: Failed to release %s on %s", local->loc.path,
+ AFR_LK_HEAL_DOM, priv->children[i]->name);
+ }
+ local->cont.lk.dom_locked_nodes[i] = 0;
+
+ syncbarrier_wake(&local->barrier);
+
+ return 0;
+}
+
+void
+afr_dom_lock_release(call_frame_t *frame)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ unsigned char *locked_on = NULL;
+ struct gf_flock flock = {
+ 0,
+ };
+
+ local = frame->local;
+ priv = frame->this->private;
+ locked_on = local->cont.lk.dom_locked_nodes;
+ if (AFR_COUNT(locked_on, priv->child_count) == 0)
+ return;
+ flock.l_type = F_UNLCK;
+
+ AFR_ONLIST(locked_on, frame, afr_dom_lock_release_cbk, finodelk,
+ AFR_LK_HEAL_DOM, local->fd, F_SETLK, &flock, NULL);
+
+ return;
+}
+
+static void
+afr_lk_heal_info_cleanup(afr_lk_heal_info_t *info)
+{
+ if (!info)
+ return;
+ if (info->xdata_req)
+ dict_unref(info->xdata_req);
+ if (info->fd)
+ fd_unref(info->fd);
+ GF_FREE(info->locked_nodes);
+ GF_FREE(info->child_up_event_gen);
+ GF_FREE(info->child_down_event_gen);
+ GF_FREE(info);
+}
+
+static int
+afr_add_lock_to_saved_locks(call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = this->private;
+ afr_local_t *local = frame->local;
+ afr_lk_heal_info_t *info = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int ret = -ENOMEM;
+
+ info = GF_CALLOC(sizeof(*info), 1, gf_afr_mt_lk_heal_info_t);
+ if (!info) {
+ goto cleanup;
+ }
+ INIT_LIST_HEAD(&info->pos);
+ info->fd = fd_ref(local->fd);
+ info->cmd = local->cont.lk.cmd;
+ info->pid = frame->root->pid;
+ info->flock = local->cont.lk.user_flock;
+ info->xdata_req = dict_copy_with_ref(local->xdata_req, NULL);
+ if (!info->xdata_req) {
+ goto cleanup;
+ }
+ info->lk_owner = frame->root->lk_owner;
+ info->locked_nodes = GF_MALLOC(
+ sizeof(*info->locked_nodes) * priv->child_count, gf_afr_mt_char);
+ if (!info->locked_nodes) {
+ goto cleanup;
+ }
+ memcpy(info->locked_nodes, local->cont.lk.locked_nodes,
+ sizeof(*info->locked_nodes) * priv->child_count);
+ info->child_up_event_gen = GF_CALLOC(sizeof(*info->child_up_event_gen),
+ priv->child_count, gf_afr_mt_int32_t);
+ if (!info->child_up_event_gen) {
+ goto cleanup;
+ }
+ info->child_down_event_gen = GF_CALLOC(sizeof(*info->child_down_event_gen),
+ priv->child_count,
+ gf_afr_mt_int32_t);
+ if (!info->child_down_event_gen) {
+ goto cleanup;
+ }
+
+ LOCK(&local->fd->lock);
+ {
+ fd_ctx = __afr_fd_ctx_get(local->fd, this);
+ if (fd_ctx)
+ fd_ctx->lk_heal_info = info;
+ }
+ UNLOCK(&local->fd->lock);
+ if (!fd_ctx) {
+ goto cleanup;
+ }
+
+ LOCK(&priv->lock);
+ {
+ list_add_tail(&info->pos, &priv->saved_locks);
+ }
+ UNLOCK(&priv->lock);
+
+ return 0;
+cleanup:
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_LK_HEAL_DOM,
+ "%s: Failed to add lock to healq",
+ uuid_utoa(local->fd->inode->gfid));
+ if (info) {
+ afr_lk_heal_info_cleanup(info);
+ if (fd_ctx) {
+ LOCK(&local->fd->lock);
+ {
+ fd_ctx->lk_heal_info = NULL;
+ }
+ UNLOCK(&local->fd->lock);
+ }
+ }
+ return ret;
+}
+
+static int
+afr_remove_lock_from_saved_locks(afr_local_t *local, xlator_t *this)
+{
+ afr_private_t *priv = this->private;
+ struct gf_flock flock = local->cont.lk.user_flock;
+ afr_lk_heal_info_t *info = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int ret = -EINVAL;
+
+ fd_ctx = afr_fd_ctx_get(local->fd, this);
+ if (!fd_ctx || !fd_ctx->lk_heal_info) {
+ goto out;
+ }
+
+ info = fd_ctx->lk_heal_info;
+ if ((info->flock.l_start != flock.l_start) ||
+ (info->flock.l_whence != flock.l_whence) ||
+ (info->flock.l_len != flock.l_len)) {
+ /*TODO: Compare lkowners too.*/
+ goto out;
+ }
+
+ LOCK(&priv->lock);
+ {
+ list_del(&fd_ctx->lk_heal_info->pos);
+ }
+ UNLOCK(&priv->lock);
+
+ afr_lk_heal_info_cleanup(info);
+ fd_ctx->lk_heal_info = NULL;
+ ret = 0;
+out:
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_LK_HEAL_DOM,
+ "%s: Failed to remove lock from healq",
+ uuid_utoa(local->fd->inode->gfid));
+ return ret;
+}
+
+int
+afr_lock_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ afr_local_t *local = frame->local;
+ int i = (long)cookie;
+
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+ if (op_ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_LK_HEAL_DOM,
+ "Failed to heal lock on child %d for %s", i,
+ uuid_utoa(local->fd->inode->gfid));
+ }
+ syncbarrier_wake(&local->barrier);
+ return 0;
+}
+
+int
+afr_getlk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
+{
+ afr_local_t *local = frame->local;
+ int i = (long)cookie;
+
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+ if (op_ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_LK_HEAL_DOM,
+ "Failed getlk for %s", uuid_utoa(local->fd->inode->gfid));
+ } else {
+ local->cont.lk.getlk_rsp[i] = *lock;
+ }
+
+ syncbarrier_wake(&local->barrier);
+ return 0;
+}
+
+static gf_boolean_t
+afr_does_lk_owner_match(call_frame_t *frame, afr_private_t *priv,
+ afr_lk_heal_info_t *info)
+{
+ int i = 0;
+ afr_local_t *local = frame->local;
+ struct gf_flock flock = {
+ 0,
+ };
+ gf_boolean_t ret = _gf_true;
+ char *wind_on = alloca0(priv->child_count);
+ unsigned char *success_replies = alloca0(priv->child_count);
+ local->cont.lk.getlk_rsp = GF_CALLOC(sizeof(*local->cont.lk.getlk_rsp),
+ priv->child_count, gf_afr_mt_gf_lock);
+
+ flock = info->flock;
+ for (i = 0; i < priv->child_count; i++) {
+ if (info->locked_nodes[i])
+ wind_on[i] = 1;
+ }
+
+ AFR_ONLIST(wind_on, frame, afr_getlk_cbk, lk, info->fd, F_GETLK, &flock,
+ info->xdata_req);
+
+ afr_fill_success_replies(local, priv, success_replies);
+ if (AFR_COUNT(success_replies, priv->child_count) == 0) {
+ ret = _gf_false;
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid || local->replies[i].op_ret != 0)
+ continue;
+ if (local->cont.lk.getlk_rsp[i].l_type == F_UNLCK)
+ continue;
+ /*TODO: Do we really need to compare lkowner if F_UNLCK is true?*/
+ if (!is_same_lkowner(&local->cont.lk.getlk_rsp[i].l_owner,
+ &info->lk_owner)) {
+ ret = _gf_false;
+ break;
+ }
+ }
+out:
+ afr_local_replies_wipe(local, priv);
+ GF_FREE(local->cont.lk.getlk_rsp);
+ local->cont.lk.getlk_rsp = NULL;
+ return ret;
+}
+
+static void
+afr_mark_fd_bad(fd_t *fd, xlator_t *this)
+{
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ if (!fd)
+ return;
+ LOCK(&fd->lock);
+ {
+ fd_ctx = __afr_fd_ctx_get(fd, this);
+ if (fd_ctx) {
+ fd_ctx->is_fd_bad = _gf_true;
+ fd_ctx->lk_heal_info = NULL;
+ }
+ }
+ UNLOCK(&fd->lock);
+}
+
+static void
+afr_add_lock_to_lkhealq(afr_private_t *priv, afr_lk_heal_info_t *info)
+{
+ LOCK(&priv->lock);
+ {
+ list_del(&info->pos);
+ list_add_tail(&info->pos, &priv->lk_healq);
+ }
+ UNLOCK(&priv->lock);
+}
+
+static void
+afr_lock_heal_do(call_frame_t *frame, afr_private_t *priv,
+ afr_lk_heal_info_t *info)
+{
+ int i = 0;
+ int op_errno = 0;
+ int32_t *current_event_gen = NULL;
+ afr_local_t *local = frame->local;
+ xlator_t *this = frame->this;
+ char *wind_on = alloca0(priv->child_count);
+ gf_boolean_t retry = _gf_true;
+
+ frame->root->pid = info->pid;
+ lk_owner_copy(&frame->root->lk_owner, &info->lk_owner);
+
+ op_errno = -afr_dom_lock_acquire(frame);
+ if ((op_errno != 0)) {
+ goto release;
+ }
+
+ if (!afr_does_lk_owner_match(frame, priv, info)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_LK_HEAL_DOM,
+ "Ignoring lock heal for %s since lk-onwers mismatch. "
+ "Lock possibly pre-empted by another client.",
+ uuid_utoa(info->fd->inode->gfid));
+ goto release;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (info->locked_nodes[i])
+ continue;
+ wind_on[i] = 1;
+ }
+
+ current_event_gen = alloca(priv->child_count);
+ memcpy(current_event_gen, info->child_up_event_gen,
+ priv->child_count * sizeof *current_event_gen);
+ AFR_ONLIST(wind_on, frame, afr_lock_heal_cbk, lk, info->fd, info->cmd,
+ &info->flock, info->xdata_req);
+
+ LOCK(&priv->lock);
+ {
+ for (i = 0; i < priv->child_count; i++) {
+ if (!wind_on[i])
+ continue;
+ if ((!local->replies[i].valid) || (local->replies[i].op_ret != 0)) {
+ continue;
+ }
+
+ if ((current_event_gen[i] == info->child_up_event_gen[i]) &&
+ (current_event_gen[i] > info->child_down_event_gen[i])) {
+ info->locked_nodes[i] = 1;
+ retry = _gf_false;
+ list_del_init(&info->pos);
+ list_add_tail(&info->pos, &priv->saved_locks);
+ } else {
+ /*We received subsequent child up/down events while heal was in
+ * progress; don't mark child as healed. Attempt again on the
+ * new child up*/
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_LK_HEAL_DOM,
+ "Event gen mismatch: skipped healing lock on child %d "
+ "for %s.",
+ i, uuid_utoa(info->fd->inode->gfid));
+ }
+ }
+ }
+ UNLOCK(&priv->lock);
+
+release:
+ afr_dom_lock_release(frame);
+ if (retry)
+ afr_add_lock_to_lkhealq(priv, info);
+ return;
+}
+
+static int
+afr_lock_heal_done(int ret, call_frame_t *frame, void *opaque)
+{
+ STACK_DESTROY(frame->root);
+ return 0;
+}
+
+static int
+afr_lock_heal(void *opaque)
+{
+ call_frame_t *frame = (call_frame_t *)opaque;
+ call_frame_t *iter_frame = NULL;
+ xlator_t *this = frame->this;
+ afr_private_t *priv = this->private;
+ afr_lk_heal_info_t *info = NULL;
+ afr_lk_heal_info_t *tmp = NULL;
+ struct list_head healq = {
+ 0,
+ };
+ int ret = 0;
+
+ iter_frame = afr_copy_frame(frame);
+ if (!iter_frame) {
+ return ENOMEM;
+ }
+
+ INIT_LIST_HEAD(&healq);
+ LOCK(&priv->lock);
+ {
+ list_splice_init(&priv->lk_healq, &healq);
+ }
+ UNLOCK(&priv->lock);
+
+ list_for_each_entry_safe(info, tmp, &healq, pos)
+ {
+ GF_ASSERT((AFR_COUNT(info->locked_nodes, priv->child_count) <
+ priv->child_count));
+ ((afr_local_t *)(iter_frame->local))->fd = fd_ref(info->fd);
+ afr_lock_heal_do(iter_frame, priv, info);
+ AFR_STACK_RESET(iter_frame);
+ if (iter_frame->local == NULL) {
+ ret = ENOTCONN;
+ gf_msg(frame->this->name, GF_LOG_ERROR, ENOTCONN,
+ AFR_MSG_LK_HEAL_DOM,
+ "Aborting processing of lk_healq."
+ "Healing will be reattempted on next child up for locks "
+ "that are still in quorum.");
+ LOCK(&priv->lock);
+ {
+ list_add_tail(&healq, &priv->lk_healq);
+ }
+ UNLOCK(&priv->lock);
+ break;
+ }
+ }
+
+ AFR_STACK_DESTROY(iter_frame);
+ return ret;
+}
+
+static int
+__afr_lock_heal_synctask(xlator_t *this, afr_private_t *priv, int child)
+{
+ int ret = 0;
+ call_frame_t *frame = NULL;
+ afr_lk_heal_info_t *info = NULL;
+ afr_lk_heal_info_t *tmp = NULL;
+
+ if (priv->shd.iamshd)
+ return 0;
+
+ list_for_each_entry_safe(info, tmp, &priv->saved_locks, pos)
+ {
+ info->child_up_event_gen[child] = priv->event_generation;
+ list_del_init(&info->pos);
+ list_add_tail(&info->pos, &priv->lk_healq);
+ }
+
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame)
+ return -1;
+
+ ret = synctask_new(this->ctx->env, afr_lock_heal, afr_lock_heal_done, frame,
+ frame);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_LK_HEAL_DOM,
+ "Failed to launch lock heal synctask");
+
+ return ret;
+}
+
+static int
+__afr_mark_pending_lk_heal(xlator_t *this, afr_private_t *priv, int child)
+{
+ afr_lk_heal_info_t *info = NULL;
+ afr_lk_heal_info_t *tmp = NULL;
+
+ if (priv->shd.iamshd)
+ return 0;
+ list_for_each_entry_safe(info, tmp, &priv->saved_locks, pos)
+ {
+ info->child_down_event_gen[child] = priv->event_generation;
+ if (info->locked_nodes[child] == 1)
+ info->locked_nodes[child] = 0;
+ if (!afr_has_quorum(info->locked_nodes, this, NULL)) {
+ /* Since the lock was lost on quorum no. of nodes, we should
+ * not attempt to heal it anymore. Some other client could have
+ * acquired the lock, modified data and released it and this
+ * client wouldn't know about it if we heal it.*/
+ afr_mark_fd_bad(info->fd, this);
+ list_del(&info->pos);
+ afr_lk_heal_info_cleanup(info);
+ /* We're not winding an unlock on the node where the lock is still
+ * present because when fencing logic switches over to the new
+ * client (since we marked the fd bad), it should preempt any
+ * existing lock. */
+ }
+ }
+ return 0;
+}
+
gf_boolean_t
afr_is_consistent_io_possible(afr_local_t *local, afr_private_t *priv,
int32_t *op_errno)
@@ -70,6 +693,19 @@ afr_is_consistent_io_possible(afr_local_t *local, afr_private_t *priv,
return _gf_true;
}
+gf_boolean_t
+afr_is_lock_mode_mandatory(dict_t *xdata)
+{
+ int ret = 0;
+ uint32_t lk_mode = GF_LK_ADVISORY;
+
+ ret = dict_get_uint32(xdata, GF_LOCK_MODE, &lk_mode);
+ if (!ret && lk_mode == GF_LK_MANDATORY)
+ return _gf_true;
+
+ return _gf_false;
+}
+
call_frame_t *
afr_copy_frame(call_frame_t *base)
{
@@ -98,21 +734,24 @@ afr_is_possibly_under_txn(afr_transaction_type type, afr_local_t *local,
int tmp = 0;
afr_private_t *priv = NULL;
GF_UNUSED char *key = NULL;
+ int keylen = 0;
priv = this->private;
- if (type == AFR_ENTRY_TRANSACTION)
+ if (type == AFR_ENTRY_TRANSACTION) {
key = GLUSTERFS_PARENT_ENTRYLK;
- else if (type == AFR_DATA_TRANSACTION)
+ keylen = SLEN(GLUSTERFS_PARENT_ENTRYLK);
+ } else if (type == AFR_DATA_TRANSACTION) {
/*FIXME: Use GLUSTERFS_INODELK_DOM_COUNT etc. once
* pl_inodelk_xattr_fill supports separate keys for different
* domains.*/
key = GLUSTERFS_INODELK_COUNT;
-
+ keylen = SLEN(GLUSTERFS_INODELK_COUNT);
+ }
for (i = 0; i < priv->child_count; i++) {
if (!local->replies[i].xdata)
continue;
- if (dict_get_int32(local->replies[i].xdata, key, &tmp) == 0)
+ if (dict_get_int32n(local->replies[i].xdata, key, keylen, &tmp) == 0)
if (tmp)
return _gf_true;
}
@@ -260,11 +899,7 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local,
count = gf_bits_count(tmp_map);
- if (count == 1)
- index = gf_bits_index(tmp_map);
-
for (i = 0; i < priv->child_count; i++) {
- mask = 0;
if (!local->transaction.failed_subvols[i])
continue;
@@ -278,25 +913,27 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local,
switch (txn_type) {
case AFR_METADATA_TRANSACTION:
if ((metadatamap_old != 0) && (metadatamap == 0) && (count == 1)) {
+ index = gf_bits_index(tmp_map);
local->transaction.in_flight_sb_errno = local->replies[index]
.op_errno;
local->transaction.in_flight_sb = _gf_true;
metadatamap |= (1 << index);
}
if (metadatamap_old != metadatamap) {
- event = 0;
+ __afr_inode_need_refresh_set(inode, this);
}
break;
case AFR_DATA_TRANSACTION:
if ((datamap_old != 0) && (datamap == 0) && (count == 1)) {
+ index = gf_bits_index(tmp_map);
local->transaction.in_flight_sb_errno = local->replies[index]
.op_errno;
local->transaction.in_flight_sb = _gf_true;
datamap |= (1 << index);
}
if (datamap_old != datamap)
- event = 0;
+ __afr_inode_need_refresh_set(inode, this);
break;
default:
@@ -460,34 +1097,6 @@ out:
}
int
-__afr_inode_event_gen_reset_small(inode_t *inode, xlator_t *this)
-{
- int ret = -1;
- uint16_t datamap = 0;
- uint16_t metadatamap = 0;
- uint32_t event = 0;
- uint64_t val = 0;
- afr_inode_ctx_t *ctx = NULL;
-
- ret = __afr_inode_ctx_get(this, inode, &ctx);
- if (ret)
- return ret;
-
- val = ctx->read_subvol;
-
- metadatamap = (val & 0x000000000000ffff) >> 0;
- datamap = (val & 0x00000000ffff0000) >> 16;
- event = 0;
-
- val = ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) |
- (((uint64_t)event) << 32);
-
- ctx->read_subvol = val;
-
- return ret;
-}
-
-int
__afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
unsigned char *metadata, int *event_p)
{
@@ -558,22 +1167,6 @@ out:
}
int
-__afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- int ret = -1;
-
- priv = this->private;
-
- if (priv->child_count <= 16)
- ret = __afr_inode_event_gen_reset_small(inode, this);
- else
- ret = -1;
-
- return ret;
-}
-
-int
afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
unsigned char *metadata, int *event_p)
{
@@ -639,12 +1232,11 @@ afr_inode_get_readable(call_frame_t *frame, inode_t *inode, xlator_t *this,
return 0;
}
-int
+static int
afr_inode_split_brain_choice_get(inode_t *inode, xlator_t *this,
int *spb_choice)
{
int ret = -1;
-
GF_VALIDATE_OR_GOTO(this->name, inode, out);
LOCK(&inode->lock);
@@ -656,6 +1248,40 @@ out:
return ret;
}
+/*
+ * frame is used to get the favourite policy. Since
+ * afr_inode_split_brain_choice_get was called with afr_open, it is possible to
+ * have a frame with out local->replies. So in that case, frame is passed as
+ * null, hence this function will handle the frame NULL case.
+ */
+int
+afr_split_brain_read_subvol_get(inode_t *inode, xlator_t *this,
+ call_frame_t *frame, int *spb_subvol)
+{
+ int ret = -1;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO("afr", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+ GF_VALIDATE_OR_GOTO(this->name, spb_subvol, out);
+
+ priv = this->private;
+
+ ret = afr_inode_split_brain_choice_get(inode, this, spb_subvol);
+ if (*spb_subvol < 0 && priv->fav_child_policy && frame && frame->local) {
+ local = frame->local;
+ *spb_subvol = afr_sh_get_fav_by_policy(this, local->replies, inode,
+ NULL);
+ if (*spb_subvol >= 0) {
+ ret = 0;
+ }
+ }
+
+out:
+ return ret;
+}
int
afr_inode_read_subvol_set(inode_t *inode, xlator_t *this, unsigned char *data,
unsigned char *metadata, int event)
@@ -722,30 +1348,22 @@ out:
return need_refresh;
}
-static int
-afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
+int
+__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
{
int ret = -1;
afr_inode_ctx_t *ctx = NULL;
- GF_VALIDATE_OR_GOTO(this->name, inode, out);
-
- LOCK(&inode->lock);
- {
- ret = __afr_inode_ctx_get(this, inode, &ctx);
- if (ret)
- goto unlock;
-
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret == 0) {
ctx->need_refresh = _gf_true;
}
-unlock:
- UNLOCK(&inode->lock);
-out:
+
return ret;
}
int
-afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
+afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
{
int ret = -1;
@@ -753,7 +1371,7 @@ afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
LOCK(&inode->lock);
{
- ret = __afr_inode_event_gen_reset(inode, this);
+ ret = __afr_inode_need_refresh_set(inode, this);
}
UNLOCK(&inode->lock);
out:
@@ -773,6 +1391,7 @@ afr_spb_choice_timeout_cancel(xlator_t *this, inode_t *inode)
{
ret = __afr_inode_ctx_get(this, inode, &ctx);
if (ret < 0 || !ctx) {
+ UNLOCK(&inode->lock);
gf_msg(this->name, GF_LOG_WARNING, 0,
AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR,
"Failed to cancel split-brain choice timer.");
@@ -785,8 +1404,8 @@ afr_spb_choice_timeout_cancel(xlator_t *this, inode_t *inode)
}
ret = 0;
}
-out:
UNLOCK(&inode->lock);
+out:
return ret;
}
@@ -818,7 +1437,6 @@ afr_set_split_brain_choice(int ret, call_frame_t *frame, void *opaque)
gf_boolean_t timer_set = _gf_false;
gf_boolean_t timer_cancelled = _gf_false;
gf_boolean_t timer_reset = _gf_false;
- gf_boolean_t need_invalidate = _gf_true;
int old_spb_choice = -1;
frame = data->frame;
@@ -862,10 +1480,11 @@ afr_set_split_brain_choice(int ret, call_frame_t *frame, void *opaque)
{
ret = __afr_inode_ctx_get(this, inode, &ctx);
if (ret) {
+ UNLOCK(&inode->lock);
gf_msg(this->name, GF_LOG_ERROR, 0,
AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR,
"Failed to get inode_ctx for %s", loc->name);
- goto unlock;
+ goto post_unlock;
}
old_spb_choice = ctx->spb_choice;
@@ -929,10 +1548,10 @@ afr_set_split_brain_choice(int ret, call_frame_t *frame, void *opaque)
timer_set = _gf_true;
if (timer_reset && !ctx->timer)
timer_cancelled = _gf_true;
- need_invalidate = _gf_false;
}
unlock:
UNLOCK(&inode->lock);
+post_unlock:
if (!timer_set)
inode_unref(inode);
if (timer_cancelled)
@@ -942,8 +1561,7 @@ unlock:
* reads from an older cached value despite a change in spb_choice to
* a new value.
*/
- if (need_invalidate)
- inode_invalidate(inode);
+ inode_invalidate(inode);
out:
GF_FREE(data);
AFR_STACK_UNWIND(setxattr, frame, ret, op_errno, NULL);
@@ -988,7 +1606,7 @@ afr_accuse_smallfiles(xlator_t *this, struct afr_reply *replies,
for (i = 0; i < priv->child_count; i++) {
if (replies[i].valid && replies[i].xdata &&
- dict_get(replies[i].xdata, GLUSTERFS_BAD_INODE))
+ dict_get_sizen(replies[i].xdata, GLUSTERFS_BAD_INODE))
continue;
if (data_accused[i])
continue;
@@ -1037,7 +1655,7 @@ afr_readables_fill(call_frame_t *frame, xlator_t *this, inode_t *inode,
if (replies) { /* Lookup */
if (!replies[i].valid || replies[i].op_ret == -1 ||
(replies[i].xdata &&
- dict_get(replies[i].xdata, GLUSTERFS_BAD_INODE))) {
+ dict_get_sizen(replies[i].xdata, GLUSTERFS_BAD_INODE))) {
data_readable[i] = 0;
metadata_readable[i] = 0;
continue;
@@ -1050,6 +1668,8 @@ afr_readables_fill(call_frame_t *frame, xlator_t *this, inode_t *inode,
ia_type = inode->ia_type;
}
+ if (!xdata)
+ continue; /* mkdir_cbk sends NULL xdata_rsp. */
afr_accused_fill(this, xdata, data_accused,
(ia_type == IA_IFDIR) ? AFR_ENTRY_TRANSACTION
: AFR_DATA_TRANSACTION);
@@ -1153,18 +1773,12 @@ ret:
}
gf_boolean_t
-afr_selfheal_enabled(xlator_t *this)
+afr_selfheal_enabled(const xlator_t *this)
{
- afr_private_t *priv = NULL;
- gf_boolean_t data = _gf_false;
- int ret = 0;
+ const afr_private_t *priv = this->private;
- priv = this->private;
-
- ret = gf_string2boolean(priv->data_self_heal, &data);
- GF_ASSERT(!ret);
-
- return data || priv->metadata_self_heal || priv->entry_self_heal;
+ return priv->data_self_heal || priv->metadata_self_heal ||
+ priv->entry_self_heal;
}
int
@@ -1177,7 +1791,6 @@ afr_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
inode_t *inode = NULL;
int event_generation = 0;
int read_subvol = -1;
- int op_errno = ENOMEM;
int ret = 0;
local = frame->local;
@@ -1193,7 +1806,7 @@ afr_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
ret = afr_inode_get_readable(frame, inode, this, local->readable,
&event_generation, local->transaction.type);
- if (ret == -EIO || (local->is_read_txn && !event_generation)) {
+ if (ret == -EIO) {
/* No readable subvolume even after refresh ==> splitbrain.*/
if (!priv->fav_child_policy) {
err = EIO;
@@ -1206,18 +1819,12 @@ afr_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
goto refresh_done;
}
- heal_frame = copy_frame(frame);
+ heal_frame = afr_frame_create(this, NULL);
if (!heal_frame) {
err = EIO;
goto refresh_done;
}
- heal_frame->root->pid = GF_CLIENT_PID_SELF_HEALD;
- heal_local = AFR_FRAME_INIT(heal_frame, op_errno);
- if (!heal_local) {
- err = EIO;
- AFR_STACK_DESTROY(heal_frame);
- goto refresh_done;
- }
+ heal_local = heal_frame->local;
heal_local->xdata_req = dict_new();
if (!heal_local->xdata_req) {
err = EIO;
@@ -1238,18 +1845,6 @@ refresh_done:
return 0;
}
-static void
-afr_fill_success_replies(afr_local_t *local, afr_private_t *priv,
- unsigned char *replies)
-{
- int i = 0;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->replies[i].valid && local->replies[i].op_ret == 0)
- replies[i] = 1;
- }
-}
-
int
afr_inode_refresh_done(call_frame_t *frame, xlator_t *this, int error)
{
@@ -1259,7 +1854,6 @@ afr_inode_refresh_done(call_frame_t *frame, xlator_t *this, int error)
gf_boolean_t start_heal = _gf_false;
afr_local_t *heal_local = NULL;
unsigned char *success_replies = NULL;
- int op_errno = ENOMEM;
int ret = 0;
if (error != 0) {
@@ -1271,32 +1865,32 @@ afr_inode_refresh_done(call_frame_t *frame, xlator_t *this, int error)
success_replies = alloca0(priv->child_count);
afr_fill_success_replies(local, priv, success_replies);
- if (!afr_has_quorum(success_replies, this)) {
- error = afr_final_errno(frame->local, this->private);
- if (!error)
- error = afr_quorum_errno(priv);
- goto refresh_done;
- }
-
if (priv->thin_arbiter_count && local->is_read_txn &&
AFR_COUNT(success_replies, priv->child_count) != priv->child_count) {
/* We need to query the good bricks and/or thin-arbiter.*/
+ if (success_replies[0]) {
+ local->read_txn_query_child = AFR_CHILD_ZERO;
+ } else if (success_replies[1]) {
+ local->read_txn_query_child = AFR_CHILD_ONE;
+ }
error = EINVAL;
goto refresh_done;
}
+ if (!afr_has_quorum(success_replies, this, frame)) {
+ error = afr_final_errno(frame->local, this->private);
+ if (!error)
+ error = afr_quorum_errno(priv);
+ goto refresh_done;
+ }
+
ret = afr_replies_interpret(frame, this, local->refreshinode, &start_heal);
if (ret && afr_selfheal_enabled(this) && start_heal) {
- heal_frame = copy_frame(frame);
+ heal_frame = afr_frame_create(this, NULL);
if (!heal_frame)
goto refresh_done;
- heal_frame->root->pid = GF_CLIENT_PID_SELF_HEALD;
- heal_local = AFR_FRAME_INIT(heal_frame, op_errno);
- if (!heal_local) {
- AFR_STACK_DESTROY(heal_frame);
- goto refresh_done;
- }
+ heal_local = heal_frame->local;
heal_local->refreshinode = inode_ref(local->refreshinode);
heal_local->heal_frame = heal_frame;
if (!afr_throttled_selfheal(heal_frame, this)) {
@@ -1333,17 +1927,22 @@ afr_inode_refresh_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (xdata)
local->replies[call_child].xdata = dict_ref(xdata);
}
+
if (xdata) {
ret = dict_get_int8(xdata, "link-count", &need_heal);
- local->replies[call_child].need_heal = need_heal;
- } else {
- local->replies[call_child].need_heal = need_heal;
+ if (ret) {
+ gf_msg_debug(this->name, -ret, "Unable to get link count");
+ }
}
+ local->replies[call_child].need_heal = need_heal;
call_count = afr_frame_return(frame);
if (call_count == 0) {
afr_set_need_heal(this, local);
ret = afr_inode_refresh_err(frame, this);
+ if (ret) {
+ gf_msg_debug(this->name, ret, "afr_inode_refresh_err failed");
+ }
afr_inode_refresh_done(frame, this, ret);
}
}
@@ -1452,12 +2051,12 @@ afr_inode_refresh_do(call_frame_t *frame, xlator_t *this)
return 0;
}
- ret = dict_set_str(xdata, "link-count", GF_XATTROP_INDEX_COUNT);
+ ret = dict_set_sizen_str_sizen(xdata, "link-count", GF_XATTROP_INDEX_COUNT);
if (ret) {
gf_msg_debug(this->name, -ret, "Unable to set link-count in dict ");
}
- ret = dict_set_str(xdata, GLUSTERFS_INODELK_DOM_COUNT, this->name);
+ ret = dict_set_str_sizen(xdata, GLUSTERFS_INODELK_DOM_COUNT, this->name);
if (ret) {
gf_msg_debug(this->name, -ret,
"Unable to set inodelk-dom-count in dict ");
@@ -1555,7 +2154,7 @@ afr_xattr_req_prepare(xlator_t *this, dict_t *xattr_req)
"query flag");
}
- ret = dict_set_int32(xattr_req, "list-xattr", 1);
+ ret = dict_set_int32_sizen(xattr_req, "list-xattr", 1);
if (ret) {
gf_msg_debug(this->name, -ret, "Unable to set list-xattr in dict ");
}
@@ -1600,7 +2199,8 @@ afr_lookup_xattr_req_prepare(afr_local_t *local, xlator_t *this,
GLUSTERFS_PARENT_ENTRYLK);
}
- ret = dict_set_str(local->xattr_req, "link-count", GF_XATTROP_INDEX_COUNT);
+ ret = dict_set_sizen_str_sizen(local->xattr_req, "link-count",
+ GF_XATTROP_INDEX_COUNT);
if (ret) {
gf_msg_debug(this->name, -ret, "Unable to set link-count in dict ");
}
@@ -1611,19 +2211,18 @@ out:
}
int
-afr_least_pending_reads_child(afr_private_t *priv)
+afr_least_pending_reads_child(afr_private_t *priv, unsigned char *readable)
{
int i = 0;
- int child = 0;
+ int child = -1;
int64_t read_iter = -1;
int64_t pending_read = -1;
- pending_read = GF_ATOMIC_GET(priv->pending_reads[0]);
- for (i = 1; i < priv->child_count; i++) {
- if (AFR_IS_ARBITER_BRICK(priv, i))
+ for (i = 0; i < priv->child_count; i++) {
+ if (AFR_IS_ARBITER_BRICK(priv, i) || !readable[i])
continue;
read_iter = GF_ATOMIC_GET(priv->pending_reads[i]);
- if (read_iter < pending_read) {
+ if (child == -1 || read_iter < pending_read) {
pending_read = read_iter;
child = i;
}
@@ -1632,8 +2231,54 @@ afr_least_pending_reads_child(afr_private_t *priv)
return child;
}
+static int32_t
+afr_least_latency_child(afr_private_t *priv, unsigned char *readable)
+{
+ int32_t i = 0;
+ int child = -1;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (AFR_IS_ARBITER_BRICK(priv, i) || !readable[i] ||
+ priv->child_latency[i] < 0)
+ continue;
+
+ if (child == -1 ||
+ priv->child_latency[i] < priv->child_latency[child]) {
+ child = i;
+ }
+ }
+ return child;
+}
+
+static int32_t
+afr_least_latency_times_pending_reads_child(afr_private_t *priv,
+ unsigned char *readable)
+{
+ int32_t i = 0;
+ int child = -1;
+ int64_t pending_read = 0;
+ int64_t latency = -1;
+ int64_t least_latency = -1;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (AFR_IS_ARBITER_BRICK(priv, i) || !readable[i] ||
+ priv->child_latency[i] < 0)
+ continue;
+
+ pending_read = GF_ATOMIC_GET(priv->pending_reads[i]);
+ latency = (pending_read + 1) * priv->child_latency[i];
+
+ if (child == -1 || latency < least_latency) {
+ least_latency = latency;
+ child = i;
+ }
+ }
+ return child;
+}
+
int
-afr_hash_child(afr_read_subvol_args_t *args, afr_private_t *priv)
+afr_hash_child(afr_read_subvol_args_t *args, afr_private_t *priv,
+ unsigned char *readable)
{
uuid_t gfid_copy = {
0,
@@ -1642,14 +2287,14 @@ afr_hash_child(afr_read_subvol_args_t *args, afr_private_t *priv)
int child = -1;
switch (priv->hash_mode) {
- case 0:
+ case AFR_READ_POLICY_FIRST_UP:
break;
- case 1:
+ case AFR_READ_POLICY_GFID_HASH:
gf_uuid_copy(gfid_copy, args->gfid);
child = SuperFastHash((char *)gfid_copy, sizeof(gfid_copy)) %
priv->child_count;
break;
- case 2:
+ case AFR_READ_POLICY_GFID_PID_HASH:
if (args->ia_type != IA_IFDIR) {
/*
* Why getpid? Because it's one of the cheapest calls
@@ -1661,14 +2306,21 @@ afr_hash_child(afr_read_subvol_args_t *args, afr_private_t *priv)
* need is a low probability that multiple clients
* won't converge on the same subvolume.
*/
+ gf_uuid_copy(gfid_copy, args->gfid);
pid = getpid();
- memcpy(gfid_copy, &pid, sizeof(pid));
+ *(pid_t *)gfid_copy ^= pid;
}
child = SuperFastHash((char *)gfid_copy, sizeof(gfid_copy)) %
priv->child_count;
break;
- case 3:
- child = afr_least_pending_reads_child(priv);
+ case AFR_READ_POLICY_LESS_LOAD:
+ child = afr_least_pending_reads_child(priv, readable);
+ break;
+ case AFR_READ_POLICY_LEAST_LATENCY:
+ child = afr_least_latency_child(priv, readable);
+ break;
+ case AFR_READ_POLICY_LOAD_LATENCY_HYBRID:
+ child = afr_least_latency_times_pending_reads_child(priv, readable);
break;
}
@@ -1701,7 +2353,7 @@ afr_read_subvol_select_by_policy(inode_t *inode, xlator_t *this,
}
/* second preference - use hashed mode */
- read_subvol = afr_hash_child(&local_args, priv);
+ read_subvol = afr_hash_child(&local_args, priv, readable);
if (read_subvol >= 0 && readable[read_subvol])
return read_subvol;
@@ -1796,11 +2448,9 @@ afr_local_transaction_cleanup(afr_local_t *local, xlator_t *this)
afr_matrix_cleanup(local->pending, priv->child_count);
- GF_FREE(local->internal_lock.locked_nodes);
-
GF_FREE(local->internal_lock.lower_locked_nodes);
- afr_entry_lockee_cleanup(&local->internal_lock);
+ afr_lockees_cleanup(&local->internal_lock);
GF_FREE(local->transaction.pre_op);
@@ -2009,6 +2659,9 @@ afr_local_cleanup(afr_local_t *local, xlator_t *this)
{ /* lk */
GF_FREE(local->cont.lk.locked_nodes);
+ GF_FREE(local->cont.lk.dom_locked_nodes);
+ GF_FREE(local->cont.lk.dom_lock_op_ret);
+ GF_FREE(local->cont.lk.dom_lock_op_errno);
}
{ /* create */
@@ -2239,7 +2892,7 @@ afr_attempt_readsubvol_set(call_frame_t *frame, xlator_t *this,
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int spb_choice = -1;
+ int spb_subvol = -1;
int child_count = -1;
if (*read_subvol != -1)
@@ -2249,13 +2902,15 @@ afr_attempt_readsubvol_set(call_frame_t *frame, xlator_t *this,
local = frame->local;
child_count = priv->child_count;
- afr_inode_split_brain_choice_get(local->inode, this, &spb_choice);
- if ((spb_choice >= 0) &&
+ afr_split_brain_read_subvol_get(local->inode, this, frame, &spb_subvol);
+ if ((spb_subvol >= 0) &&
(AFR_COUNT(success_replies, child_count) == child_count)) {
- *read_subvol = spb_choice;
- } else if (!priv->quorum_count) {
+ *read_subvol = spb_subvol;
+ } else if (!priv->quorum_count ||
+ frame->root->pid == GF_CLIENT_PID_GLFS_HEAL) {
*read_subvol = afr_first_up_child(frame, this);
- } else if (priv->quorum_count && afr_has_quorum(data_readable, this)) {
+ } else if (priv->quorum_count &&
+ afr_has_quorum(data_readable, this, NULL)) {
/* read_subvol is guaranteed to be valid if we hit this path. */
*read_subvol = afr_first_up_child(frame, this);
} else {
@@ -2270,7 +2925,7 @@ afr_attempt_readsubvol_set(call_frame_t *frame, xlator_t *this,
local->loc.path);
}
if (*read_subvol >= 0)
- dict_del(local->replies[*read_subvol].xdata, GF_CONTENT_KEY);
+ dict_del_sizen(local->replies[*read_subvol].xdata, GF_CONTENT_KEY);
}
static void
@@ -2291,6 +2946,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
0,
};
gf_boolean_t locked_entry = _gf_false;
+ gf_boolean_t in_flight_create = _gf_false;
gf_boolean_t can_interpret = _gf_true;
inode_t *parent = NULL;
ia_type_t ia_type = IA_INVAL;
@@ -2334,17 +2990,12 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
if (!replies[i].valid)
continue;
- if (locked_entry && replies[i].op_ret == -1 &&
- replies[i].op_errno == ENOENT) {
- /* Second, check entry is still
- "underway" in creation */
- local->op_ret = -1;
- local->op_errno = ENOENT;
- goto error;
- }
-
- if (replies[i].op_ret == -1)
+ if (replies[i].op_ret == -1) {
+ if (locked_entry && replies[i].op_errno == ENOENT) {
+ in_flight_create = _gf_true;
+ }
continue;
+ }
if (read_subvol == -1 || !readable[read_subvol]) {
read_subvol = i;
@@ -2354,6 +3005,12 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
}
}
+ if (in_flight_create && !afr_has_quorum(success_replies, this, NULL)) {
+ local->op_ret = -1;
+ local->op_errno = ENOENT;
+ goto error;
+ }
+
if (read_subvol == -1)
goto error;
/* We now have a read_subvol, which is readable[] (if there
@@ -2382,7 +3039,8 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
/* If we were called from glfsheal and there is still a gfid
* mismatch, succeed the lookup and let glfsheal print the
* response via gfid-heal-msg.*/
- if (!dict_get_str(local->xattr_req, "gfid-heal-msg", &gfid_heal_msg))
+ if (!dict_get_str_sizen(local->xattr_req, "gfid-heal-msg",
+ &gfid_heal_msg))
goto cant_interpret;
/* LOG ERROR */
@@ -2397,7 +3055,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
read_subvol = -1;
memset(readable, 0, sizeof(*readable) * priv->child_count);
if (can_interpret) {
- if (!afr_has_quorum(success_replies, this))
+ if (!afr_has_quorum(success_replies, this, NULL))
goto cant_interpret;
/* It is safe to call afr_replies_interpret() because we have
a response from all the UP subvolumes and all of them resolved
@@ -2411,8 +3069,8 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
if (read_subvol == -1)
goto cant_interpret;
if (ret) {
- afr_inode_event_gen_reset(local->inode, this);
- dict_del(local->replies[read_subvol].xdata, GF_CONTENT_KEY);
+ afr_inode_need_refresh_set(local->inode, this);
+ dict_del_sizen(local->replies[read_subvol].xdata, GF_CONTENT_KEY);
}
} else {
cant_interpret:
@@ -2436,10 +3094,10 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
goto error;
}
- ret = dict_get_str(local->xattr_req, "gfid-heal-msg", &gfid_heal_msg);
+ ret = dict_get_str_sizen(local->xattr_req, "gfid-heal-msg", &gfid_heal_msg);
if (!ret) {
- ret = dict_set_str(local->replies[read_subvol].xdata, "gfid-heal-msg",
- gfid_heal_msg);
+ ret = dict_set_str_sizen(local->replies[read_subvol].xdata,
+ "gfid-heal-msg", gfid_heal_msg);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_SET_FAILED,
"Error setting gfid-heal-msg dict");
@@ -2464,7 +3122,7 @@ error:
* others in that they must be given higher priority while
* returning to the user.
*
- * The hierarchy is ENODATA > ENOENT > ESTALE > others
+ * The hierarchy is ENODATA > ENOENT > ESTALE > ENOSPC others
*/
int
@@ -2476,6 +3134,8 @@ afr_higher_errno(int32_t old_errno, int32_t new_errno)
return ENOENT;
if (old_errno == ESTALE || new_errno == ESTALE)
return ESTALE;
+ if (old_errno == ENOSPC || new_errno == ENOSPC)
+ return ENOSPC;
return new_errno;
}
@@ -2517,7 +3177,7 @@ afr_local_discovery_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
priv = this->private;
child_index = (int32_t)(long)cookie;
- ret = dict_get_str(dict, GF_XATTR_PATHINFO_KEY, &pathinfo);
+ ret = dict_get_str_sizen(dict, GF_XATTR_PATHINFO_KEY, &pathinfo);
if (ret != 0) {
goto out;
}
@@ -2605,7 +3265,11 @@ afr_lookup_sh_metadata_wrap(void *opaque)
dict = dict_new();
if (!dict)
goto out;
- ret = dict_set_str(dict, "link-count", GF_XATTROP_INDEX_COUNT);
+ if (local->xattr_req) {
+ dict_copy(local->xattr_req, dict);
+ }
+
+ ret = dict_set_sizen_str_sizen(dict, "link-count", GF_XATTROP_INDEX_COUNT);
if (ret) {
gf_msg_debug(this->name, -ret, "Unable to set link-count in dict ");
}
@@ -2613,7 +3277,7 @@ afr_lookup_sh_metadata_wrap(void *opaque)
if (loc_is_nameless(&local->loc)) {
ret = afr_selfheal_unlocked_discover_on(frame, local->inode,
local->loc.gfid, local->replies,
- local->child_up);
+ local->child_up, dict);
} else {
inode = afr_selfheal_unlocked_lookup_on(frame, local->loc.parent,
local->loc.name, local->replies,
@@ -2787,7 +3451,7 @@ afr_lookup_selfheal_wrap(void *opaque)
inode = afr_selfheal_unlocked_lookup_on(frame, local->loc.parent,
local->loc.name, local->replies,
- local->child_up, NULL);
+ local->child_up, local->xattr_req);
if (inode)
inode_unref(inode);
@@ -2874,7 +3538,7 @@ afr_lookup_entry_heal(call_frame_t *frame, xlator_t *this)
if (name_state_mismatch) {
if (!priv->quorum_count)
goto name_heal;
- if (!afr_has_quorum(success, this))
+ if (!afr_has_quorum(success, this, NULL))
goto name_heal;
if (op_errno)
goto name_heal;
@@ -2932,7 +3596,7 @@ afr_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
* with ESTALE so that a fresh lookup will be sent by the top xlator.
* So remember it.
*/
- if (xdata && dict_get(xdata, "gfid-changed"))
+ if (xdata && dict_get_sizen(xdata, "gfid-changed"))
local->cont.lookup.needs_fresh_lookup = _gf_true;
if (xdata) {
@@ -2962,8 +3626,8 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int op_errno = 0;
int read_subvol = -1;
+ int ret = 0;
unsigned char *data_readable = NULL;
unsigned char *success_replies = NULL;
@@ -2976,17 +3640,19 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this)
if (AFR_COUNT(success_replies, priv->child_count) > 0)
local->op_ret = 0;
- op_errno = afr_final_errno(frame->local, this->private);
-
if (local->op_ret < 0) {
- AFR_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
- return;
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno(frame->local, this->private);
+ goto error;
}
- if (!afr_has_quorum(success_replies, this))
+ if (!afr_has_quorum(success_replies, this, frame))
goto unwind;
- afr_replies_interpret(frame, this, local->inode, NULL);
+ ret = afr_replies_interpret(frame, this, local->inode, NULL);
+ if (ret) {
+ afr_inode_need_refresh_set(local->inode, this);
+ }
read_subvol = afr_read_subvol_decide(local->inode, this, NULL,
data_readable);
@@ -2994,11 +3660,8 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this)
unwind:
afr_attempt_readsubvol_set(frame, this, success_replies, data_readable,
&read_subvol);
- if (read_subvol == -1) {
- AFR_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno, NULL,
- NULL, NULL, NULL);
- return;
- }
+ if (read_subvol == -1)
+ goto error;
if (AFR_IS_ARBITER_BRICK(priv, read_subvol) && local->op_ret == 0) {
local->op_ret = -1;
@@ -3013,6 +3676,11 @@ unwind:
local->inode, &local->replies[read_subvol].poststat,
local->replies[read_subvol].xdata,
&local->replies[read_subvol].postparent);
+ return;
+
+error:
+ AFR_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno, NULL, NULL,
+ NULL, NULL);
}
static int
@@ -3036,7 +3704,7 @@ afr_ta_id_file_check(void *opaque)
this = opaque;
priv = this->private;
- ret = afr_fill_ta_loc(this, &loc);
+ ret = afr_fill_ta_loc(this, &loc, _gf_false);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
"Failed to populate thin-arbiter loc for: %s.", loc.name);
@@ -3209,8 +3877,6 @@ afr_discover(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
}
if (__is_root_gfid(loc->inode->gfid)) {
- if (!this->itable)
- this->itable = loc->inode->table;
if (!priv->root_inode)
priv->root_inode = inode_ref(loc->inode);
@@ -3229,10 +3895,15 @@ afr_discover(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
local->inode = inode_ref(loc->inode);
- if (xattr_req)
+ if (xattr_req) {
/* If xattr_req was null, afr_lookup_xattr_req_prepare() will
allocate one for us */
- local->xattr_req = dict_ref(xattr_req);
+ local->xattr_req = dict_copy_with_ref(xattr_req, NULL);
+ if (!local->xattr_req) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ }
if (gf_uuid_is_null(loc->inode->gfid)) {
afr_discover_do(frame, this, 0);
@@ -3242,11 +3913,7 @@ afr_discover(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
afr_read_subvol_get(loc->inode, this, NULL, NULL, &event,
AFR_DATA_TRANSACTION, NULL);
- if (afr_is_inode_refresh_reqd(loc->inode, this, event,
- local->event_generation))
- afr_inode_refresh(frame, this, loc->inode, NULL, afr_discover_do);
- else
- afr_discover_do(frame, this, 0);
+ afr_discover_do(frame, this, 0);
return 0;
out:
@@ -3268,7 +3935,6 @@ afr_lookup_do(call_frame_t *frame, xlator_t *this, int err)
if (err < 0) {
local->op_errno = err;
- ret = -1;
goto out;
}
@@ -3279,7 +3945,6 @@ afr_lookup_do(call_frame_t *frame, xlator_t *this, int err)
&local->loc);
if (ret) {
local->op_errno = -ret;
- ret = -1;
goto out;
}
@@ -3344,16 +4009,15 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
if (loc_is_nameless(loc)) {
if (xattr_req)
- dict_del(xattr_req, "gfid-req");
+ dict_del_sizen(xattr_req, "gfid-req");
afr_discover(frame, this, loc, xattr_req);
return 0;
}
- if (__is_root_gfid(loc->parent->gfid)) {
- if (!strcmp(loc->name, GF_REPLICATE_TRASH_DIR)) {
- op_errno = EPERM;
- goto out;
- }
+ if (afr_is_private_directory(this->private, loc->parent->gfid, loc->name,
+ frame->root->pid)) {
+ op_errno = EPERM;
+ goto out;
}
local = AFR_FRAME_INIT(frame, op_errno);
@@ -3382,18 +4046,14 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
ret = dict_get_gfuuid(local->xattr_req, "gfid-req",
&local->cont.lookup.gfid_req);
if (ret == 0) {
- dict_del(local->xattr_req, "gfid-req");
+ dict_del_sizen(local->xattr_req, "gfid-req");
}
}
afr_read_subvol_get(loc->parent, this, NULL, NULL, &event,
AFR_DATA_TRANSACTION, NULL);
- if (afr_is_inode_refresh_reqd(loc->inode, this, event,
- local->event_generation))
- afr_inode_refresh(frame, this, loc->parent, NULL, afr_lookup_do);
- else
- afr_lookup_do(frame, this, 0);
+ afr_lookup_do(frame, this, 0);
return 0;
out:
@@ -3403,8 +4063,18 @@ out:
}
void
-_afr_cleanup_fd_ctx(afr_fd_ctx_t *fd_ctx)
+_afr_cleanup_fd_ctx(xlator_t *this, afr_fd_ctx_t *fd_ctx)
{
+ afr_private_t *priv = this->private;
+
+ if (fd_ctx->lk_heal_info) {
+ LOCK(&priv->lock);
+ {
+ list_del(&fd_ctx->lk_heal_info->pos);
+ }
+ afr_lk_heal_info_cleanup(fd_ctx->lk_heal_info);
+ fd_ctx->lk_heal_info = NULL;
+ }
GF_FREE(fd_ctx->opened_on);
GF_FREE(fd_ctx);
return;
@@ -3424,7 +4094,7 @@ afr_cleanup_fd_ctx(xlator_t *this, fd_t *fd)
fd_ctx = (afr_fd_ctx_t *)(long)ctx;
if (fd_ctx) {
- _afr_cleanup_fd_ctx(fd_ctx);
+ _afr_cleanup_fd_ctx(this, fd_ctx);
}
out:
@@ -3517,13 +4187,14 @@ __afr_fd_ctx_set(xlator_t *this, fd_t *fd)
}
fd_ctx->readdir_subvol = -1;
+ fd_ctx->lk_heal_info = NULL;
ret = __fd_ctx_set(fd, this, (uint64_t)(long)fd_ctx);
if (ret)
gf_msg_debug(this->name, 0, "failed to set fd ctx (%p)", fd);
out:
if (ret && fd_ctx)
- _afr_cleanup_fd_ctx(fd_ctx);
+ _afr_cleanup_fd_ctx(this, fd_ctx);
return ret;
}
@@ -3547,11 +4218,10 @@ afr_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
} else {
local->op_errno = op_errno;
}
+ call_count = --local->call_count;
}
UNLOCK(&frame->lock);
- call_count = afr_frame_return(frame);
-
if (call_count == 0)
AFR_STACK_UNWIND(flush, frame, local->op_ret, local->op_errno,
local->xdata_rsp);
@@ -3647,6 +4317,7 @@ afr_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
call_stub_t *stub = NULL;
int op_errno = ENOMEM;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
local = AFR_FRAME_INIT(frame, op_errno);
if (!local)
goto out;
@@ -3687,11 +4358,10 @@ afr_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
} else {
local->op_errno = op_errno;
}
+ call_count = --local->call_count;
}
UNLOCK(&frame->lock);
- call_count = afr_frame_return(frame);
-
if (call_count == 0)
AFR_STACK_UNWIND(fsyncdir, frame, local->op_ret, local->op_errno,
local->xdata_rsp);
@@ -4008,7 +4678,7 @@ afr_fop_lock_done(call_frame_t *frame, xlator_t *this)
if (afr_is_conflicting_lock_present(local->op_ret, local->op_errno)) {
afr_unlock_locks_and_proceed(frame, this, lock_count);
- } else if (priv->quorum_count && !afr_has_quorum(success, this)) {
+ } else if (priv->quorum_count && !afr_has_quorum(success, this, NULL)) {
local->fop_lock_state = AFR_FOP_LOCK_QUORUM_FAILED;
local->op_ret = -1;
local->op_errno = afr_final_errno(local, priv);
@@ -4184,9 +4854,9 @@ out:
}
static int32_t
-afr_handle_inodelk(call_frame_t *frame, glusterfs_fop_t fop, const char *volume,
- loc_t *loc, fd_t *fd, int32_t cmd, struct gf_flock *flock,
- dict_t *xdata)
+afr_handle_inodelk(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop,
+ const char *volume, loc_t *loc, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
{
afr_local_t *local = NULL;
int32_t op_errno = ENOMEM;
@@ -4198,8 +4868,10 @@ afr_handle_inodelk(call_frame_t *frame, glusterfs_fop_t fop, const char *volume,
local->op = fop;
if (loc)
loc_copy(&local->loc, loc);
- if (fd)
+ if (fd && (flock->l_type != F_UNLCK)) {
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
local->fd = fd_ref(fd);
+ }
local->cont.inodelk.volume = gf_strdup(volume);
if (!local->cont.inodelk.volume) {
@@ -4228,8 +4900,8 @@ int32_t
afr_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- afr_handle_inodelk(frame, GF_FOP_INODELK, volume, loc, NULL, cmd, flock,
- xdata);
+ afr_handle_inodelk(frame, this, GF_FOP_INODELK, volume, loc, NULL, cmd,
+ flock, xdata);
return 0;
}
@@ -4237,15 +4909,16 @@ int32_t
afr_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- afr_handle_inodelk(frame, GF_FOP_FINODELK, volume, NULL, fd, cmd, flock,
- xdata);
+ afr_handle_inodelk(frame, this, GF_FOP_FINODELK, volume, NULL, fd, cmd,
+ flock, xdata);
return 0;
}
static int
-afr_handle_entrylk(call_frame_t *frame, glusterfs_fop_t fop, const char *volume,
- loc_t *loc, fd_t *fd, const char *basename, entrylk_cmd cmd,
- entrylk_type type, dict_t *xdata)
+afr_handle_entrylk(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop,
+ const char *volume, loc_t *loc, fd_t *fd,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
{
afr_local_t *local = NULL;
int32_t op_errno = ENOMEM;
@@ -4257,8 +4930,10 @@ afr_handle_entrylk(call_frame_t *frame, glusterfs_fop_t fop, const char *volume,
local->op = fop;
if (loc)
loc_copy(&local->loc, loc);
- if (fd)
+ if (fd && (cmd != ENTRYLK_UNLOCK)) {
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
local->fd = fd_ref(fd);
+ }
local->cont.entrylk.cmd = cmd;
local->cont.entrylk.in_cmd = cmd;
local->cont.entrylk.type = type;
@@ -4285,8 +4960,8 @@ afr_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
const char *basename, entrylk_cmd cmd, entrylk_type type,
dict_t *xdata)
{
- afr_handle_entrylk(frame, GF_FOP_ENTRYLK, volume, loc, NULL, basename, cmd,
- type, xdata);
+ afr_handle_entrylk(frame, this, GF_FOP_ENTRYLK, volume, loc, NULL, basename,
+ cmd, type, xdata);
return 0;
}
@@ -4295,8 +4970,8 @@ afr_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
const char *basename, entrylk_cmd cmd, entrylk_type type,
dict_t *xdata)
{
- afr_handle_entrylk(frame, GF_FOP_FENTRYLK, volume, NULL, fd, basename, cmd,
- type, xdata);
+ afr_handle_entrylk(frame, this, GF_FOP_FENTRYLK, volume, NULL, fd, basename,
+ cmd, type, xdata);
return 0;
}
@@ -4308,10 +4983,10 @@ afr_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int call_count = 0;
struct statvfs *buf = NULL;
+ local = frame->local;
+
LOCK(&frame->lock);
{
- local = frame->local;
-
if (op_ret != 0) {
local->op_errno = op_errno;
goto unlock;
@@ -4337,10 +5012,9 @@ afr_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
}
}
unlock:
+ call_count = --local->call_count;
UNLOCK(&frame->lock);
- call_count = afr_frame_return(frame);
-
if (call_count == 0)
AFR_STACK_UNWIND(statfs, frame, local->op_ret, local->op_errno,
&local->cont.statfs.buf, local->xdata_rsp);
@@ -4415,9 +5089,10 @@ afr_lk_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
}
call_count = afr_frame_return(frame);
- if (call_count == 0)
+ if (call_count == 0) {
AFR_STACK_UNWIND(lk, frame, local->op_ret, local->op_errno, NULL,
local->xdata_rsp);
+ }
return 0;
}
@@ -4499,7 +5174,7 @@ afr_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
local->cont.lk.cmd, &local->cont.lk.user_flock,
local->xdata_req);
} else if (priv->quorum_count &&
- !afr_has_quorum(local->cont.lk.locked_nodes, this)) {
+ !afr_has_quorum(local->cont.lk.locked_nodes, this, NULL)) {
local->op_ret = -1;
local->op_errno = afr_final_errno(local, priv);
@@ -4516,11 +5191,133 @@ afr_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
}
int
+afr_lk_transaction_cbk(int ret, call_frame_t *frame, void *opaque)
+{
+ return 0;
+}
+
+int
+afr_lk_txn_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int child_index = -1;
+
+ local = frame->local;
+ child_index = (long)cookie;
+ afr_common_lock_cbk(frame, cookie, this, op_ret, op_errno, xdata);
+ if (op_ret == 0) {
+ local->op_ret = 0;
+ local->op_errno = 0;
+ local->cont.lk.locked_nodes[child_index] = 1;
+ local->cont.lk.ret_flock = *lock;
+ }
+ syncbarrier_wake(&local->barrier);
+ return 0;
+}
+
+int
+afr_lk_txn_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ int child_index = (long)cookie;
+
+ if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_UNLOCK_FAIL,
+ "gfid=%s: unlock failed on subvolume %s "
+ "with lock owner %s",
+ uuid_utoa(local->fd->inode->gfid),
+ priv->children[child_index]->name,
+ lkowner_utoa(&frame->root->lk_owner));
+ }
+ return 0;
+}
+int
+afr_lk_transaction(void *opaque)
+{
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ char *wind_on = NULL;
+ int op_errno = 0;
+ int i = 0;
+ int ret = 0;
+
+ frame = (call_frame_t *)opaque;
+ local = frame->local;
+ this = frame->this;
+ priv = this->private;
+ wind_on = alloca0(priv->child_count);
+
+ if (priv->arbiter_count || priv->child_count != 3) {
+ op_errno = ENOTSUP;
+ gf_msg(frame->this->name, GF_LOG_ERROR, op_errno, AFR_MSG_LK_HEAL_DOM,
+ "%s: Lock healing supported only for replica 3 volumes.",
+ uuid_utoa(local->fd->inode->gfid));
+ goto err;
+ }
+
+ op_errno = -afr_dom_lock_acquire(frame); // Released during
+ // AFR_STACK_UNWIND
+ if (op_errno != 0) {
+ goto err;
+ }
+ if (priv->quorum_count &&
+ !afr_has_quorum(local->cont.lk.dom_locked_nodes, this, NULL)) {
+ op_errno = afr_final_errno(local, priv);
+ goto err;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (priv->child_up[i] && local->cont.lk.dom_locked_nodes[i])
+ wind_on[i] = 1;
+ }
+ AFR_ONLIST(wind_on, frame, afr_lk_txn_wind_cbk, lk, local->fd,
+ local->cont.lk.cmd, &local->cont.lk.user_flock,
+ local->xdata_req);
+
+ if (priv->quorum_count &&
+ !afr_has_quorum(local->cont.lk.locked_nodes, this, NULL)) {
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno(local, priv);
+ goto unlock;
+ } else {
+ if (local->cont.lk.user_flock.l_type == F_UNLCK)
+ ret = afr_remove_lock_from_saved_locks(local, this);
+ else
+ ret = afr_add_lock_to_saved_locks(frame, this);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ goto unlock;
+ }
+ AFR_STACK_UNWIND(lk, frame, local->op_ret, local->op_errno,
+ &local->cont.lk.ret_flock, local->xdata_rsp);
+ }
+
+ return 0;
+
+unlock:
+ local->cont.lk.user_flock.l_type = F_UNLCK;
+ AFR_ONLIST(local->cont.lk.locked_nodes, frame, afr_lk_txn_unlock_cbk, lk,
+ local->fd, F_SETLK, &local->cont.lk.user_flock, NULL);
+err:
+ AFR_STACK_UNWIND(lk, frame, -1, op_errno, NULL, NULL);
+ return -1;
+}
+
+int
afr_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
struct gf_flock *flock, dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
+ int ret = 0;
int i = 0;
int32_t op_errno = ENOMEM;
@@ -4531,9 +5328,11 @@ afr_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
goto out;
local->op = GF_FOP_LK;
- if (!afr_lk_is_unlock(cmd, flock) &&
- !afr_is_consistent_io_possible(local, priv, &op_errno))
- goto out;
+ if (!afr_lk_is_unlock(cmd, flock)) {
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ if (!afr_is_consistent_io_possible(local, priv, &op_errno))
+ goto out;
+ }
local->cont.lk.locked_nodes = GF_CALLOC(
priv->child_count, sizeof(*local->cont.lk.locked_nodes),
@@ -4551,6 +5350,16 @@ afr_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
if (xdata)
local->xdata_req = dict_ref(xdata);
+ if (afr_is_lock_mode_mandatory(xdata)) {
+ ret = synctask_new(this->ctx->env, afr_lk_transaction,
+ afr_lk_transaction_cbk, frame, frame);
+ if (ret) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ return 0;
+ }
+
STACK_WIND_COOKIE(frame, afr_lk_cbk, (void *)(long)0, priv->children[i],
priv->children[i]->fops->lk, fd, cmd, flock,
local->xdata_req);
@@ -4654,7 +5463,7 @@ afr_lease_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
priv->children[child_index]->fops->lease, &local->loc,
&local->cont.lease.user_lease, xdata);
} else if (priv->quorum_count &&
- !afr_has_quorum(local->cont.lease.locked_nodes, this)) {
+ !afr_has_quorum(local->cont.lease.locked_nodes, this, NULL)) {
local->op_ret = -1;
local->op_errno = afr_final_errno(local, priv);
@@ -4872,8 +5681,10 @@ afr_priv_dump(xlator_t *this)
GF_ATOMIC_GET(priv->pending_reads[i]));
sprintf(key, "child_latency[%d]", i);
gf_proc_dump_write(key, "%" PRId64, priv->child_latency[i]);
+ sprintf(key, "halo_child_up[%d]", i);
+ gf_proc_dump_write(key, "%d", priv->halo_child_up[i]);
}
- gf_proc_dump_write("data_self_heal", "%s", priv->data_self_heal);
+ gf_proc_dump_write("data_self_heal", "%d", priv->data_self_heal);
gf_proc_dump_write("metadata_self_heal", "%d", priv->metadata_self_heal);
gf_proc_dump_write("entry_self_heal", "%d", priv->entry_self_heal);
gf_proc_dump_write("read_child", "%d", priv->read_child);
@@ -4884,6 +5695,7 @@ afr_priv_dump(xlator_t *this)
priv->background_self_heal_count);
gf_proc_dump_write("healers", "%d", priv->healers);
gf_proc_dump_write("read-hash-mode", "%d", priv->hash_mode);
+ gf_proc_dump_write("use-anonymous-inode", "%d", priv->use_anon_inode);
if (priv->quorum_count == AFR_QUORUM_AUTO) {
gf_proc_dump_write("quorum-type", "auto");
} else if (priv->quorum_count == 0) {
@@ -4892,7 +5704,7 @@ afr_priv_dump(xlator_t *this)
gf_proc_dump_write("quorum-type", "fixed");
gf_proc_dump_write("quorum-count", "%d", priv->quorum_count);
}
- gf_proc_dump_write("up", "%u", afr_has_quorum(priv->child_up, this));
+ gf_proc_dump_write("up", "%u", afr_has_quorum(priv->child_up, this, NULL));
if (priv->thin_arbiter_count) {
gf_proc_dump_write("ta_child_up", "%d", priv->ta_child_up);
gf_proc_dump_write("ta_bad_child_index", "%d",
@@ -4944,13 +5756,31 @@ __afr_get_up_children_count(afr_private_t *priv)
return up_children;
}
+static int
+__get_heard_from_all_status(xlator_t *this)
+{
+ afr_private_t *priv = this->private;
+ int i;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!priv->last_event[i]) {
+ return 0;
+ }
+ }
+ if (priv->thin_arbiter_count && !priv->ta_child_up) {
+ return 0;
+ }
+ return 1;
+}
+
glusterfs_event_t
-__afr_transform_event_from_state(afr_private_t *priv)
+__afr_transform_event_from_state(xlator_t *this)
{
int i = 0;
int up_children = 0;
+ afr_private_t *priv = this->private;
- if (AFR_COUNT(priv->last_event, priv->child_count) == priv->child_count)
+ if (__get_heard_from_all_status(this))
/* have_heard_from_all. Let afr_notify() do the propagation. */
return GF_EVENT_MAXVAL;
@@ -4992,7 +5822,7 @@ afr_notify_cbk(void *data)
goto unlock;
}
priv->timer = NULL;
- event = __afr_transform_event_from_state(priv);
+ event = __afr_transform_event_from_state(this);
if (event != GF_EVENT_MAXVAL)
propagate = _gf_true;
}
@@ -5019,22 +5849,6 @@ __afr_launch_notify_timer(xlator_t *this, afr_private_t *priv)
}
}
-int
-__get_heard_from_all_status(xlator_t *this)
-{
- afr_private_t *priv = this->private;
- int heard_from_all = 1;
- int i = 0;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!priv->last_event[i]) {
- heard_from_all = 0;
- break;
- }
- }
- return heard_from_all;
-}
-
static int
find_best_down_child(xlator_t *this)
{
@@ -5046,7 +5860,7 @@ find_best_down_child(xlator_t *this)
priv = this->private;
for (i = 0; i < priv->child_count; i++) {
- if (priv->child_up[i] && priv->child_latency[i] >= 0 &&
+ if (!priv->child_up[i] && priv->child_latency[i] >= 0 &&
priv->child_latency[i] < best_latency) {
best_child = i;
best_latency = priv->child_latency[i];
@@ -5118,7 +5932,9 @@ __afr_handle_ping_event(xlator_t *this, xlator_t *child_xlator, const int idx,
"), "
"marking child down.",
child_latency_msec, halo_max_latency_msec);
- *event = GF_EVENT_CHILD_DOWN;
+ if (priv->halo_child_up[idx]) {
+ *event = GF_EVENT_CHILD_DOWN;
+ }
}
} else if (child_latency_msec < halo_max_latency_msec &&
priv->child_up[idx] == 0) {
@@ -5130,7 +5946,9 @@ __afr_handle_ping_event(xlator_t *this, xlator_t *child_xlator, const int idx,
"), "
"marking child up.",
child_latency_msec, halo_max_latency_msec);
- *event = GF_EVENT_CHILD_UP;
+ if (priv->halo_child_up[idx]) {
+ *event = GF_EVENT_CHILD_UP;
+ }
} else {
gf_log(child_xlator->name, GF_LOG_INFO,
"Not marking child %d up, "
@@ -5192,9 +6010,15 @@ __afr_handle_child_up_event(xlator_t *this, xlator_t *child_xlator,
* want to set the child_latency to MAX to indicate
* the child needs ping data to be available before doing child-up
*/
- if (child_latency_msec < 0 && priv->halo_enabled) {
+ if (!priv->halo_enabled)
+ goto out;
+
+ if (child_latency_msec < 0) {
/*set to INT64_MAX-1 so that it is found for best_down_child*/
- priv->child_latency[idx] = AFR_HALO_MAX_LATENCY;
+ priv->halo_child_up[idx] = 1;
+ if (priv->child_latency[idx] < 0) {
+ priv->child_latency[idx] = AFR_HALO_MAX_LATENCY;
+ }
}
/*
@@ -5232,13 +6056,14 @@ __afr_handle_child_up_event(xlator_t *this, xlator_t *child_xlator,
"up_children (%d) > halo_max_replicas (%d)",
worst_up_child, up_children, priv->halo_max_replicas);
}
-
+out:
if (up_children == 1) {
gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SUBVOL_UP,
"Subvolume '%s' came back up; "
"going online.",
child_xlator->name);
- gf_event(EVENT_AFR_SUBVOL_UP, "subvol=%s", this->name);
+ gf_event(EVENT_AFR_SUBVOL_UP, "client-pid=%d; subvol=%s",
+ this->ctx->cmd_args.client_pid, this->name);
} else {
*event = GF_EVENT_SOME_DESCENDENT_UP;
}
@@ -5282,6 +6107,7 @@ __afr_handle_child_down_event(xlator_t *this, xlator_t *child_xlator, int idx,
*/
if (child_latency_msec < 0) {
priv->child_latency[idx] = child_latency_msec;
+ priv->halo_child_up[idx] = 0;
}
priv->child_up[idx] = 0;
@@ -5294,7 +6120,7 @@ __afr_handle_child_down_event(xlator_t *this, xlator_t *child_xlator, int idx,
* as we want it to be up to date if we are going to
* begin using it synchronously.
*/
- if (up_children < priv->halo_min_replicas) {
+ if (priv->halo_enabled && up_children < priv->halo_min_replicas) {
best_down_child = find_best_down_child(this);
if (best_down_child >= 0) {
gf_msg_debug(this->name, 0,
@@ -5306,7 +6132,6 @@ __afr_handle_child_down_event(xlator_t *this, xlator_t *child_xlator, int idx,
*up_child = best_down_child;
}
}
-
for (i = 0; i < priv->child_count; i++)
if (priv->child_up[i] == 0)
down_children++;
@@ -5315,7 +6140,8 @@ __afr_handle_child_down_event(xlator_t *this, xlator_t *child_xlator, int idx,
"All subvolumes are down. Going "
"offline until at least one of them "
"comes back up.");
- gf_event(EVENT_AFR_SUBVOLS_DOWN, "subvol=%s", this->name);
+ gf_event(EVENT_AFR_SUBVOLS_DOWN, "client-pid=%d; subvol=%s",
+ this->ctx->cmd_args.client_pid, this->name);
} else {
*event = GF_EVENT_SOME_DESCENDENT_DOWN;
}
@@ -5364,6 +6190,11 @@ afr_handle_inodelk_contention(xlator_t *this, struct gf_upcall *upcall)
}
LOCK(&priv->lock);
{
+ if (priv->release_ta_notify_dom_lock == _gf_true) {
+ /* Ignore multiple release requests from shds.*/
+ UNLOCK(&priv->lock);
+ return;
+ }
priv->release_ta_notify_dom_lock = _gf_true;
inmem_count = priv->ta_in_mem_txn_count;
onwire_count = priv->ta_on_wire_txn_count;
@@ -5371,7 +6202,7 @@ afr_handle_inodelk_contention(xlator_t *this, struct gf_upcall *upcall)
UNLOCK(&priv->lock);
if (inmem_count || onwire_count)
/* lock release will happen in txn code path after
- * inflight or on-wire txns are over.*/
+ * in-memory or on-wire txns are over.*/
return;
afr_ta_lock_release_synctask(this);
@@ -5470,14 +6301,15 @@ afr_notify(xlator_t *this, int32_t event, void *data, void *data2)
goto out;
}
- had_quorum = priv->quorum_count && afr_has_quorum(priv->child_up, this);
- if (priv->halo_enabled) {
- halo_max_latency_msec = afr_get_halo_latency(this);
+ had_quorum = priv->quorum_count &&
+ afr_has_quorum(priv->child_up, this, NULL);
+ if (event == GF_EVENT_CHILD_PING) {
+ child_latency_msec = (int64_t)(uintptr_t)data2;
+ if (priv->halo_enabled) {
+ halo_max_latency_msec = afr_get_halo_latency(this);
- if (event == GF_EVENT_CHILD_PING) {
/* Calculates the child latency and sets event
*/
- child_latency_msec = (int64_t)(uintptr_t)data2;
LOCK(&priv->lock);
{
__afr_handle_ping_event(this, child_xlator, idx,
@@ -5485,6 +6317,12 @@ afr_notify(xlator_t *this, int32_t event, void *data, void *data2)
child_latency_msec);
}
UNLOCK(&priv->lock);
+ } else {
+ LOCK(&priv->lock);
+ {
+ priv->child_latency[idx] = child_latency_msec;
+ }
+ UNLOCK(&priv->lock);
}
}
@@ -5528,22 +6366,27 @@ afr_notify(xlator_t *this, int32_t event, void *data, void *data2)
if (priv->thin_arbiter_count &&
(idx == AFR_CHILD_THIN_ARBITER)) {
priv->ta_child_up = 1;
+ priv->ta_event_gen++;
break;
}
__afr_handle_child_up_event(this, child_xlator, idx,
child_latency_msec, &event,
&call_psh, &up_child);
+ __afr_lock_heal_synctask(this, priv, idx);
break;
case GF_EVENT_CHILD_DOWN:
if (priv->thin_arbiter_count &&
(idx == AFR_CHILD_THIN_ARBITER)) {
priv->ta_child_up = 0;
+ priv->ta_event_gen++;
+ afr_ta_locked_priv_invalidate(priv);
break;
}
__afr_handle_child_down_event(this, child_xlator, idx,
child_latency_msec, &event,
&call_psh, &up_child);
+ __afr_mark_pending_lk_heal(this, priv, idx);
break;
case GF_EVENT_CHILD_CONNECTING:
@@ -5585,16 +6428,18 @@ afr_notify(xlator_t *this, int32_t event, void *data, void *data2)
UNLOCK(&priv->lock);
if (priv->quorum_count) {
- has_quorum = afr_has_quorum(priv->child_up, this);
+ has_quorum = afr_has_quorum(priv->child_up, this, NULL);
if (!had_quorum && has_quorum) {
gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_QUORUM_MET,
"Client-quorum is met");
- gf_event(EVENT_AFR_QUORUM_MET, "subvol=%s", this->name);
+ gf_event(EVENT_AFR_QUORUM_MET, "client-pid=%d; subvol=%s",
+ this->ctx->cmd_args.client_pid, this->name);
}
if (had_quorum && !has_quorum) {
gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_QUORUM_FAIL,
"Client-quorum is not met");
- gf_event(EVENT_AFR_QUORUM_FAIL, "subvol=%s", this->name);
+ gf_event(EVENT_AFR_QUORUM_FAIL, "client-pid=%d; subvol=%s",
+ this->ctx->cmd_args.client_pid, this->name);
}
}
@@ -5613,10 +6458,8 @@ afr_notify(xlator_t *this, int32_t event, void *data, void *data2)
* b) Already heard from everyone, but we now got a child-up
* event.
*/
- if (have_heard_from_all && priv->shd.iamshd) {
- for (i = 0; i < priv->child_count; i++)
- if (priv->child_up[i])
- afr_selfheal_childup(this, i);
+ if (have_heard_from_all) {
+ afr_selfheal_childup(this, priv);
}
}
out:
@@ -5637,7 +6480,7 @@ afr_local_init(afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
goto out;
}
- local->child_up = GF_CALLOC(priv->child_count, sizeof(*local->child_up),
+ local->child_up = GF_MALLOC(priv->child_count * sizeof(*local->child_up),
gf_afr_mt_char);
if (!local->child_up) {
if (op_errno)
@@ -5696,7 +6539,11 @@ afr_local_init(afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
if (priv->thin_arbiter_count) {
local->ta_child_up = priv->ta_child_up;
local->ta_failed_subvol = AFR_CHILD_UNKNOWN;
+ local->read_txn_query_child = AFR_CHILD_UNKNOWN;
+ local->ta_event_gen = priv->ta_event_gen;
+ local->fop_state = TA_SUCCESS;
}
+ local->is_new_entry = _gf_false;
INIT_LIST_HEAD(&local->healer);
return 0;
@@ -5709,11 +6556,6 @@ afr_internal_lock_init(afr_internal_lock_t *lk, size_t child_count)
{
int ret = -ENOMEM;
- lk->locked_nodes = GF_CALLOC(sizeof(*lk->locked_nodes), child_count,
- gf_afr_mt_char);
- if (NULL == lk->locked_nodes)
- goto out;
-
lk->lower_locked_nodes = GF_CALLOC(sizeof(*lk->lower_locked_nodes),
child_count, gf_afr_mt_char);
if (NULL == lk->lower_locked_nodes)
@@ -5771,6 +6613,10 @@ afr_transaction_local_init(afr_local_t *local, xlator_t *this)
afr_private_t *priv = NULL;
priv = this->private;
+ INIT_LIST_HEAD(&local->transaction.wait_list);
+ INIT_LIST_HEAD(&local->transaction.owner_list);
+ INIT_LIST_HEAD(&local->ta_waitq);
+ INIT_LIST_HEAD(&local->ta_onwireq);
ret = afr_internal_lock_init(&local->internal_lock, priv->child_count);
if (ret < 0)
goto out;
@@ -5808,10 +6654,6 @@ afr_transaction_local_init(afr_local_t *local, xlator_t *this)
goto out;
ret = 0;
- INIT_LIST_HEAD(&local->transaction.wait_list);
- INIT_LIST_HEAD(&local->transaction.owner_list);
- INIT_LIST_HEAD(&local->ta_waitq);
- INIT_LIST_HEAD(&local->ta_onwireq);
out:
return ret;
}
@@ -5830,6 +6672,8 @@ afr_priv_destroy(afr_private_t *priv)
if (!priv)
goto out;
+
+ GF_FREE(priv->sh_domain);
GF_FREE(priv->last_event);
child_count = priv->child_count;
@@ -5845,7 +6689,9 @@ afr_priv_destroy(afr_private_t *priv)
GF_FREE(priv->local);
GF_FREE(priv->pending_key);
GF_FREE(priv->children);
+ GF_FREE(priv->anon_inode);
GF_FREE(priv->child_up);
+ GF_FREE(priv->halo_child_up);
GF_FREE(priv->child_latency);
LOCK_DESTROY(&priv->lock);
@@ -5896,263 +6742,218 @@ out:
return changelog;
}
-gf_boolean_t
-afr_decide_heal_info(afr_private_t *priv, unsigned char *sources, int source)
+static dict_t *
+afr_set_heal_info(char *status)
{
- int sources_count = 0;
+ dict_t *dict = NULL;
+ int ret = -1;
- if (source < 0)
+ dict = dict_new();
+ if (!dict) {
+ ret = -ENOMEM;
goto out;
+ }
- sources_count = AFR_COUNT(sources, priv->child_count);
- if (sources_count == priv->child_count)
- return _gf_false;
+ ret = dict_set_dynstr_sizen(dict, "heal-info", status);
+ if (ret)
+ gf_msg("", GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Failed to set heal-info key to "
+ "%s",
+ status);
out:
- return _gf_true;
+ /* Any error other than EINVAL, dict_set_dynstr frees status */
+ if (ret == -ENOMEM || ret == -EINVAL) {
+ GF_FREE(status);
+ }
+
+ if (ret && dict) {
+ dict_unref(dict);
+ dict = NULL;
+ }
+ return dict;
}
-int
-afr_selfheal_locked_metadata_inspect(call_frame_t *frame, xlator_t *this,
- inode_t *inode, gf_boolean_t *msh,
- unsigned char *pending)
+static gf_boolean_t
+afr_is_dirty_count_non_unary_for_txn(xlator_t *this, struct afr_reply *replies,
+ afr_transaction_type type)
{
- int ret = -1;
- unsigned char *locked_on = NULL;
- unsigned char *sources = NULL;
- unsigned char *sinks = NULL;
- unsigned char *healed_sinks = NULL;
- unsigned char *undid_pending = NULL;
- struct afr_reply *locked_replies = NULL;
-
afr_private_t *priv = this->private;
+ int *dirty = alloca0(priv->child_count * sizeof(int));
+ int i = 0;
- locked_on = alloca0(priv->child_count);
- sources = alloca0(priv->child_count);
- sinks = alloca0(priv->child_count);
- healed_sinks = alloca0(priv->child_count);
- undid_pending = alloca0(priv->child_count);
-
- locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count);
-
- ret = afr_selfheal_inodelk(frame, this, inode, this->name, LLONG_MAX - 1, 0,
- locked_on);
- {
- if (ret == 0) {
- /* Not a single lock */
- ret = -afr_final_errno(frame->local, priv);
- if (ret == 0)
- ret = -ENOTCONN; /* all invalid responses */
- goto out;
- }
- ret = __afr_selfheal_metadata_prepare(
- frame, this, inode, locked_on, sources, sinks, healed_sinks,
- undid_pending, locked_replies, pending);
- *msh = afr_decide_heal_info(priv, sources, ret);
+ afr_selfheal_extract_xattr(this, replies, type, dirty, NULL);
+ for (i = 0; i < priv->child_count; i++) {
+ if (dirty[i] > 1)
+ return _gf_true;
}
- afr_selfheal_uninodelk(frame, this, inode, this->name, LLONG_MAX - 1, 0,
- locked_on);
-out:
- if (locked_replies)
- afr_replies_wipe(locked_replies, priv->child_count);
- return ret;
+
+ return _gf_false;
}
-int
-afr_selfheal_locked_data_inspect(call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_boolean_t *dsh, unsigned char *pflag)
+static gf_boolean_t
+afr_is_dirty_count_non_unary(xlator_t *this, struct afr_reply *replies,
+ ia_type_t ia_type)
{
- int ret = -1;
- unsigned char *data_lock = NULL;
- unsigned char *sources = NULL;
- unsigned char *sinks = NULL;
- unsigned char *healed_sinks = NULL;
- unsigned char *undid_pending = NULL;
- afr_private_t *priv = NULL;
- struct afr_reply *locked_replies = NULL;
- inode_t *inode = fd->inode;
+ gf_boolean_t data_chk = _gf_false;
+ gf_boolean_t mdata_chk = _gf_false;
+ gf_boolean_t entry_chk = _gf_false;
- priv = this->private;
- data_lock = alloca0(priv->child_count);
- sources = alloca0(priv->child_count);
- sinks = alloca0(priv->child_count);
- healed_sinks = alloca0(priv->child_count);
- undid_pending = alloca0(priv->child_count);
-
- locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count);
+ switch (ia_type) {
+ case IA_IFDIR:
+ mdata_chk = _gf_true;
+ entry_chk = _gf_true;
+ break;
+ case IA_IFREG:
+ mdata_chk = _gf_true;
+ data_chk = _gf_true;
+ break;
+ default:
+ /*IA_IFBLK, IA_IFCHR, IA_IFLNK, IA_IFIFO, IA_IFSOCK*/
+ mdata_chk = _gf_true;
+ break;
+ }
- ret = afr_selfheal_inodelk(frame, this, inode, this->name, 0, 0, data_lock);
- {
- if (ret == 0) {
- ret = -afr_final_errno(frame->local, priv);
- if (ret == 0)
- ret = -ENOTCONN; /* all invalid responses */
- goto out;
- }
- ret = __afr_selfheal_data_prepare(frame, this, inode, data_lock,
- sources, sinks, healed_sinks,
- undid_pending, locked_replies, pflag);
- *dsh = afr_decide_heal_info(priv, sources, ret);
+ if (data_chk && afr_is_dirty_count_non_unary_for_txn(
+ this, replies, AFR_DATA_TRANSACTION)) {
+ return _gf_true;
+ } else if (mdata_chk && afr_is_dirty_count_non_unary_for_txn(
+ this, replies, AFR_METADATA_TRANSACTION)) {
+ return _gf_true;
+ } else if (entry_chk && afr_is_dirty_count_non_unary_for_txn(
+ this, replies, AFR_ENTRY_TRANSACTION)) {
+ return _gf_true;
}
- afr_selfheal_uninodelk(frame, this, inode, this->name, 0, 0, data_lock);
-out:
- if (locked_replies)
- afr_replies_wipe(locked_replies, priv->child_count);
- return ret;
+
+ return _gf_false;
}
-int
-afr_selfheal_locked_entry_inspect(call_frame_t *frame, xlator_t *this,
- inode_t *inode, gf_boolean_t *esh,
- unsigned char *pflag)
+static int
+afr_update_heal_status(xlator_t *this, struct afr_reply *replies,
+ ia_type_t ia_type, gf_boolean_t *esh, gf_boolean_t *dsh,
+ gf_boolean_t *msh, unsigned char pending)
{
int ret = -1;
- int source = -1;
+ GF_UNUSED int ret1 = 0;
+ int i = 0;
+ int io_domain_lk_count = 0;
+ int shd_domain_lk_count = 0;
afr_private_t *priv = NULL;
- unsigned char *locked_on = NULL;
- unsigned char *data_lock = NULL;
- unsigned char *sources = NULL;
- unsigned char *sinks = NULL;
- unsigned char *healed_sinks = NULL;
- struct afr_reply *locked_replies = NULL;
- gf_boolean_t granular_locks = _gf_false;
+ char *key1 = NULL;
+ char *key2 = NULL;
priv = this->private;
- if (strcmp("granular", priv->locking_scheme) == 0)
- granular_locks = _gf_true;
- locked_on = alloca0(priv->child_count);
- data_lock = alloca0(priv->child_count);
- sources = alloca0(priv->child_count);
- sinks = alloca0(priv->child_count);
- healed_sinks = alloca0(priv->child_count);
+ key1 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 +
+ strlen(this->name));
+ key2 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 +
+ strlen(priv->sh_domain));
+ sprintf(key1, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, this->name);
+ sprintf(key2, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, priv->sh_domain);
- locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count);
-
- if (!granular_locks) {
- ret = afr_selfheal_tryentrylk(frame, this, inode, priv->sh_domain, NULL,
- locked_on);
- }
- {
- if (!granular_locks && ret == 0) {
- ret = -afr_final_errno(frame->local, priv);
- if (ret == 0)
- ret = -ENOTCONN; /* all invalid responses */
- goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ if ((replies[i].valid != 1) || (replies[i].op_ret != 0))
+ continue;
+ if (!io_domain_lk_count) {
+ ret1 = dict_get_int32(replies[i].xdata, key1, &io_domain_lk_count);
+ }
+ if (!shd_domain_lk_count) {
+ ret1 = dict_get_int32(replies[i].xdata, key2, &shd_domain_lk_count);
}
+ }
- ret = afr_selfheal_entrylk(frame, this, inode, this->name, NULL,
- data_lock);
- {
- if (ret == 0) {
- ret = -afr_final_errno(frame->local, priv);
- if (ret == 0)
- ret = -ENOTCONN;
- /* all invalid responses */
- goto unlock;
- }
- ret = __afr_selfheal_entry_prepare(frame, this, inode, data_lock,
- sources, sinks, healed_sinks,
- locked_replies, &source, pflag);
- if ((ret == 0) && (*pflag & PFLAG_SBRAIN))
- ret = -EIO;
- *esh = afr_decide_heal_info(priv, sources, ret);
+ if (!pending) {
+ if ((afr_is_dirty_count_non_unary(this, replies, ia_type)) ||
+ (!io_domain_lk_count)) {
+ /* Needs heal. */
+ ret = 0;
+ } else {
+ /* No heal needed. */
+ *dsh = *esh = *msh = 0;
+ }
+ } else {
+ if (shd_domain_lk_count) {
+ ret = -EAGAIN; /*For 'possibly-healing'. */
+ } else {
+ ret = 0; /*needs heal. Just set a non -ve value so that it is
+ assumed as the source index.*/
}
- afr_selfheal_unentrylk(frame, this, inode, this->name, NULL, data_lock,
- NULL);
}
-unlock:
- if (!granular_locks)
- afr_selfheal_unentrylk(frame, this, inode, priv->sh_domain, NULL,
- locked_on, NULL);
-out:
- if (locked_replies)
- afr_replies_wipe(locked_replies, priv->child_count);
return ret;
}
+/*return EIO, EAGAIN or pending*/
int
-afr_selfheal_locked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
- inode_t **inode, gf_boolean_t *entry_selfheal,
- gf_boolean_t *data_selfheal,
- gf_boolean_t *metadata_selfheal,
- unsigned char *pending)
-
+afr_lockless_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
+ inode_t **inode, gf_boolean_t *entry_selfheal,
+ gf_boolean_t *data_selfheal,
+ gf_boolean_t *metadata_selfheal, unsigned char *pending)
{
int ret = -1;
- fd_t *fd = NULL;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
gf_boolean_t dsh = _gf_false;
gf_boolean_t msh = _gf_false;
gf_boolean_t esh = _gf_false;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *valid_on = NULL;
+ uint64_t *witness = NULL;
+
+ priv = this->private;
+ replies = alloca0(sizeof(*replies) * priv->child_count);
+ sources = alloca0(sizeof(*sources) * priv->child_count);
+ sinks = alloca0(sizeof(*sinks) * priv->child_count);
+ witness = alloca0(sizeof(*witness) * priv->child_count);
+ valid_on = alloca0(sizeof(*valid_on) * priv->child_count);
ret = afr_selfheal_unlocked_inspect(frame, this, gfid, inode, &dsh, &msh,
- &esh);
+ &esh, replies);
if (ret)
goto out;
-
- /* For every heal type hold locks and check if it indeed needs heal */
-
- /* Heal-info does an open() on the file being examined so that the
- * current eager-lock holding client, if present, at some point sees
- * open-fd count being > 1 and releases the eager-lock so that heal-info
- * doesn't remain blocked forever until IO completes.
- */
- if ((*inode)->ia_type == IA_IFREG) {
- ret = afr_selfheal_data_open(this, *inode, &fd);
- if (ret < 0) {
- gf_msg_debug(this->name, -ret, "%s: Failed to open",
- uuid_utoa((*inode)->gfid));
- goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid && replies[i].op_ret == 0) {
+ valid_on[i] = 1;
}
}
-
if (msh) {
- ret = afr_selfheal_locked_metadata_inspect(frame, this, *inode, &msh,
- pending);
- if (ret == -EIO)
+ ret = afr_selfheal_find_direction(frame, this, replies,
+ AFR_METADATA_TRANSACTION, valid_on,
+ sources, sinks, witness, pending);
+ if (*pending & PFLAG_SBRAIN)
+ ret = -EIO;
+ if (ret)
goto out;
}
-
if (dsh) {
- ret = afr_selfheal_locked_data_inspect(frame, this, fd, &dsh, pending);
- if (ret == -EIO || (ret == -EAGAIN))
+ ret = afr_selfheal_find_direction(frame, this, replies,
+ AFR_DATA_TRANSACTION, valid_on,
+ sources, sinks, witness, pending);
+ if (*pending & PFLAG_SBRAIN)
+ ret = -EIO;
+ if (ret)
goto out;
}
-
if (esh) {
- ret = afr_selfheal_locked_entry_inspect(frame, this, *inode, &esh,
- pending);
+ ret = afr_selfheal_find_direction(frame, this, replies,
+ AFR_ENTRY_TRANSACTION, valid_on,
+ sources, sinks, witness, pending);
+ if (*pending & PFLAG_SBRAIN)
+ ret = -EIO;
+ if (ret)
+ goto out;
}
+ ret = afr_update_heal_status(this, replies, (*inode)->ia_type, &esh, &dsh,
+ &msh, *pending);
out:
*data_selfheal = dsh;
*entry_selfheal = esh;
*metadata_selfheal = msh;
- if (fd)
- fd_unref(fd);
+ if (replies)
+ afr_replies_wipe(replies, priv->child_count);
return ret;
}
-dict_t *
-afr_set_heal_info(char *status)
-{
- dict_t *dict = NULL;
- int ret = -1;
-
- dict = dict_new();
- if (!dict) {
- ret = -ENOMEM;
- goto out;
- }
-
- ret = dict_set_str(dict, "heal-info", status);
- if (ret)
- gf_msg("", GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED,
- "Failed to set heal-info key to "
- "%s",
- status);
-out:
- return dict;
-}
-
int
afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
{
@@ -6162,17 +6963,27 @@ afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
unsigned char pending = 0;
dict_t *dict = NULL;
int ret = -1;
- int op_errno = 0;
+ int op_errno = ENOMEM;
inode_t *inode = NULL;
char *substr = NULL;
char *status = NULL;
+ call_frame_t *heal_frame = NULL;
+ afr_local_t *heal_local = NULL;
+
+ /*Use frame with lk-owner set*/
+ heal_frame = afr_frame_create(frame->this, &op_errno);
+ if (!heal_frame) {
+ ret = -1;
+ goto out;
+ }
+ heal_local = heal_frame->local;
+ heal_frame->local = frame->local;
- ret = afr_selfheal_locked_inspect(frame, this, loc->gfid, &inode,
- &entry_selfheal, &data_selfheal,
- &metadata_selfheal, &pending);
+ ret = afr_lockless_inspect(heal_frame, this, loc->gfid, &inode,
+ &entry_selfheal, &data_selfheal,
+ &metadata_selfheal, &pending);
if (ret == -ENOMEM) {
- op_errno = -ret;
ret = -1;
goto out;
}
@@ -6185,26 +6996,50 @@ afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
if (ret == -EIO) {
ret = gf_asprintf(&status, "split-brain%s", substr ? substr : "");
- if (ret < 0)
+ if (ret < 0) {
goto out;
+ }
dict = afr_set_heal_info(status);
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
} else if (ret == -EAGAIN) {
ret = gf_asprintf(&status, "possibly-healing%s", substr ? substr : "");
- if (ret < 0)
+ if (ret < 0) {
goto out;
+ }
dict = afr_set_heal_info(status);
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
} else if (ret >= 0) {
/* value of ret = source index
* so ret >= 0 and at least one of the 3 booleans set to
* true means a source is identified; heal is required.
*/
if (!data_selfheal && !entry_selfheal && !metadata_selfheal) {
- dict = afr_set_heal_info("no-heal");
+ status = gf_strdup("no-heal");
+ if (!status) {
+ ret = -1;
+ goto out;
+ }
+ dict = afr_set_heal_info(status);
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
} else {
ret = gf_asprintf(&status, "heal%s", substr ? substr : "");
- if (ret < 0)
+ if (ret < 0) {
goto out;
+ }
dict = afr_set_heal_info(status);
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
}
} else if (ret < 0) {
/* Apart from above checked -ve ret values, there are
@@ -6216,14 +7051,25 @@ afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
*/
if (data_selfheal || entry_selfheal || metadata_selfheal) {
ret = gf_asprintf(&status, "heal%s", substr ? substr : "");
- if (ret < 0)
+ if (ret < 0) {
goto out;
+ }
dict = afr_set_heal_info(status);
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
}
}
+
ret = 0;
+ op_errno = 0;
out:
+ if (heal_frame) {
+ heal_frame->local = heal_local;
+ AFR_STACK_DESTROY(heal_frame);
+ }
AFR_STACK_UNWIND(getxattr, frame, ret, op_errno, dict, NULL);
if (dict)
dict_unref(dict);
@@ -6345,10 +7191,10 @@ afr_get_split_brain_status(void *opaque)
}
/* Calculation for string length :
- * (child_count X length of child-name) + SLEN (" Choices :")
+ * (child_count X length of child-name) + SLEN(" Choices :")
* child-name consists of :
* a) 251 = max characters for volname according to GD_VOLUME_NAME_MAX
- * b) strlen ("-client-00,") assuming 16 replicas
+ * b) strlen("-client-00,") assuming 16 replicas
*/
choices = alloca0(priv->child_count * (256 + SLEN("-client-00,")) +
SLEN(" Choices:"));
@@ -6357,8 +7203,8 @@ afr_get_split_brain_status(void *opaque)
if (ret) {
op_errno = -ret;
if (ret == -EAGAIN) {
- ret = dict_set_str(dict, GF_AFR_SBRAIN_STATUS,
- SBRAIN_HEAL_NO_GO_MSG);
+ ret = dict_set_sizen_str_sizen(dict, GF_AFR_SBRAIN_STATUS,
+ SBRAIN_HEAL_NO_GO_MSG);
if (ret) {
gf_msg(this->name, GF_LOG_WARNING, -ret,
AFR_MSG_DICT_SET_FAILED,
@@ -6387,16 +7233,15 @@ afr_get_split_brain_status(void *opaque)
op_errno = ENOMEM;
goto out;
}
- ret = dict_set_dynstr(dict, GF_AFR_SBRAIN_STATUS, status);
+ ret = dict_set_dynstr_sizen(dict, GF_AFR_SBRAIN_STATUS, status);
if (ret) {
op_errno = -ret;
ret = -1;
goto out;
}
} else {
- ret = dict_set_str(dict, GF_AFR_SBRAIN_STATUS,
- "The file is not under data or"
- " metadata split-brain");
+ ret = dict_set_sizen_str_sizen(dict, GF_AFR_SBRAIN_STATUS,
+ SFILE_NOT_UNDER_DATA);
if (ret) {
op_errno = -ret;
ret = -1;
@@ -6421,6 +7266,8 @@ afr_heal_splitbrain_file(call_frame_t *frame, xlator_t *this, loc_t *loc)
int op_errno = 0;
dict_t *dict = NULL;
afr_local_t *local = NULL;
+ afr_local_t *heal_local = NULL;
+ call_frame_t *heal_frame = NULL;
local = frame->local;
dict = dict_new();
@@ -6430,10 +7277,20 @@ afr_heal_splitbrain_file(call_frame_t *frame, xlator_t *this, loc_t *loc)
goto out;
}
- ret = afr_selfheal_do(frame, this, loc->gfid);
+ heal_frame = afr_frame_create(this, &op_errno);
+ if (!heal_frame) {
+ ret = -1;
+ goto out;
+ }
+ heal_local = heal_frame->local;
+ heal_frame->local = frame->local;
+ /*Initiate heal with heal_frame with lk-owner set so that inodelk/entrylk
+ * work correctly*/
+ ret = afr_selfheal_do(heal_frame, this, loc->gfid);
if (ret == 1 || ret == 2) {
- ret = dict_set_str(dict, "sh-fail-msg", "File not in split-brain");
+ ret = dict_set_sizen_str_sizen(dict, "sh-fail-msg",
+ SFILE_NOT_IN_SPLIT_BRAIN);
if (ret)
gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED,
"Failed to set sh-fail-msg in dict");
@@ -6451,6 +7308,10 @@ afr_heal_splitbrain_file(call_frame_t *frame, xlator_t *this, loc_t *loc)
}
out:
+ if (heal_frame) {
+ heal_frame->local = heal_local;
+ AFR_STACK_DESTROY(heal_frame);
+ }
if (local->op == GF_FOP_GETXATTR)
AFR_STACK_UNWIND(getxattr, frame, ret, op_errno, dict, NULL);
else if (local->op == GF_FOP_SETXATTR)
@@ -6593,7 +7454,7 @@ afr_fav_child_reset_sink_xattrs(void *opaque)
ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name, 0, 0,
locked_on);
{
- if (ret < AFR_SH_MIN_PARTICIPANTS)
+ if (ret < priv->child_count)
goto data_unlock;
ret = __afr_selfheal_data_prepare(
heal_frame, this, inode, locked_on, sources, sinks,
@@ -6610,7 +7471,7 @@ afr_fav_child_reset_sink_xattrs(void *opaque)
ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name,
LLONG_MAX - 1, 0, locked_on);
{
- if (ret < AFR_SH_MIN_PARTICIPANTS)
+ if (ret < priv->child_count)
goto mdata_unlock;
ret = __afr_selfheal_metadata_prepare(
heal_frame, this, inode, locked_on, sources, sinks,
@@ -6637,12 +7498,14 @@ afr_serialize_xattrs_with_delimiter(call_frame_t *frame, xlator_t *this,
char *xattr = NULL;
int i = 0;
int len = 0;
+ int keylen = 0;
size_t str_len = 0;
int ret = -1;
priv = this->private;
local = frame->local;
+ keylen = strlen(local->cont.getxattr.name);
for (i = 0; i < priv->child_count; i++) {
if (!local->replies[i].valid || local->replies[i].op_ret) {
str_len = strlen(default_str);
@@ -6651,8 +7514,8 @@ afr_serialize_xattrs_with_delimiter(call_frame_t *frame, xlator_t *this,
buf[len++] = delimiter;
buf[len] = '\0';
} else {
- ret = dict_get_str(local->replies[i].xattr,
- local->cont.getxattr.name, &xattr);
+ ret = dict_get_strn(local->replies[i].xattr,
+ local->cont.getxattr.name, keylen, &xattr);
if (ret) {
gf_msg("TEST", GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED,
"Failed to get the node_uuid of brick "
@@ -6939,3 +7802,77 @@ afr_ta_has_quorum(afr_private_t *priv, afr_local_t *local)
return _gf_false;
}
+
+static gf_boolean_t
+afr_is_add_replica_mount_lookup_on_root(call_frame_t *frame)
+{
+ afr_local_t *local = NULL;
+
+ if (frame->root->pid != GF_CLIENT_PID_ADD_REPLICA_MOUNT)
+ return _gf_false;
+
+ local = frame->local;
+
+ if (local->op != GF_FOP_LOOKUP)
+ /* TODO:If the replica count is being increased on a plain distribute
+ * volume that was never mounted, we need to allow setxattr on '/' with
+ * GF_CLIENT_PID_NO_ROOT_SQUASH to accomodate for DHT layout setting */
+ return _gf_false;
+
+ if (local->inode == NULL)
+ return _gf_false;
+
+ if (!__is_root_gfid(local->inode->gfid))
+ return _gf_false;
+
+ return _gf_true;
+}
+
+gf_boolean_t
+afr_lookup_has_quorum(call_frame_t *frame, const unsigned int up_children_count)
+{
+ if (frame && (up_children_count > 0) &&
+ afr_is_add_replica_mount_lookup_on_root(frame))
+ return _gf_true;
+
+ return _gf_false;
+}
+
+void
+afr_handle_replies_quorum(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ unsigned char *success_replies = NULL;
+
+ success_replies = alloca0(priv->child_count);
+ afr_fill_success_replies(local, priv, success_replies);
+
+ if (priv->quorum_count && !afr_has_quorum(success_replies, this, NULL)) {
+ local->op_errno = afr_final_errno(local, priv);
+ if (!local->op_errno)
+ local->op_errno = afr_quorum_errno(priv);
+ local->op_ret = -1;
+ }
+}
+
+gf_boolean_t
+afr_ta_dict_contains_pending_xattr(dict_t *dict, afr_private_t *priv, int child)
+{
+ int *pending = NULL;
+ int ret = 0;
+ int i = 0;
+
+ ret = dict_get_ptr(dict, priv->pending_key[child], (void *)&pending);
+ if (ret == 0) {
+ for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++) {
+ /* Not doing a ntoh32(pending) as we just want to check
+ * if it is non-zero or not. */
+ if (pending[i]) {
+ return _gf_true;
+ }
+ }
+ }
+
+ return _gf_false;
+}
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index 4c40e85f393..f8bf8340dab 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -10,24 +10,17 @@
#include <libgen.h>
#include <unistd.h>
-#include <fnmatch.h>
#include <sys/time.h>
#include <stdlib.h>
#include <signal.h>
#include <string.h>
-#include "glusterfs.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "checksum.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/list.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
#include "afr.h"
#include "afr-transaction.h"
@@ -45,6 +38,10 @@ afr_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
fd_ctx = local->fd_ctx;
child_index = (long)cookie;
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+
LOCK(&frame->lock);
{
if (op_ret == -1) {
@@ -56,19 +53,22 @@ afr_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (!local->xdata_rsp && xdata)
local->xdata_rsp = dict_ref(xdata);
}
+ call_count = --local->call_count;
}
UNLOCK(&frame->lock);
- call_count = afr_frame_return(frame);
-
- if (call_count == 0)
+ if (call_count == 0) {
+ afr_handle_replies_quorum(frame, this);
AFR_STACK_UNWIND(opendir, frame, local->op_ret, local->op_errno,
local->fd, NULL);
+ }
+
return 0;
}
int
-afr_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+afr_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
@@ -84,6 +84,12 @@ afr_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
goto out;
local->op = GF_FOP_OPENDIR;
+
+ if (priv->quorum_count && !afr_has_quorum(local->child_up, this, NULL)) {
+ op_errno = afr_quorum_errno(priv);
+ goto out;
+ }
+
if (!afr_is_consistent_io_possible(local, priv, &op_errno))
goto out;
@@ -158,8 +164,8 @@ afr_validate_read_subvol(inode_t *inode, xlator_t *this, int par_read_subvol)
}
static void
-afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol,
- gf_dirent_t *entries, fd_t *fd)
+afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries,
+ int subvol, gf_dirent_t *entries, fd_t *fd)
{
int ret = -1;
gf_dirent_t *entry = NULL;
@@ -177,8 +183,8 @@ afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol,
list_for_each_entry_safe(entry, tmp, &subvol_entries->list, list)
{
- if (__is_root_gfid(fd->inode->gfid) &&
- !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR)) {
+ if (afr_is_private_directory(priv, fd->inode->gfid, entry->d_name,
+ frame->root->pid)) {
continue;
}
@@ -222,8 +228,8 @@ afr_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
}
if (op_ret >= 0)
- afr_readdir_transform_entries(subvol_entries, (long)cookie, &entries,
- local->fd);
+ afr_readdir_transform_entries(frame, subvol_entries, (long)cookie,
+ &entries, local->fd);
AFR_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, xdata);
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
index 5725b1c5cb3..b7cceb79158 100644
--- a/xlators/cluster/afr/src/afr-dir-write.c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -10,24 +10,20 @@
#include <libgen.h>
#include <unistd.h>
-#include <fnmatch.h>
#include <sys/time.h>
#include <stdlib.h>
#include <signal.h>
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/list.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/byte-order.h>
#include "afr.h"
#include "afr-transaction.h"
@@ -98,7 +94,9 @@ __afr_dir_write_finalize(call_frame_t *frame, xlator_t *this)
}
if (local->inode) {
- afr_replies_interpret(frame, this, local->inode, NULL);
+ if (local->op != GF_FOP_RENAME && local->op != GF_FOP_LINK)
+ afr_replies_interpret(frame, this, local->inode, NULL);
+
inode_read_subvol = afr_data_subvol_get(local->inode, this, NULL, NULL,
NULL, &args);
}
@@ -121,11 +119,11 @@ __afr_dir_write_finalize(call_frame_t *frame, xlator_t *this)
continue;
if (local->replies[i].op_ret < 0) {
if (local->inode)
- afr_inode_event_gen_reset(local->inode, this);
+ afr_inode_need_refresh_set(local->inode, this);
if (local->parent)
- afr_inode_event_gen_reset(local->parent, this);
+ afr_inode_need_refresh_set(local->parent, this);
if (local->parent2)
- afr_inode_event_gen_reset(local->parent2, this);
+ afr_inode_need_refresh_set(local->parent2, this);
continue;
}
@@ -231,9 +229,9 @@ __afr_dir_write_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
__afr_dir_write_fill(frame, this, child_index, op_ret, op_errno, buf,
preparent, postparent, preparent2, postparent2,
xdata);
+ call_count = --local->call_count;
}
UNLOCK(&frame->lock);
- call_count = afr_frame_return(frame);
if (call_count == 0) {
__afr_dir_write_finalize(frame, this);
@@ -347,6 +345,7 @@ afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this)
afr_private_t *priv = NULL;
int pre_op_count = 0;
int failed_count = 0;
+ unsigned char *success_replies = NULL;
local = frame->local;
priv = this->private;
@@ -362,9 +361,22 @@ afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this)
failed_count = AFR_COUNT(local->transaction.failed_subvols,
priv->child_count);
+ /* FOP succeeded on all bricks. */
if (pre_op_count == priv->child_count && !failed_count)
return;
+ /* FOP did not suceed on quorum no. of bricks. */
+ success_replies = alloca0(priv->child_count);
+ afr_fill_success_replies(local, priv, success_replies);
+ if (!afr_has_quorum(success_replies, this, NULL))
+ return;
+
+ if (priv->thin_arbiter_count) {
+ /*Mark new entry using ta file*/
+ local->is_new_entry = _gf_true;
+ return;
+ }
+
afr_mark_new_entry_changelog(frame, this);
return;
@@ -423,15 +435,11 @@ int
afr_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
int op_errno = ENOMEM;
- priv = this->private;
-
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -473,16 +481,6 @@ afr_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME(loc->path);
- int_lock = &local->internal_lock;
-
- int_lock->lockee_count = 0;
- ret = afr_init_entry_lockee(&int_lock->lockee[0], local,
- &local->transaction.parent_loc,
- local->transaction.basename, priv->child_count);
- if (ret)
- goto out;
-
- int_lock->lockee_count++;
ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
if (ret < 0) {
op_errno = -ret;
@@ -553,15 +551,11 @@ int
afr_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
dev_t dev, mode_t umask, dict_t *xdata)
{
- afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
int op_errno = ENOMEM;
- priv = this->private;
-
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -596,16 +590,6 @@ afr_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME(loc->path);
- int_lock = &local->internal_lock;
-
- int_lock->lockee_count = 0;
- ret = afr_init_entry_lockee(&int_lock->lockee[0], local,
- &local->transaction.parent_loc,
- local->transaction.basename, priv->child_count);
- if (ret)
- goto out;
-
- int_lock->lockee_count++;
ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
if (ret < 0) {
op_errno = -ret;
@@ -674,15 +658,11 @@ int
afr_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
mode_t umask, dict_t *xdata)
{
- afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
int op_errno = ENOMEM;
- priv = this->private;
-
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -698,7 +678,7 @@ afr_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
local->cont.mkdir.mode = mode;
local->umask = umask;
- if (!xdata || !dict_get(xdata, "gfid-req")) {
+ if (!xdata || !dict_get_sizen(xdata, "gfid-req")) {
op_errno = EPERM;
gf_msg_callingfn(this->name, GF_LOG_WARNING, op_errno,
AFR_MSG_GFID_NULL,
@@ -724,16 +704,6 @@ afr_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME(loc->path);
- int_lock = &local->internal_lock;
-
- int_lock->lockee_count = 0;
- ret = afr_init_entry_lockee(&int_lock->lockee[0], local,
- &local->transaction.parent_loc,
- local->transaction.basename, priv->child_count);
- if (ret)
- goto out;
-
- int_lock->lockee_count++;
ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
if (ret < 0) {
op_errno = -ret;
@@ -802,15 +772,11 @@ int
afr_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
dict_t *xdata)
{
- afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
int op_errno = ENOMEM;
- priv = this->private;
-
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -845,16 +811,6 @@ afr_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME(newloc->path);
- int_lock = &local->internal_lock;
-
- int_lock->lockee_count = 0;
- ret = afr_init_entry_lockee(&int_lock->lockee[0], local,
- &local->transaction.parent_loc,
- local->transaction.basename, priv->child_count);
- if (ret)
- goto out;
-
- int_lock->lockee_count++;
ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
if (ret < 0) {
op_errno = -ret;
@@ -924,15 +880,11 @@ int
afr_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
loc_t *loc, mode_t umask, dict_t *xdata)
{
- afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
int op_errno = ENOMEM;
- priv = this->private;
-
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -966,16 +918,6 @@ afr_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME(loc->path);
- int_lock = &local->internal_lock;
-
- int_lock->lockee_count = 0;
- ret = afr_init_entry_lockee(&int_lock->lockee[0], local,
- &local->transaction.parent_loc,
- local->transaction.basename, priv->child_count);
- if (ret)
- goto out;
-
- int_lock->lockee_count++;
ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
if (ret < 0) {
op_errno = -ret;
@@ -1048,15 +990,10 @@ int
afr_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
dict_t *xdata)
{
- afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
int op_errno = ENOMEM;
- int nlockee = 0;
-
- priv = this->private;
transaction_frame = copy_frame(frame);
if (!transaction_frame) {
@@ -1099,35 +1036,6 @@ afr_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME(oldloc->path);
local->transaction.new_basename = AFR_BASENAME(newloc->path);
- int_lock = &local->internal_lock;
-
- int_lock->lockee_count = nlockee = 0;
- ret = afr_init_entry_lockee(
- &int_lock->lockee[nlockee], local, &local->transaction.new_parent_loc,
- local->transaction.new_basename, priv->child_count);
- if (ret)
- goto out;
-
- nlockee++;
- ret = afr_init_entry_lockee(&int_lock->lockee[nlockee], local,
- &local->transaction.parent_loc,
- local->transaction.basename, priv->child_count);
- if (ret)
- goto out;
-
- nlockee++;
- if (local->newloc.inode && IA_ISDIR(local->newloc.inode->ia_type)) {
- ret = afr_init_entry_lockee(&int_lock->lockee[nlockee], local,
- &local->newloc, NULL, priv->child_count);
- if (ret)
- goto out;
-
- nlockee++;
- }
- qsort(int_lock->lockee, nlockee, sizeof(*int_lock->lockee),
- afr_entry_lockee_cmp);
- int_lock->lockee_count = nlockee;
-
ret = afr_transaction(transaction_frame, this,
AFR_ENTRY_RENAME_TRANSACTION);
if (ret < 0) {
@@ -1196,15 +1104,11 @@ int
afr_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
dict_t *xdata)
{
- afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
int op_errno = ENOMEM;
- priv = this->private;
-
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -1237,16 +1141,6 @@ afr_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME(loc->path);
- int_lock = &local->internal_lock;
-
- int_lock->lockee_count = 0;
- ret = afr_init_entry_lockee(&int_lock->lockee[0], local,
- &local->transaction.parent_loc,
- local->transaction.basename, priv->child_count);
- if (ret)
- goto out;
-
- int_lock->lockee_count++;
ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
if (ret < 0) {
op_errno = -ret;
@@ -1313,15 +1207,10 @@ int
afr_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
dict_t *xdata)
{
- afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
call_frame_t *transaction_frame = NULL;
int ret = -1;
int op_errno = ENOMEM;
- int nlockee = 0;
-
- priv = this->private;
transaction_frame = copy_frame(frame);
if (!transaction_frame)
@@ -1355,26 +1244,6 @@ afr_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
local->transaction.main_frame = frame;
local->transaction.basename = AFR_BASENAME(loc->path);
- int_lock = &local->internal_lock;
-
- int_lock->lockee_count = nlockee = 0;
- ret = afr_init_entry_lockee(&int_lock->lockee[nlockee], local,
- &local->transaction.parent_loc,
- local->transaction.basename, priv->child_count);
- if (ret)
- goto out;
-
- nlockee++;
- ret = afr_init_entry_lockee(&int_lock->lockee[nlockee], local, &local->loc,
- NULL, priv->child_count);
- if (ret)
- goto out;
-
- nlockee++;
- qsort(int_lock->lockee, nlockee, sizeof(*int_lock->lockee),
- afr_entry_lockee_cmp);
- int_lock->lockee_count = nlockee;
-
ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
if (ret < 0) {
op_errno = -ret;
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
index 03338592618..c5521704de2 100644
--- a/xlators/cluster/afr/src/afr-inode-read.c
+++ b/xlators/cluster/afr/src/afr-inode-read.c
@@ -15,20 +15,17 @@
#include <stdlib.h>
#include <signal.h>
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "list.h"
-#include "call-stub.h"
-#include "byte-order.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "quota-common-utils.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/list.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/quota-common-utils.h>
#include "afr-transaction.h"
#include "afr-messages.h"
@@ -305,6 +302,7 @@ afr_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
afr_local_t *local = NULL;
int op_errno = 0;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
local = AFR_FRAME_INIT(frame, op_errno);
if (!local)
goto out;
@@ -531,12 +529,16 @@ afr_fgetxattr_clrlk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t callcnt = 0;
long int cky = 0;
int ret = 0;
+ int keylen = 0;
+ int children_keylen = 0;
priv = this->private;
children = priv->children;
local = frame->local;
cky = (long)cookie;
+ keylen = strlen(local->cont.getxattr.name);
+ children_keylen = strlen(children[cky]->name);
LOCK(&frame->lock);
{
@@ -547,11 +549,12 @@ afr_fgetxattr_clrlk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (!local->dict)
local->dict = dict_new();
if (local->dict) {
- ret = dict_get_str(dict, local->cont.getxattr.name, &tmp_report);
+ ret = dict_get_strn(dict, local->cont.getxattr.name, keylen,
+ &tmp_report);
if (ret)
goto unlock;
- ret = dict_set_dynstr(local->dict, children[cky]->name,
- gf_strdup(tmp_report));
+ ret = dict_set_dynstrn(local->dict, children[cky]->name,
+ children_keylen, gf_strdup(tmp_report));
if (ret)
goto unlock;
}
@@ -575,8 +578,8 @@ unlock:
}
if (serz_len == -1)
snprintf(lk_summary, sizeof(lk_summary), "No locks cleared.");
- ret = dict_set_dynstr(xattr, local->cont.getxattr.name,
- gf_strdup(lk_summary));
+ ret = dict_set_dynstrn(xattr, local->cont.getxattr.name, keylen,
+ gf_strdup(lk_summary));
if (ret) {
op_ret = -1;
op_errno = ENOMEM;
@@ -613,6 +616,8 @@ afr_getxattr_clrlk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t callcnt = 0;
long int cky = 0;
int ret = 0;
+ int keylen = 0;
+ int children_keylen = 0;
priv = this->private;
children = priv->children;
@@ -620,6 +625,9 @@ afr_getxattr_clrlk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
cky = (long)cookie;
+ keylen = strlen(local->cont.getxattr.name);
+ children_keylen = strlen(children[cky]->name);
+
LOCK(&frame->lock);
{
callcnt = --local->call_count;
@@ -629,11 +637,12 @@ afr_getxattr_clrlk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (!local->dict)
local->dict = dict_new();
if (local->dict) {
- ret = dict_get_str(dict, local->cont.getxattr.name, &tmp_report);
+ ret = dict_get_strn(dict, local->cont.getxattr.name, keylen,
+ &tmp_report);
if (ret)
goto unlock;
- ret = dict_set_dynstr(local->dict, children[cky]->name,
- gf_strdup(tmp_report));
+ ret = dict_set_dynstrn(local->dict, children[cky]->name,
+ children_keylen, gf_strdup(tmp_report));
if (ret)
goto unlock;
}
@@ -657,8 +666,8 @@ unlock:
}
if (serz_len == -1)
snprintf(lk_summary, sizeof(lk_summary), "No locks cleared.");
- ret = dict_set_dynstr(xattr, local->cont.getxattr.name,
- gf_strdup(lk_summary));
+ ret = dict_set_dynstrn(xattr, local->cont.getxattr.name, keylen,
+ gf_strdup(lk_summary));
if (ret) {
op_ret = -1;
op_errno = ENOMEM;
@@ -802,15 +811,18 @@ unlock:
if (ret) {
local->op_ret = -1;
local->op_errno = ENOMEM;
+ GF_FREE(xattr_serz);
goto unwind;
}
- ret = dict_set_dynstr(local->dict, GF_XATTR_LIST_NODE_UUIDS_KEY,
- xattr_serz);
+ ret = dict_set_dynstr_sizen(local->dict, GF_XATTR_LIST_NODE_UUIDS_KEY,
+ xattr_serz);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
"Cannot set node_uuid key in dict");
local->op_ret = -1;
local->op_errno = ENOMEM;
+ if (ret == -EINVAL)
+ GF_FREE(xattr_serz);
} else {
local->op_ret = local->cont.getxattr.xattr_len - 1;
local->op_errno = 0;
@@ -934,24 +946,13 @@ unlock:
goto unwind;
}
- len = dict_serialized_length(local->dict);
- if (len <= 0) {
- goto unwind;
- }
-
- lockinfo_buf = GF_CALLOC(1, len, gf_common_mt_char);
- if (!lockinfo_buf) {
+ op_ret = dict_allocate_and_serialize(
+ local->dict, (char **)&lockinfo_buf, (unsigned int *)&len);
+ if (op_ret != 0) {
local->op_ret = -1;
- local->op_errno = ENOMEM;
goto unwind;
}
- op_ret = dict_serialize(local->dict, lockinfo_buf);
- if (op_ret < 0) {
- local->op_ret = -1;
- local->op_errno = -op_ret;
- }
-
op_ret = dict_set_dynptr(newdict, GF_XATTR_LOCKINFO_KEY,
(void *)lockinfo_buf, len);
if (op_ret < 0) {
@@ -1050,24 +1051,13 @@ unlock:
goto unwind;
}
- len = dict_serialized_length(local->dict);
- if (len <= 0) {
- goto unwind;
- }
-
- lockinfo_buf = GF_CALLOC(1, len, gf_common_mt_char);
- if (!lockinfo_buf) {
+ op_ret = dict_allocate_and_serialize(
+ local->dict, (char **)&lockinfo_buf, (unsigned int *)&len);
+ if (op_ret != 0) {
local->op_ret = -1;
- local->op_errno = ENOMEM;
goto unwind;
}
- op_ret = dict_serialize(local->dict, lockinfo_buf);
- if (op_ret < 0) {
- local->op_ret = -1;
- local->op_errno = -op_ret;
- }
-
op_ret = dict_set_dynptr(newdict, GF_XATTR_LOCKINFO_KEY,
(void *)lockinfo_buf, len);
if (op_ret < 0) {
@@ -1096,9 +1086,11 @@ afr_fgetxattr_pathinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int ret = 0;
char *xattr = NULL;
char *xattr_serz = NULL;
+ int keylen = 0;
char xattr_cky[1024] = {
0,
};
+ int xattr_cky_len = 0;
dict_t *nxattr = NULL;
long cky = 0;
int32_t padding = 0;
@@ -1111,7 +1103,9 @@ afr_fgetxattr_pathinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
cky = (long)cookie;
-
+ keylen = strlen(local->cont.getxattr.name);
+ xattr_cky_len = snprintf(xattr_cky, sizeof(xattr_cky), "%s-%ld",
+ local->cont.getxattr.name, cky);
LOCK(&frame->lock);
{
callcnt = --local->call_count;
@@ -1127,31 +1121,30 @@ afr_fgetxattr_pathinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (!dict || (op_ret < 0))
goto unlock;
- if (!local->dict)
+ if (!local->dict) {
local->dict = dict_new();
-
- if (local->dict) {
- ret = dict_get_str(dict, local->cont.getxattr.name, &xattr);
- if (ret)
+ if (!local->dict)
goto unlock;
+ }
+ ret = dict_get_strn(dict, local->cont.getxattr.name, keylen, &xattr);
+ if (ret)
+ goto unlock;
- xattr = gf_strdup(xattr);
-
- (void)snprintf(xattr_cky, sizeof(xattr_cky), "%s-%ld",
- local->cont.getxattr.name, cky);
- ret = dict_set_dynstr(local->dict, xattr_cky, xattr);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
- "Cannot set xattr cookie key");
- goto unlock;
- }
+ xattr = gf_strdup(xattr);
- local->cont.getxattr.xattr_len += strlen(xattr) + 1;
+ ret = dict_set_dynstrn(local->dict, xattr_cky, xattr_cky_len, xattr);
+ if (ret) {
+ UNLOCK(&frame->lock);
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Cannot set xattr cookie key");
+ goto post_unlock;
}
+
+ local->cont.getxattr.xattr_len += strlen(xattr) + 1;
}
unlock:
UNLOCK(&frame->lock);
-
+post_unlock:
if (!callcnt) {
if (!local->cont.getxattr.xattr_len)
goto unwind;
@@ -1178,6 +1171,7 @@ unlock:
ret = dict_serialize_value_with_delim(
local->dict, xattr_serz + xattr_serz_len, &tlen, ' ');
if (ret) {
+ GF_FREE(xattr_serz);
goto unwind;
}
@@ -1185,10 +1179,14 @@ unlock:
*(xattr_serz + padding + tlen) = ')';
*(xattr_serz + padding + tlen + 1) = '\0';
- ret = dict_set_dynstr(nxattr, local->cont.getxattr.name, xattr_serz);
- if (ret)
+ ret = dict_set_dynstrn(nxattr, local->cont.getxattr.name, keylen,
+ xattr_serz);
+ if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
"Cannot set pathinfo key in dict");
+ if (ret == -EINVAL)
+ GF_FREE(xattr_serz);
+ }
unwind:
AFR_STACK_UNWIND(fgetxattr, frame, local->op_ret, local->op_errno,
@@ -1215,6 +1213,8 @@ afr_getxattr_pathinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
char xattr_cky[1024] = {
0,
};
+ int keylen = 0;
+ int xattr_cky_len = 0;
dict_t *nxattr = NULL;
long cky = 0;
int32_t padding = 0;
@@ -1227,7 +1227,9 @@ afr_getxattr_pathinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
cky = (long)cookie;
-
+ keylen = strlen(local->cont.getxattr.name);
+ xattr_cky_len = snprintf(xattr_cky, sizeof(xattr_cky), "%s-%ld",
+ local->cont.getxattr.name, cky);
LOCK(&frame->lock);
{
callcnt = --local->call_count;
@@ -1243,32 +1245,30 @@ afr_getxattr_pathinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (!dict || (op_ret < 0))
goto unlock;
- if (!local->dict)
+ if (!local->dict) {
local->dict = dict_new();
-
- if (local->dict) {
- ret = dict_get_str(dict, local->cont.getxattr.name, &xattr);
- if (ret)
+ if (!local->dict)
goto unlock;
+ }
+ ret = dict_get_strn(dict, local->cont.getxattr.name, keylen, &xattr);
+ if (ret)
+ goto unlock;
- xattr = gf_strdup(xattr);
-
- (void)snprintf(xattr_cky, 1024, "%s-%ld", local->cont.getxattr.name,
- cky);
- ret = dict_set_dynstr(local->dict, xattr_cky, xattr);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
- "Cannot set xattr "
- "cookie key");
- goto unlock;
- }
+ xattr = gf_strdup(xattr);
- local->cont.getxattr.xattr_len += strlen(xattr) + 1;
+ ret = dict_set_dynstrn(local->dict, xattr_cky, xattr_cky_len, xattr);
+ if (ret) {
+ UNLOCK(&frame->lock);
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Cannot set xattr cookie key");
+ goto post_unlock;
}
+
+ local->cont.getxattr.xattr_len += strlen(xattr) + 1;
}
unlock:
UNLOCK(&frame->lock);
-
+post_unlock:
if (!callcnt) {
if (!local->cont.getxattr.xattr_len)
goto unwind;
@@ -1295,6 +1295,7 @@ unlock:
ret = dict_serialize_value_with_delim(
local->dict, xattr_serz + xattr_serz_len, &tlen, ' ');
if (ret) {
+ GF_FREE(xattr_serz);
goto unwind;
}
@@ -1302,10 +1303,14 @@ unlock:
*(xattr_serz + padding + tlen) = ')';
*(xattr_serz + padding + tlen + 1) = '\0';
- ret = dict_set_dynstr(nxattr, local->cont.getxattr.name, xattr_serz);
- if (ret)
+ ret = dict_set_dynstrn(nxattr, local->cont.getxattr.name, keylen,
+ xattr_serz);
+ if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
"Cannot set pathinfo key in dict");
+ if (ret == -EINVAL)
+ GF_FREE(xattr_serz);
+ }
unwind:
AFR_STACK_UNWIND(getxattr, frame, local->op_ret, local->op_errno,
@@ -1694,6 +1699,7 @@ afr_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
int32_t op_errno = 0;
fop_fgetxattr_cbk_t cbk = NULL;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
local = AFR_FRAME_INIT(frame, op_errno);
if (!local)
goto out;
@@ -1787,6 +1793,7 @@ afr_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
afr_local_t *local = NULL;
int32_t op_errno = 0;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
local = AFR_FRAME_INIT(frame, op_errno);
if (!local)
goto out;
@@ -1862,6 +1869,7 @@ afr_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
afr_local_t *local = NULL;
int32_t op_errno = 0;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
local = AFR_FRAME_INIT(frame, op_errno);
if (!local)
goto out;
diff --git a/xlators/cluster/afr/src/afr-inode-read.h b/xlators/cluster/afr/src/afr-inode-read.h
index 1627ee2c426..8c982bc7e6f 100644
--- a/xlators/cluster/afr/src/afr-inode-read.h
+++ b/xlators/cluster/afr/src/afr-inode-read.h
@@ -38,5 +38,8 @@ afr_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
dict_t *xdata);
int
+afr_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata);
+int
afr_handle_quota_size(call_frame_t *frame, xlator_t *this);
#endif /* __INODE_READ_H__ */
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index c6fe0939841..1d6e4f3570a 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -8,27 +8,21 @@
cases as published by the Free Software Foundation.
*/
-#include <libgen.h>
#include <unistd.h>
-#include <fnmatch.h>
#include <sys/time.h>
#include <stdlib.h>
#include <signal.h>
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
#include "protocol-common.h"
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
#include "afr-transaction.h"
#include "afr-self-heal.h"
#include "afr-messages.h"
@@ -180,11 +174,10 @@ __afr_inode_write_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
{
__afr_inode_write_fill(frame, this, child_index, op_ret, op_errno,
prebuf, postbuf, xattr, xdata);
+ call_count = --local->call_count;
}
UNLOCK(&frame->lock);
- call_count = afr_frame_return(frame);
-
if (call_count == 0) {
__afr_inode_write_finalize(frame, this);
@@ -498,6 +491,7 @@ afr_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
int op_errno = ENOMEM;
int ret = -1;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
local = AFR_FRAME_INIT(frame, op_errno);
if (!local)
goto out;
@@ -737,6 +731,7 @@ afr_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
int ret = -1;
int op_errno = ENOMEM;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -947,6 +942,7 @@ afr_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *buf,
int ret = -1;
int op_errno = ENOMEM;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -1059,15 +1055,14 @@ afr_emptyb_set_pending_changelog_cbk(call_frame_t *frame, void *cookie,
local->replies[i].op_ret = op_ret;
local->replies[i].op_errno = op_errno;
- ret = dict_get_str(local->xdata_req, "replicate-brick-op", &op_type);
+ ret = dict_get_str_sizen(local->xdata_req, "replicate-brick-op", &op_type);
if (ret)
goto out;
- gf_msg(this->name, op_ret ? GF_LOG_ERROR : GF_LOG_INFO,
- op_ret ? op_errno : 0, afr_get_msg_id(op_type),
- "Set of pending xattr %s on"
- " %s.",
- op_ret ? "failed" : "succeeded", priv->children[i]->name);
+ gf_smsg(this->name, op_ret ? GF_LOG_ERROR : GF_LOG_INFO,
+ op_ret ? op_errno : 0, AFR_MSG_SET_PEND_XATTR, "name=%s",
+ priv->children[i]->name, "op_ret=%s",
+ op_ret ? "failed" : "succeeded", NULL);
out:
syncbarrier_wake(&local->barrier);
@@ -1105,10 +1100,10 @@ out:
return -ret;
}
-int
+static int
_afr_handle_empty_brick_type(xlator_t *this, call_frame_t *frame, loc_t *loc,
int empty_index, afr_transaction_type type,
- char *op_type)
+ char *op_type, const int op_type_len)
{
int count = 0;
int ret = -ENOMEM;
@@ -1139,7 +1134,8 @@ _afr_handle_empty_brick_type(xlator_t *this, call_frame_t *frame, loc_t *loc,
if (!local->xdata_req)
goto out;
- ret = dict_set_str(local->xdata_req, "replicate-brick-op", op_type);
+ ret = dict_set_nstrn(local->xdata_req, "replicate-brick-op",
+ SLEN("replicate-brick-op"), op_type, op_type_len);
if (ret)
goto out;
@@ -1160,9 +1156,8 @@ _afr_handle_empty_brick_type(xlator_t *this, call_frame_t *frame, loc_t *loc,
}
if (!count) {
- gf_msg(this->name, GF_LOG_ERROR, EAGAIN, AFR_MSG_REPLACE_BRICK_STATUS,
- "Couldn't acquire lock on"
- " any child.");
+ gf_smsg(this->name, GF_LOG_ERROR, EAGAIN, AFR_MSG_REPLACE_BRICK_STATUS,
+ NULL);
ret = -EAGAIN;
goto unlock;
}
@@ -1211,26 +1206,41 @@ _afr_handle_empty_brick(void *opaque)
call_frame_t *frame = NULL;
xlator_t *this = NULL;
char *op_type = NULL;
+ int op_type_len = 0;
afr_empty_brick_args_t *data = NULL;
+ call_frame_t *op_frame = NULL;
data = opaque;
frame = data->frame;
empty_index = data->empty_index;
+ if (!data->op_type)
+ goto out;
+
+ op_frame = copy_frame(frame);
+ if (!op_frame) {
+ ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
op_type = data->op_type;
- this = frame->this;
+ op_type_len = strlen(op_type);
+ this = op_frame->this;
priv = this->private;
- local = AFR_FRAME_INIT(frame, op_errno);
+ afr_set_lk_owner(op_frame, this, op_frame->root);
+ local = AFR_FRAME_INIT(op_frame, op_errno);
if (!local)
goto out;
loc_copy(&local->loc, &data->loc);
- gf_msg(this->name, GF_LOG_INFO, 0, 0, "New brick is : %s",
- priv->children[empty_index]->name);
+ gf_smsg(this->name, GF_LOG_INFO, 0, AFR_MSG_NEW_BRICK, "name=%s",
+ priv->children[empty_index]->name, NULL);
- ret = _afr_handle_empty_brick_type(this, frame, &local->loc, empty_index,
- AFR_METADATA_TRANSACTION, op_type);
+ ret = _afr_handle_empty_brick_type(this, op_frame, &local->loc, empty_index,
+ AFR_METADATA_TRANSACTION, op_type,
+ op_type_len);
if (ret) {
op_errno = -ret;
ret = -1;
@@ -1244,8 +1254,9 @@ _afr_handle_empty_brick(void *opaque)
local->xattr_req = NULL;
local->xdata_req = NULL;
- ret = _afr_handle_empty_brick_type(this, frame, &local->loc, empty_index,
- AFR_ENTRY_TRANSACTION, op_type);
+ ret = _afr_handle_empty_brick_type(this, op_frame, &local->loc, empty_index,
+ AFR_ENTRY_TRANSACTION, op_type,
+ op_type_len);
if (ret) {
op_errno = -ret;
ret = -1;
@@ -1253,6 +1264,9 @@ _afr_handle_empty_brick(void *opaque)
}
ret = 0;
out:
+ if (op_frame) {
+ AFR_STACK_DESTROY(op_frame);
+ }
AFR_STACK_UNWIND(setxattr, frame, ret, op_errno, NULL);
return 0;
}
@@ -1273,14 +1287,14 @@ afr_split_brain_resolve_do(call_frame_t *frame, xlator_t *this, loc_t *loc,
goto out;
}
- ret = dict_set_int32(local->xdata_req, "heal-op",
- GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK);
+ ret = dict_set_int32_sizen(local->xdata_req, "heal-op",
+ GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK);
if (ret) {
op_errno = -ret;
ret = -1;
goto out;
}
- ret = dict_set_str(local->xdata_req, "child-name", data);
+ ret = dict_set_str_sizen(local->xdata_req, "child-name", data);
if (ret) {
op_errno = -ret;
ret = -1;
@@ -1297,9 +1311,8 @@ afr_split_brain_resolve_do(call_frame_t *frame, xlator_t *this, loc_t *loc,
*/
ret = afr_inode_split_brain_choice_set(loc->inode, this, -1);
if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR,
- "Failed to set"
- "split-brain choice to -1");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN_SET_FAILED,
+ NULL);
afr_heal_splitbrain_file(frame, this, loc);
ret = 0;
out:
@@ -1322,8 +1335,8 @@ afr_get_split_brain_child_index(xlator_t *this, void *value, size_t len)
spb_child_index = afr_get_child_index_from_name(this, spb_child_str);
if (spb_child_index < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL,
- "Invalid subvol: %s", spb_child_str);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL,
+ "subvol=%s", spb_child_str, NULL);
}
return spb_child_index;
}
@@ -1345,11 +1358,9 @@ afr_can_set_split_brain_choice(void *opaque)
&data->m_spb);
if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR,
- "Failed to determine if %s"
- " is in split-brain. "
- "Aborting split-brain-choice set.",
- uuid_utoa(loc->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_SPLIT_BRAIN_DETERMINE_FAILED, "gfid=%s",
+ uuid_utoa(loc->gfid), NULL);
return ret;
}
@@ -1357,7 +1368,8 @@ int
afr_handle_split_brain_commands(xlator_t *this, call_frame_t *frame, loc_t *loc,
dict_t *dict)
{
- void *value = NULL;
+ void *choice_value = NULL;
+ void *resolve_value = NULL;
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
afr_spbc_timeout_t *data = NULL;
@@ -1368,6 +1380,14 @@ afr_handle_split_brain_commands(xlator_t *this, call_frame_t *frame, loc_t *loc,
priv = this->private;
+ ret = dict_get_ptr_and_len(dict, GF_AFR_SBRAIN_CHOICE, &choice_value, &len);
+ ret = dict_get_ptr_and_len(dict, GF_AFR_SBRAIN_RESOLVE, &resolve_value,
+ &len);
+ if (!choice_value && !resolve_value) {
+ ret = -1;
+ goto out;
+ }
+
local = AFR_FRAME_INIT(frame, op_errno);
if (!local) {
ret = 1;
@@ -1376,9 +1396,9 @@ afr_handle_split_brain_commands(xlator_t *this, call_frame_t *frame, loc_t *loc,
local->op = GF_FOP_SETXATTR;
- ret = dict_get_ptr_and_len(dict, GF_AFR_SBRAIN_CHOICE, &value, &len);
- if (value) {
- spb_child_index = afr_get_split_brain_child_index(this, value, len);
+ if (choice_value) {
+ spb_child_index = afr_get_split_brain_child_index(this, choice_value,
+ len);
if (spb_child_index < 0) {
/* Case where value was "none" */
if (spb_child_index == -2)
@@ -1402,12 +1422,8 @@ afr_handle_split_brain_commands(xlator_t *this, call_frame_t *frame, loc_t *loc,
ret = synctask_new(this->ctx->env, afr_can_set_split_brain_choice,
afr_set_split_brain_choice, NULL, data);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR,
- "Failed to create"
- " synctask. Aborting split-brain choice set"
- " for %s",
- loc->name);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN_STATUS,
+ "name=%s", loc->name, NULL);
ret = 1;
op_errno = ENOMEM;
goto out;
@@ -1416,9 +1432,9 @@ afr_handle_split_brain_commands(xlator_t *this, call_frame_t *frame, loc_t *loc,
goto out;
}
- ret = dict_get_ptr_and_len(dict, GF_AFR_SBRAIN_RESOLVE, &value, &len);
- if (value) {
- spb_child_index = afr_get_split_brain_child_index(this, value, len);
+ if (resolve_value) {
+ spb_child_index = afr_get_split_brain_child_index(this, resolve_value,
+ len);
if (spb_child_index < 0) {
ret = 1;
goto out;
@@ -1470,20 +1486,20 @@ afr_handle_empty_brick(xlator_t *this, call_frame_t *frame, loc_t *loc,
char *op_type = NULL;
afr_empty_brick_args_t *data = NULL;
- ret = dict_get_str(dict, GF_AFR_REPLACE_BRICK, &empty_brick);
+ ret = dict_get_str_sizen(dict, GF_AFR_REPLACE_BRICK, &empty_brick);
if (!ret)
op_type = GF_AFR_REPLACE_BRICK;
- ab_ret = dict_get_str(dict, GF_AFR_ADD_BRICK, &empty_brick);
+ ab_ret = dict_get_str_sizen(dict, GF_AFR_ADD_BRICK, &empty_brick);
if (!ab_ret)
op_type = GF_AFR_ADD_BRICK;
if (ret && ab_ret)
goto out;
- if (frame->root->pid != GF_CLIENT_PID_SELF_HEALD) {
- gf_msg(this->name, GF_LOG_ERROR, EPERM, afr_get_msg_id(op_type),
- "'%s' is an internal extended attribute.", op_type);
+ if (frame->root->pid != GF_CLIENT_PID_ADD_REPLICA_MOUNT) {
+ gf_smsg(this->name, GF_LOG_ERROR, EPERM, AFR_MSG_INTERNAL_ATTR,
+ "op_type=%s", op_type, NULL);
ret = 1;
goto out;
}
@@ -1509,8 +1525,8 @@ afr_handle_empty_brick(xlator_t *this, call_frame_t *frame, loc_t *loc,
ret = synctask_new(this->ctx->env, _afr_handle_empty_brick,
_afr_handle_empty_brick_cbk, NULL, data);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, afr_get_msg_id(op_type),
- "Failed to create synctask.");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN_STATUS,
+ NULL);
ret = 1;
op_errno = ENOMEM;
afr_brick_args_cleanup(data);
@@ -1668,6 +1684,7 @@ afr_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
GF_IF_INTERNAL_XATTR_GOTO("trusted.glusterfs.afr.*", dict, op_errno, out);
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -1876,6 +1893,7 @@ afr_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
GF_IF_NATIVE_XATTR_GOTO("trusted.glusterfs.afr.*", name, op_errno, out);
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -1976,6 +1994,7 @@ afr_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
int ret = -1;
int op_errno = ENOMEM;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -2085,6 +2104,7 @@ afr_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
int ret = -1;
int op_errno = ENOMEM;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -2191,6 +2211,7 @@ afr_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
int ret = -1;
int op_errno = ENOMEM;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -2222,7 +2243,7 @@ afr_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
local->transaction.main_frame = frame;
- local->transaction.start = local->cont.discard.offset;
+ local->transaction.start = local->cont.zerofill.offset;
local->transaction.len = len;
afr_fix_open(fd, this);
@@ -2390,6 +2411,7 @@ afr_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
int ret = -1;
int op_errno = ENOMEM;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -2484,7 +2506,9 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
call_frame_t *transaction_frame = NULL;
int ret = -1;
int32_t op_errno = ENOMEM;
+ int8_t last_fsync = 0;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
transaction_frame = copy_frame(frame);
if (!transaction_frame)
goto out;
@@ -2493,10 +2517,16 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
if (!local)
goto out;
- if (xdata)
+ if (xdata) {
local->xdata_req = dict_copy_with_ref(xdata, NULL);
- else
+ if (dict_get_int8(xdata, "last-fsync", &last_fsync) == 0) {
+ if (last_fsync) {
+ local->transaction.disable_delayed_post_op = _gf_true;
+ }
+ }
+ } else {
local->xdata_req = dict_new();
+ }
if (!local->xdata_req)
goto out;
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
index 95e52ff4a09..bc8eabe0f43 100644
--- a/xlators/cluster/afr/src/afr-lk-common.c
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -8,9 +8,9 @@
cases as published by the Free Software Foundation.
*/
-#include "dict.h"
-#include "byte-order.h"
-#include "common-utils.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/common-utils.h>
#include "afr.h"
#include "afr-transaction.h"
@@ -22,11 +22,40 @@
#define LOCKED_YES 0x1 /* for DATA, METADATA, ENTRY and higher_path */
#define LOCKED_LOWER 0x2 /* for lower path */
+void
+afr_lockee_cleanup(afr_lockee_t *lockee)
+{
+ if (lockee->fd) {
+ fd_unref(lockee->fd);
+ lockee->fd = NULL;
+ } else {
+ loc_wipe(&lockee->loc);
+ }
+
+ GF_FREE(lockee->basename);
+ lockee->basename = NULL;
+ GF_FREE(lockee->locked_nodes);
+ lockee->locked_nodes = NULL;
+
+ return;
+}
+
+void
+afr_lockees_cleanup(afr_internal_lock_t *int_lock)
+{
+ int i = 0;
+
+ for (i = 0; i < int_lock->lockee_count; i++) {
+ afr_lockee_cleanup(&int_lock->lockee[i]);
+ }
+
+ return;
+}
int
afr_entry_lockee_cmp(const void *l1, const void *l2)
{
- const afr_entry_lockee_t *r1 = l1;
- const afr_entry_lockee_t *r2 = l2;
+ const afr_lockee_t *r1 = l1;
+ const afr_lockee_t *r2 = l2;
int ret = 0;
uuid_t gfid1 = {0};
uuid_t gfid2 = {0};
@@ -81,31 +110,14 @@ internal_lock_count(call_frame_t *frame, xlator_t *this)
}
int
-afr_is_inodelk_transaction(afr_transaction_type type)
-{
- int ret = 0;
-
- switch (type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
- ret = 1;
- break;
-
- case AFR_ENTRY_RENAME_TRANSACTION:
- case AFR_ENTRY_TRANSACTION:
- ret = 0;
- break;
- }
-
- return ret;
-}
-
-int
-afr_init_entry_lockee(afr_entry_lockee_t *lockee, afr_local_t *local,
- loc_t *loc, char *basename, int child_count)
+afr_add_entry_lockee(afr_local_t *local, loc_t *loc, char *basename,
+ int child_count)
{
- int ret = -1;
+ int ret = -ENOMEM;
+ afr_internal_lock_t *int_lock = &local->internal_lock;
+ afr_lockee_t *lockee = &int_lock->lockee[int_lock->lockee_count];
+ GF_ASSERT(int_lock->lockee_count < AFR_LOCKEE_COUNT_MAX);
loc_copy(&lockee->loc, loc);
lockee->basename = (basename) ? gf_strdup(basename) : NULL;
if (basename && !lockee->basename)
@@ -119,28 +131,45 @@ afr_init_entry_lockee(afr_entry_lockee_t *lockee, afr_local_t *local,
goto out;
ret = 0;
+ int_lock->lockee_count++;
out:
+ if (ret) {
+ afr_lockee_cleanup(lockee);
+ }
return ret;
}
-void
-afr_entry_lockee_cleanup(afr_internal_lock_t *int_lock)
+int
+afr_add_inode_lockee(afr_local_t *local, int child_count)
{
- int i = 0;
+ int ret = -ENOMEM;
+ afr_internal_lock_t *int_lock = &local->internal_lock;
+ afr_lockee_t *lockee = &int_lock->lockee[int_lock->lockee_count];
- for (i = 0; i < int_lock->lockee_count; i++) {
- loc_wipe(&int_lock->lockee[i].loc);
- if (int_lock->lockee[i].basename)
- GF_FREE(int_lock->lockee[i].basename);
- if (int_lock->lockee[i].locked_nodes)
- GF_FREE(int_lock->lockee[i].locked_nodes);
+ if (local->fd) {
+ lockee->fd = fd_ref(local->fd);
+ } else {
+ loc_copy(&lockee->loc, &local->loc);
}
- return;
+ lockee->locked_count = 0;
+ lockee->locked_nodes = GF_CALLOC(child_count, sizeof(*lockee->locked_nodes),
+ gf_afr_mt_afr_node_character);
+
+ if (!lockee->locked_nodes)
+ goto out;
+
+ ret = 0;
+ int_lock->lockee_count++;
+out:
+ if (ret) {
+ afr_lockee_cleanup(lockee);
+ }
+ return ret;
}
static int
-initialize_entrylk_variables(call_frame_t *frame, xlator_t *this)
+initialize_internal_lock_variables(call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
@@ -152,9 +181,10 @@ initialize_entrylk_variables(call_frame_t *frame, xlator_t *this)
local = frame->local;
int_lock = &local->internal_lock;
- int_lock->entrylk_lock_count = 0;
+ int_lock->lock_count = 0;
int_lock->lock_op_ret = -1;
int_lock->lock_op_errno = 0;
+ int_lock->lk_attempted_count = 0;
for (i = 0; i < AFR_LOCKEE_COUNT_MAX; i++) {
if (!int_lock->lockee[i].locked_nodes)
@@ -167,28 +197,6 @@ initialize_entrylk_variables(call_frame_t *frame, xlator_t *this)
return 0;
}
-static int
-initialize_inodelk_variables(call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
-
- priv = this->private;
- local = frame->local;
- int_lock = &local->internal_lock;
-
- int_lock->lock_count = 0;
- int_lock->lk_attempted_count = 0;
- int_lock->lock_op_ret = -1;
- int_lock->lock_op_errno = 0;
-
- memset(int_lock->locked_nodes, 0,
- sizeof(*int_lock->locked_nodes) * priv->child_count);
-
- return 0;
-}
-
int
afr_lockee_locked_nodes_count(afr_internal_lock_t *int_lock)
{
@@ -216,19 +224,74 @@ afr_locked_nodes_count(unsigned char *locked_nodes, int child_count)
return call_count;
}
-/* FIXME: What if UNLOCK fails */
+static void
+afr_log_locks_failure(call_frame_t *frame, char *where, char *what,
+ int op_errno)
+{
+ xlator_t *this = frame->this;
+ gf_lkowner_t *lk_owner = &frame->root->lk_owner;
+ afr_local_t *local = frame->local;
+ const char *fop = NULL;
+ char *gfid = NULL;
+ const char *name = NULL;
+
+ fop = gf_fop_list[local->op];
+
+ switch (local->transaction.type) {
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ case AFR_ENTRY_TRANSACTION:
+ switch (local->op) {
+ case GF_FOP_LINK:
+ gfid = uuid_utoa(local->newloc.pargfid);
+ name = local->newloc.name;
+ break;
+ default:
+ gfid = uuid_utoa(local->loc.pargfid);
+ name = local->loc.name;
+ break;
+ }
+ gf_msg(this->name, GF_LOG_WARNING, op_errno,
+ AFR_MSG_INTERNAL_LKS_FAILED,
+ "Unable to do entry %s with lk-owner:%s on %s "
+ "while attempting %s on {pgfid:%s, name:%s}.",
+ what, lkowner_utoa(lk_owner), where, fop, gfid, name);
+ break;
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ gfid = uuid_utoa(local->inode->gfid);
+ gf_msg(this->name, GF_LOG_WARNING, op_errno,
+ AFR_MSG_INTERNAL_LKS_FAILED,
+ "Unable to do inode %s with lk-owner:%s on %s "
+ "while attempting %s on gfid:%s.",
+ what, lkowner_utoa(lk_owner), where, fop, gfid);
+ break;
+ }
+}
+
static int32_t
afr_unlock_common_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
afr_internal_lock_t *int_lock = NULL;
+ int lockee_num = 0;
int call_count = 0;
+ int child_index = 0;
int ret = 0;
local = frame->local;
int_lock = &local->internal_lock;
+ priv = this->private;
+ lockee_num = (int)((long)cookie) / priv->child_count;
+ child_index = (int)((long)cookie) % priv->child_count;
+
+ if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
+ afr_log_locks_failure(frame, priv->children[child_index]->name,
+ "unlock", op_errno);
+ }
+ int_lock->lockee[lockee_num].locked_nodes[child_index] &= LOCKED_NO;
if (local->transaction.type == AFR_DATA_TRANSACTION && op_ret != 1)
ret = afr_write_subvol_reset(frame, this);
@@ -239,7 +302,6 @@ afr_unlock_common_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
UNLOCK(&frame->lock);
if (call_count == 0) {
- gf_msg_trace(this->name, 0, "All internal locks unlocked");
int_lock->lock_cbk(frame, this);
}
@@ -247,143 +309,88 @@ afr_unlock_common_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
}
void
-afr_update_uninodelk(afr_local_t *local, afr_internal_lock_t *int_lock,
- int32_t child_index)
-{
- int_lock->locked_nodes[child_index] &= LOCKED_NO;
-}
-
-static int32_t
-afr_unlock_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
- int32_t child_index = (long)cookie;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- priv = this->private;
-
- if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
- gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_UNLOCK_FAIL,
- "path=%s gfid=%s: unlock failed on subvolume %s "
- "with lock owner %s",
- local->loc.path, loc_gfid_utoa(&(local->loc)),
- priv->children[child_index]->name,
- lkowner_utoa(&frame->root->lk_owner));
- }
-
- afr_update_uninodelk(local, int_lock, child_index);
-
- afr_unlock_common_cbk(frame, cookie, this, op_ret, op_errno, xdata);
-
- return 0;
-}
-
-static int
-afr_unlock_inodelk(call_frame_t *frame, xlator_t *this)
+afr_internal_lock_wind(call_frame_t *frame,
+ int32_t (*cbk)(call_frame_t *, void *, xlator_t *,
+ int32_t, int32_t, dict_t *),
+ void *cookie, int child, int lockee_num,
+ gf_boolean_t blocking, gf_boolean_t unlock)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ afr_local_t *local = frame->local;
+ xlator_t *this = frame->this;
+ afr_private_t *priv = this->private;
+ afr_internal_lock_t *int_lock = &local->internal_lock;
+ entrylk_cmd cmd = ENTRYLK_LOCK_NB;
+ int32_t cmd1 = F_SETLK;
struct gf_flock flock = {
0,
};
- int call_count = 0;
- int i = 0;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- flock.l_start = int_lock->flock.l_start;
- flock.l_len = int_lock->flock.l_len;
- flock.l_type = F_UNLCK;
-
- call_count = afr_locked_nodes_count(int_lock->locked_nodes,
- priv->child_count);
-
- int_lock->lk_call_count = call_count;
-
- if (!call_count) {
- GF_ASSERT(!local->transaction.do_eager_unlock);
- gf_msg_trace(this->name, 0, "No internal locks unlocked");
-
- int_lock->lock_cbk(frame, this);
- goto out;
- }
- for (i = 0; i < priv->child_count; i++) {
- if ((int_lock->locked_nodes[i] & LOCKED_YES) != LOCKED_YES)
- continue;
-
- if (local->fd) {
- STACK_WIND_COOKIE(
- frame, afr_unlock_inodelk_cbk, (void *)(long)i,
- priv->children[i], priv->children[i]->fops->finodelk,
- int_lock->domain, local->fd, F_SETLK, &flock, NULL);
- } else {
- STACK_WIND_COOKIE(
- frame, afr_unlock_inodelk_cbk, (void *)(long)i,
- priv->children[i], priv->children[i]->fops->inodelk,
- int_lock->domain, &local->loc, F_SETLK, &flock, NULL);
- }
+ switch (local->transaction.type) {
+ case AFR_ENTRY_TRANSACTION:
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ if (unlock) {
+ cmd = ENTRYLK_UNLOCK;
+ } else if (blocking) { /*Doesn't make sense to have blocking
+ unlock*/
+ cmd = ENTRYLK_LOCK;
+ }
- if (!--call_count)
+ if (local->fd) {
+ STACK_WIND_COOKIE(frame, cbk, cookie, priv->children[child],
+ priv->children[child]->fops->fentrylk,
+ int_lock->domain,
+ int_lock->lockee[lockee_num].fd,
+ int_lock->lockee[lockee_num].basename, cmd,
+ ENTRYLK_WRLCK, NULL);
+ } else {
+ STACK_WIND_COOKIE(frame, cbk, cookie, priv->children[child],
+ priv->children[child]->fops->entrylk,
+ int_lock->domain,
+ &int_lock->lockee[lockee_num].loc,
+ int_lock->lockee[lockee_num].basename, cmd,
+ ENTRYLK_WRLCK, NULL);
+ }
break;
- }
-out:
- return 0;
-}
-
-static int32_t
-afr_unlock_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_internal_lock_t *int_lock = NULL;
- int32_t child_index = 0;
- int lockee_no = 0;
-
- priv = this->private;
- lockee_no = (int)((long)cookie) / priv->child_count;
- child_index = (int)((long)cookie) % priv->child_count;
- local = frame->local;
- int_lock = &local->internal_lock;
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ flock = int_lock->lockee[lockee_num].flock;
+ if (unlock) {
+ flock.l_type = F_UNLCK;
+ } else if (blocking) { /*Doesn't make sense to have blocking
+ unlock*/
+ cmd1 = F_SETLKW;
+ }
- if (op_ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_ENTRY_UNLOCK_FAIL,
- "%s: unlock failed on %s", local->loc.path,
- priv->children[child_index]->name);
+ if (local->fd) {
+ STACK_WIND_COOKIE(
+ frame, cbk, cookie, priv->children[child],
+ priv->children[child]->fops->finodelk, int_lock->domain,
+ int_lock->lockee[lockee_num].fd, cmd1, &flock, NULL);
+ } else {
+ STACK_WIND_COOKIE(
+ frame, cbk, cookie, priv->children[child],
+ priv->children[child]->fops->inodelk, int_lock->domain,
+ &int_lock->lockee[lockee_num].loc, cmd1, &flock, NULL);
+ }
+ break;
}
-
- int_lock->lockee[lockee_no].locked_nodes[child_index] &= LOCKED_NO;
- afr_unlock_common_cbk(frame, cookie, this, op_ret, op_errno, NULL);
-
- return 0;
}
static int
-afr_unlock_entrylk(call_frame_t *frame, xlator_t *this)
+afr_unlock_now(call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
int call_count = 0;
- int index = 0;
- int lockee_no = 0;
- int copies = 0;
+ int child_index = 0;
+ int lockee_num = 0;
int i = -1;
local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
- copies = priv->child_count;
call_count = afr_lockee_locked_nodes_count(int_lock);
@@ -396,16 +403,13 @@ afr_unlock_entrylk(call_frame_t *frame, xlator_t *this)
}
for (i = 0; i < int_lock->lockee_count * priv->child_count; i++) {
- lockee_no = i / copies;
- index = i % copies;
- if (int_lock->lockee[lockee_no].locked_nodes[index] & LOCKED_YES) {
- STACK_WIND_COOKIE(
- frame, afr_unlock_entrylk_cbk, (void *)(long)i,
- priv->children[index], priv->children[index]->fops->entrylk,
- int_lock->domain, &int_lock->lockee[lockee_no].loc,
- int_lock->lockee[lockee_no].basename, ENTRYLK_UNLOCK,
- ENTRYLK_WRLCK, NULL);
-
+ lockee_num = i / priv->child_count;
+ child_index = i % priv->child_count;
+ if (int_lock->lockee[lockee_num].locked_nodes[child_index] &
+ LOCKED_YES) {
+ afr_internal_lock_wind(frame, afr_unlock_common_cbk,
+ (void *)(long)i, child_index, lockee_num,
+ _gf_false, _gf_true);
if (!--call_count)
break;
}
@@ -415,18 +419,6 @@ out:
return 0;
}
-int32_t
-afr_unlock_now(call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = frame->local;
-
- if (afr_is_inodelk_transaction(local->transaction.type))
- afr_unlock_inodelk(frame, this);
- else
- afr_unlock_entrylk(frame, this);
- return 0;
-}
-
static int32_t
afr_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t op_errno, dict_t *xdata)
@@ -436,14 +428,14 @@ afr_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
afr_private_t *priv = NULL;
int cky = (long)cookie;
int child_index = 0;
- int lockee_no = 0;
+ int lockee_num = 0;
priv = this->private;
local = frame->local;
int_lock = &local->internal_lock;
child_index = ((int)cky) % priv->child_count;
- lockee_no = ((int)cky) / priv->child_count;
+ lockee_num = ((int)cky) / priv->child_count;
LOCK(&frame->lock);
{
@@ -470,23 +462,16 @@ afr_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
afr_unlock_now(frame, this);
} else {
if (op_ret == 0) {
- if (local->transaction.type == AFR_ENTRY_TRANSACTION ||
- local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
- int_lock->lockee[lockee_no]
- .locked_nodes[child_index] |= LOCKED_YES;
- int_lock->lockee[lockee_no].locked_count++;
- int_lock->entrylk_lock_count++;
- } else {
- int_lock->locked_nodes[child_index] |= LOCKED_YES;
- int_lock->lock_count++;
-
- if (local->transaction.type == AFR_DATA_TRANSACTION) {
- LOCK(&local->inode->lock);
- {
- local->inode_ctx->lock_count++;
- }
- UNLOCK(&local->inode->lock);
+ int_lock->lockee[lockee_num]
+ .locked_nodes[child_index] |= LOCKED_YES;
+ int_lock->lockee[lockee_num].locked_count++;
+ int_lock->lock_count++;
+ if (local->transaction.type == AFR_DATA_TRANSACTION) {
+ LOCK(&local->inode->lock);
+ {
+ local->inode_ctx->lock_count++;
}
+ UNLOCK(&local->inode->lock);
}
}
afr_lock_blocking(frame, this, cky + 1);
@@ -495,30 +480,6 @@ afr_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
return 0;
}
-static int32_t
-afr_blocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_lock_cbk(frame, cookie, this, op_ret, op_errno, xdata);
- return 0;
-}
-
-static int32_t
-afr_blocking_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_lock_cbk(frame, cookie, this, op_ret, op_errno, xdata);
- return 0;
-}
-
-static gf_boolean_t
-afr_is_entrylk(afr_transaction_type trans_type)
-{
- if (afr_is_inodelk_transaction(trans_type))
- return _gf_false;
- return _gf_true;
-}
-
static gf_boolean_t
_is_lock_wind_needed(afr_local_t *local, int child_index)
{
@@ -528,40 +489,12 @@ _is_lock_wind_needed(afr_local_t *local, int child_index)
return _gf_true;
}
-static void
-afr_log_entry_locks_failure(xlator_t *this, afr_local_t *local,
- afr_internal_lock_t *int_lock)
-{
- const char *fop = NULL;
- char *pargfid = NULL;
- const char *name = NULL;
-
- fop = gf_fop_list[local->op];
-
- switch (local->op) {
- case GF_FOP_LINK:
- pargfid = uuid_utoa(local->newloc.pargfid);
- name = local->newloc.name;
- break;
- default:
- pargfid = uuid_utoa(local->loc.pargfid);
- name = local->loc.name;
- break;
- }
-
- gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_BLOCKING_LKS_FAILED,
- "Unable to obtain sufficient blocking entry locks on at least "
- "one child while attempting %s on {pgfid:%s, name:%s}.",
- fop, pargfid, name);
-}
-
static gf_boolean_t
is_blocking_locks_count_sufficient(call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
afr_internal_lock_t *int_lock = NULL;
- gf_boolean_t is_entrylk = _gf_false;
int child = 0;
int nlockee = 0;
int lockee_count = 0;
@@ -571,42 +504,26 @@ is_blocking_locks_count_sufficient(call_frame_t *frame, xlator_t *this)
priv = this->private;
int_lock = &local->internal_lock;
lockee_count = int_lock->lockee_count;
- is_entrylk = afr_is_entrylk(local->transaction.type);
-
- if (!is_entrylk) {
- if (int_lock->lock_count == 0) {
- gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_BLOCKING_LKS_FAILED,
- "Unable to obtain "
- "blocking inode lock on even one child for "
- "gfid:%s.",
- uuid_utoa(local->inode->gfid));
- return _gf_false;
- } else {
- /*inodelk succeeded on at least one child. */
- return _gf_true;
- }
- } else {
- if (int_lock->entrylk_lock_count == 0) {
- afr_log_entry_locks_failure(this, local, int_lock);
- return _gf_false;
- }
- /* For FOPS that take multiple sets of locks (mkdir, rename),
- * there must be at least one brick on which the locks from
- * all lock sets were successful. */
- for (child = 0; child < priv->child_count; child++) {
- ret = _gf_true;
- for (nlockee = 0; nlockee < lockee_count; nlockee++) {
- if (!(int_lock->lockee[nlockee].locked_nodes[child] &
- LOCKED_YES))
- ret = _gf_false;
- }
- if (ret)
- return ret;
+ if (int_lock->lock_count == 0) {
+ afr_log_locks_failure(frame, "any subvolume", "lock",
+ int_lock->lock_op_errno);
+ return _gf_false;
+ }
+ /* For FOPS that take multiple sets of locks (mkdir, rename),
+ * there must be at least one brick on which the locks from
+ * all lock sets were successful. */
+ for (child = 0; child < priv->child_count; child++) {
+ ret = _gf_true;
+ for (nlockee = 0; nlockee < lockee_count; nlockee++) {
+ if (!(int_lock->lockee[nlockee].locked_nodes[child] & LOCKED_YES))
+ ret = _gf_false;
}
- if (!ret)
- afr_log_entry_locks_failure(this, local, int_lock);
+ if (ret)
+ return ret;
}
+ if (!ret)
+ afr_log_locks_failure(frame, "all", "lock", int_lock->lock_op_errno);
return ret;
}
@@ -617,27 +534,16 @@ afr_lock_blocking(call_frame_t *frame, xlator_t *this, int cookie)
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- struct gf_flock flock = {
- 0,
- };
uint64_t ctx = 0;
int ret = 0;
int child_index = 0;
- int lockee_no = 0;
- gf_boolean_t is_entrylk = _gf_false;
+ int lockee_num = 0;
local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
child_index = cookie % priv->child_count;
- lockee_no = cookie / priv->child_count;
- is_entrylk = afr_is_entrylk(local->transaction.type);
-
- if (!is_entrylk) {
- flock.l_start = int_lock->flock.l_start;
- flock.l_len = int_lock->flock.l_len;
- flock.l_type = int_lock->flock.l_type;
- }
+ lockee_num = cookie / priv->child_count;
if (local->fd) {
ret = fd_ctx_get(local->fd, this, &ctx);
@@ -681,52 +587,8 @@ afr_lock_blocking(call_frame_t *frame, xlator_t *this, int cookie)
return 0;
}
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
-
- if (local->fd) {
- STACK_WIND_COOKIE(
- frame, afr_blocking_inodelk_cbk, (void *)(long)child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->finodelk,
- int_lock->domain, local->fd, F_SETLKW, &flock, NULL);
-
- } else {
- STACK_WIND_COOKIE(
- frame, afr_blocking_inodelk_cbk, (void *)(long)child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->inodelk,
- int_lock->domain, &local->loc, F_SETLKW, &flock, NULL);
- }
-
- break;
-
- case AFR_ENTRY_RENAME_TRANSACTION:
- case AFR_ENTRY_TRANSACTION:
- /*Accounting for child_index increments on 'down'
- *and 'fd-less' children */
-
- if (local->fd) {
- STACK_WIND_COOKIE(frame, afr_blocking_entrylk_cbk,
- (void *)(long)cookie,
- priv->children[child_index],
- priv->children[child_index]->fops->fentrylk,
- int_lock->domain, local->fd,
- int_lock->lockee[lockee_no].basename,
- ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
- } else {
- STACK_WIND_COOKIE(
- frame, afr_blocking_entrylk_cbk, (void *)(long)cookie,
- priv->children[child_index],
- priv->children[child_index]->fops->entrylk,
- int_lock->domain, &int_lock->lockee[lockee_no].loc,
- int_lock->lockee[lockee_no].basename, ENTRYLK_LOCK,
- ENTRYLK_WRLCK, NULL);
- }
-
- break;
- }
+ afr_internal_lock_wind(frame, afr_lock_cbk, (void *)(long)cookie,
+ child_index, lockee_num, _gf_true, _gf_false);
return 0;
}
@@ -743,20 +605,10 @@ afr_blocking_lock(call_frame_t *frame, xlator_t *this)
local = frame->local;
int_lock = &local->internal_lock;
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
- initialize_inodelk_variables(frame, this);
- break;
-
- case AFR_ENTRY_RENAME_TRANSACTION:
- case AFR_ENTRY_TRANSACTION:
- up_count = AFR_COUNT(local->child_up, priv->child_count);
- int_lock->lk_call_count = int_lock->lk_expected_count =
- (int_lock->lockee_count * up_count);
- initialize_entrylk_variables(frame, this);
- break;
- }
+ up_count = AFR_COUNT(local->child_up, priv->child_count);
+ int_lock->lk_call_count = int_lock->lk_expected_count =
+ (int_lock->lockee_count * up_count);
+ initialize_internal_lock_variables(frame, this);
afr_lock_blocking(frame, this, 0);
@@ -764,171 +616,20 @@ afr_blocking_lock(call_frame_t *frame, xlator_t *this)
}
static int32_t
-afr_nonblocking_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+afr_nb_internal_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
int call_count = 0;
- int child_index = (long)cookie;
- int copies = 0;
- int index = 0;
- int lockee_no = 0;
- afr_private_t *priv = NULL;
-
- priv = this->private;
-
- copies = priv->child_count;
- index = child_index % copies;
- lockee_no = child_index / copies;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- LOCK(&frame->lock);
- {
- if (op_ret < 0) {
- if (op_errno == ENOSYS) {
- /* return ENOTSUP */
- gf_msg(this->name, GF_LOG_ERROR, ENOSYS,
- AFR_MSG_LOCK_XLATOR_NOT_LOADED,
- "subvolume does not support "
- "locking. please load features/locks"
- " xlator on server");
- local->op_ret = op_ret;
- int_lock->lock_op_ret = op_ret;
-
- int_lock->lock_op_errno = op_errno;
- local->op_errno = op_errno;
- }
- } else if (op_ret == 0) {
- int_lock->lockee[lockee_no].locked_nodes[index] |= LOCKED_YES;
- int_lock->lockee[lockee_no].locked_count++;
- int_lock->entrylk_lock_count++;
- }
-
- call_count = --int_lock->lk_call_count;
- }
- UNLOCK(&frame->lock);
-
- if (call_count == 0) {
- gf_msg_trace(this->name, 0, "Last locking reply received");
- /* all locks successful. Proceed to call FOP */
- if (int_lock->entrylk_lock_count == int_lock->lk_expected_count) {
- gf_msg_trace(this->name, 0, "All servers locked. Calling the cbk");
- int_lock->lock_op_ret = 0;
- int_lock->lock_cbk(frame, this);
- }
- /* Not all locks were successful. Unlock and try locking
- again, this time with serially blocking locks */
- else {
- gf_msg_trace(this->name, 0,
- "%d servers locked. Trying again "
- "with blocking calls",
- int_lock->lock_count);
-
- afr_unlock_now(frame, this);
- }
- }
-
- return 0;
-}
-
-int
-afr_nonblocking_entrylk(call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
+ int child_index = 0;
+ int lockee_num = 0;
afr_private_t *priv = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
- int copies = 0;
- int index = 0;
- int lockee_no = 0;
- int32_t call_count = 0;
- int i = 0;
- local = frame->local;
- int_lock = &local->internal_lock;
priv = this->private;
- copies = priv->child_count;
- initialize_entrylk_variables(frame, this);
-
- if (local->fd) {
- fd_ctx = afr_fd_ctx_get(local->fd, this);
- if (!fd_ctx) {
- gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_FD_CTX_GET_FAILED,
- "unable to get fd ctx for fd=%p", local->fd);
-
- local->op_ret = -1;
- int_lock->lock_op_ret = -1;
- local->op_errno = EINVAL;
- int_lock->lock_op_errno = EINVAL;
-
- afr_unlock_now(frame, this);
- return -1;
- }
-
- call_count = int_lock->lockee_count * internal_lock_count(frame, this);
- int_lock->lk_call_count = call_count;
- int_lock->lk_expected_count = call_count;
-
- if (!call_count) {
- gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_INFO_COMMON,
- "fd not open on any subvolumes. aborting.");
- afr_unlock_now(frame, this);
- goto out;
- }
-
- /* Send non-blocking entrylk calls only on up children
- and where the fd has been opened */
- for (i = 0; i < int_lock->lockee_count * priv->child_count; i++) {
- index = i % copies;
- lockee_no = i / copies;
- if (local->child_up[index]) {
- STACK_WIND_COOKIE(frame, afr_nonblocking_entrylk_cbk,
- (void *)(long)i, priv->children[index],
- priv->children[index]->fops->fentrylk,
- this->name, local->fd,
- int_lock->lockee[lockee_no].basename,
- ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL);
- if (!--call_count)
- break;
- }
- }
- } else {
- call_count = int_lock->lockee_count * internal_lock_count(frame, this);
- int_lock->lk_call_count = call_count;
- int_lock->lk_expected_count = call_count;
-
- for (i = 0; i < int_lock->lockee_count * priv->child_count; i++) {
- index = i % copies;
- lockee_no = i / copies;
- if (local->child_up[index]) {
- STACK_WIND_COOKIE(frame, afr_nonblocking_entrylk_cbk,
- (void *)(long)i, priv->children[index],
- priv->children[index]->fops->entrylk,
- this->name, &int_lock->lockee[lockee_no].loc,
- int_lock->lockee[lockee_no].basename,
- ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL);
-
- if (!--call_count)
- break;
- }
- }
- }
-out:
- return 0;
-}
-
-int32_t
-afr_nonblocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- int call_count = 0;
- int child_index = (long)cookie;
+ child_index = ((long)cookie) % priv->child_count;
+ lockee_num = ((long)cookie) / priv->child_count;
local = frame->local;
int_lock = &local->internal_lock;
@@ -953,11 +654,14 @@ afr_nonblocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
" xlator on server");
local->op_ret = op_ret;
int_lock->lock_op_ret = op_ret;
+
int_lock->lock_op_errno = op_errno;
local->op_errno = op_errno;
}
- } else {
- int_lock->locked_nodes[child_index] |= LOCKED_YES;
+ } else if (op_ret == 0) {
+ int_lock->lockee[lockee_num]
+ .locked_nodes[child_index] |= LOCKED_YES;
+ int_lock->lockee[lockee_num].locked_count++;
int_lock->lock_count++;
}
@@ -966,7 +670,7 @@ afr_nonblocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
UNLOCK(&frame->lock);
if (call_count == 0) {
- gf_msg_trace(this->name, 0, "Last inode locking reply received");
+ gf_msg_trace(this->name, 0, "Last locking reply received");
/* all locks successful. Proceed to call FOP */
if (int_lock->lock_count == int_lock->lk_expected_count) {
gf_msg_trace(this->name, 0, "All servers locked. Calling the cbk");
@@ -977,8 +681,8 @@ afr_nonblocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
again, this time with serially blocking locks */
else {
gf_msg_trace(this->name, 0,
- "%d servers locked. "
- "Trying again with blocking calls",
+ "%d servers locked. Trying again "
+ "with blocking calls",
int_lock->lock_count);
afr_unlock_now(frame, this);
@@ -989,12 +693,14 @@ afr_nonblocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
}
int
-afr_nonblocking_inodelk(call_frame_t *frame, xlator_t *this)
+afr_lock_nonblocking(call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
afr_fd_ctx_t *fd_ctx = NULL;
+ int child = 0;
+ int lockee_num = 0;
int32_t call_count = 0;
int i = 0;
int ret = 0;
@@ -1003,7 +709,7 @@ afr_nonblocking_inodelk(call_frame_t *frame, xlator_t *this)
int_lock = &local->internal_lock;
priv = this->private;
- initialize_inodelk_variables(frame, this);
+ initialize_internal_lock_variables(frame, this);
if (local->fd) {
fd_ctx = afr_fd_ctx_get(local->fd, this);
@@ -1022,36 +728,29 @@ afr_nonblocking_inodelk(call_frame_t *frame, xlator_t *this)
}
}
- call_count = internal_lock_count(frame, this);
+ call_count = int_lock->lockee_count * internal_lock_count(frame, this);
int_lock->lk_call_count = call_count;
int_lock->lk_expected_count = call_count;
if (!call_count) {
- gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SUBVOLS_DOWN,
- "All bricks are down, aborting.");
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_INFO_COMMON,
+ "fd not open on any subvolumes. aborting.");
afr_unlock_now(frame, this);
goto out;
}
- /* Send non-blocking inodelk calls only on up children
+ /* Send non-blocking lock calls only on up children
and where the fd has been opened */
- for (i = 0; i < priv->child_count; i++) {
- if (!local->child_up[i])
- continue;
-
- if (local->fd) {
- STACK_WIND_COOKIE(
- frame, afr_nonblocking_inodelk_cbk, (void *)(long)i,
- priv->children[i], priv->children[i]->fops->finodelk,
- int_lock->domain, local->fd, F_SETLK, &int_lock->flock, NULL);
- } else {
- STACK_WIND_COOKIE(
- frame, afr_nonblocking_inodelk_cbk, (void *)(long)i,
- priv->children[i], priv->children[i]->fops->inodelk,
- int_lock->domain, &local->loc, F_SETLK, &int_lock->flock, NULL);
+ for (i = 0; i < int_lock->lockee_count * priv->child_count; i++) {
+ child = i % priv->child_count;
+ lockee_num = i / priv->child_count;
+ if (local->child_up[child]) {
+ afr_internal_lock_wind(frame, afr_nb_internal_lock_cbk,
+ (void *)(long)i, child, lockee_num,
+ _gf_false, _gf_false);
+ if (!--call_count)
+ break;
}
- if (!--call_count)
- break;
}
out:
return ret;
diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h
index 531184b4bd7..816065fb57a 100644
--- a/xlators/cluster/afr/src/afr-mem-types.h
+++ b/xlators/cluster/afr/src/afr-mem-types.h
@@ -11,7 +11,7 @@
#ifndef __AFR_MEM_TYPES_H__
#define __AFR_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_afr_mem_types_ {
gf_afr_mt_afr_fd_ctx_t = gf_common_mt_end + 1,
@@ -31,6 +31,8 @@ enum gf_afr_mem_types_ {
gf_afr_mt_empty_brick_t,
gf_afr_mt_child_latency_t,
gf_afr_mt_atomic_t,
+ gf_afr_mt_lk_heal_info_t,
+ gf_afr_mt_gf_lock,
gf_afr_mt_end
};
#endif
diff --git a/xlators/cluster/afr/src/afr-messages.h b/xlators/cluster/afr/src/afr-messages.h
index 696336889d3..e73fd997765 100644
--- a/xlators/cluster/afr/src/afr-messages.h
+++ b/xlators/cluster/afr/src/afr-messages.h
@@ -11,7 +11,7 @@
#ifndef _AFR_MESSAGES_H_
#define _AFR_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
@@ -23,25 +23,145 @@
* glfs-message-id.h.
*/
-GLFS_MSGID(AFR, AFR_MSG_QUORUM_FAIL, AFR_MSG_QUORUM_MET,
- AFR_MSG_QUORUM_OVERRIDE, AFR_MSG_INVALID_CHILD_UP, AFR_MSG_SUBVOL_UP,
- AFR_MSG_SUBVOLS_DOWN, AFR_MSG_ENTRY_UNLOCK_FAIL, AFR_MSG_SPLIT_BRAIN,
- AFR_MSG_OPEN_FAIL, AFR_MSG_UNLOCK_FAIL, AFR_MSG_REPLACE_BRICK_STATUS,
- AFR_MSG_GFID_NULL, AFR_MSG_FD_CREATE_FAILED, AFR_MSG_DICT_SET_FAILED,
- AFR_MSG_EXPUNGING_FILE_OR_DIR, AFR_MSG_MIGRATION_IN_PROGRESS,
- AFR_MSG_CHILD_MISCONFIGURED, AFR_MSG_VOL_MISCONFIGURED,
- AFR_MSG_BLOCKING_LKS_FAILED, AFR_MSG_INVALID_FD, AFR_MSG_LOCK_INFO,
- AFR_MSG_LOCK_XLATOR_NOT_LOADED, AFR_MSG_FD_CTX_GET_FAILED,
- AFR_MSG_INVALID_SUBVOL, AFR_MSG_PUMP_XLATOR_ERROR,
- AFR_MSG_SELF_HEAL_INFO, AFR_MSG_READ_SUBVOL_ERROR,
- AFR_MSG_DICT_GET_FAILED, AFR_MSG_INFO_COMMON,
- AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR, AFR_MSG_LOCAL_CHILD,
- AFR_MSG_INVALID_DATA, AFR_MSG_INVALID_ARG,
- AFR_MSG_INDEX_DIR_GET_FAILED, AFR_MSG_FSYNC_FAILED,
- AFR_MSG_FAVORITE_CHILD, AFR_MSG_SELF_HEAL_FAILED,
- AFR_MSG_SPLIT_BRAIN_STATUS, AFR_MSG_ADD_BRICK_STATUS,
- AFR_MSG_NO_CHANGELOG, AFR_MSG_TIMER_CREATE_FAIL,
- AFR_MSG_SBRAIN_FAV_CHILD_POLICY, AFR_MSG_INODE_CTX_GET_FAILED,
- AFR_MSG_THIN_ARB);
+GLFS_MSGID(
+ AFR, AFR_MSG_QUORUM_FAIL, AFR_MSG_QUORUM_MET, AFR_MSG_QUORUM_OVERRIDE,
+ AFR_MSG_INVALID_CHILD_UP, AFR_MSG_SUBVOL_UP, AFR_MSG_SUBVOLS_DOWN,
+ AFR_MSG_ENTRY_UNLOCK_FAIL, AFR_MSG_SPLIT_BRAIN, AFR_MSG_OPEN_FAIL,
+ AFR_MSG_UNLOCK_FAIL, AFR_MSG_REPLACE_BRICK_STATUS, AFR_MSG_GFID_NULL,
+ AFR_MSG_FD_CREATE_FAILED, AFR_MSG_DICT_SET_FAILED,
+ AFR_MSG_EXPUNGING_FILE_OR_DIR, AFR_MSG_MIGRATION_IN_PROGRESS,
+ AFR_MSG_CHILD_MISCONFIGURED, AFR_MSG_VOL_MISCONFIGURED,
+ AFR_MSG_INTERNAL_LKS_FAILED, AFR_MSG_INVALID_FD, AFR_MSG_LOCK_INFO,
+ AFR_MSG_LOCK_XLATOR_NOT_LOADED, AFR_MSG_FD_CTX_GET_FAILED,
+ AFR_MSG_INVALID_SUBVOL, AFR_MSG_PUMP_XLATOR_ERROR, AFR_MSG_SELF_HEAL_INFO,
+ AFR_MSG_READ_SUBVOL_ERROR, AFR_MSG_DICT_GET_FAILED, AFR_MSG_INFO_COMMON,
+ AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR, AFR_MSG_LOCAL_CHILD, AFR_MSG_INVALID_DATA,
+ AFR_MSG_INVALID_ARG, AFR_MSG_INDEX_DIR_GET_FAILED, AFR_MSG_FSYNC_FAILED,
+ AFR_MSG_FAVORITE_CHILD, AFR_MSG_SELF_HEAL_FAILED,
+ AFR_MSG_SPLIT_BRAIN_STATUS, AFR_MSG_ADD_BRICK_STATUS, AFR_MSG_NO_CHANGELOG,
+ AFR_MSG_TIMER_CREATE_FAIL, AFR_MSG_SBRAIN_FAV_CHILD_POLICY,
+ AFR_MSG_INODE_CTX_GET_FAILED, AFR_MSG_THIN_ARB,
+ AFR_MSG_THIN_ARB_XATTROP_FAILED, AFR_MSG_THIN_ARB_LOC_POP_FAILED,
+ AFR_MSG_GET_PEND_VAL, AFR_MSG_THIN_ARB_SKIP_SHD, AFR_MSG_UNKNOWN_SET,
+ AFR_MSG_NO_XL_ID, AFR_MSG_SELF_HEAL_INFO_START,
+ AFR_MSG_SELF_HEAL_INFO_FINISH, AFR_MSG_INCRE_COUNT,
+ AFR_MSG_ADD_TO_OUTPUT_FAILED, AFR_MSG_SET_TIME_FAILED,
+ AFR_MSG_GFID_MISMATCH_DETECTED, AFR_MSG_GFID_HEAL_MSG,
+ AFR_MSG_THIN_ARB_LOOKUP_FAILED, AFR_MSG_DICT_CREATE_FAILED,
+ AFR_MSG_NO_MAJORITY_TO_RESOLVE, AFR_MSG_TYPE_MISMATCH,
+ AFR_MSG_SIZE_POLICY_NOT_APPLICABLE, AFR_MSG_NO_CHILD_SELECTED,
+ AFR_MSG_INVALID_CHILD, AFR_MSG_RESOLVE_CONFLICTING_DATA,
+ SERROR_GETTING_SRC_BRICK, SNO_DIFF_IN_MTIME, SNO_BIGGER_FILE,
+ SALL_BRICKS_UP_TO_RESOLVE, AFR_MSG_UNLOCK_FAILED, AFR_MSG_POST_OP_FAILED,
+ AFR_MSG_TA_FRAME_CREATE_FAILED, AFR_MSG_SET_KEY_XATTROP_FAILED,
+ AFR_MSG_BLOCKING_ENTRYLKS_FAILED, AFR_MSG_FOP_FAILED,
+ AFR_MSG_CLEAN_UP_FAILED, AFR_MSG_UNABLE_TO_FETCH, AFR_MSG_XATTR_SET_FAILED,
+ AFR_MSG_SPLIT_BRAIN_REPLICA, AFR_MSG_INODE_CTX_FAILED,
+ AFR_MSG_LOOKUP_FAILED, AFR_MSG_ALL_SUBVOLS_DOWN,
+ AFR_MSG_RELEASE_LOCK_FAILED, AFR_MSG_CLEAR_TIME_SPLIT_BRAIN,
+ AFR_MSG_READ_FAILED, AFR_MSG_LAUNCH_FAILED, AFR_MSG_READ_SUBVOL_NOT_UP,
+ AFR_MSG_LK_HEAL_DOM, AFR_MSG_NEW_BRICK, AFR_MSG_SPLIT_BRAIN_SET_FAILED,
+ AFR_MSG_SPLIT_BRAIN_DETERMINE_FAILED, AFR_MSG_HEALER_SPAWN_FAILED,
+ AFR_MSG_ADD_CRAWL_EVENT_FAILED, AFR_MSG_NULL_DEREF, AFR_MSG_SET_PEND_XATTR,
+ AFR_MSG_INTERNAL_ATTR);
+#define AFR_MSG_DICT_GET_FAILED_STR "Dict get failed"
+#define AFR_MSG_DICT_SET_FAILED_STR "Dict set failed"
+#define AFR_MSG_HEALER_SPAWN_FAILED_STR "Healer spawn failed"
+#define AFR_MSG_ADD_CRAWL_EVENT_FAILED_STR "Adding crawl event failed"
+#define AFR_MSG_INVALID_ARG_STR "Invalid argument"
+#define AFR_MSG_INDEX_DIR_GET_FAILED_STR "unable to get index-dir on "
+#define AFR_MSG_THIN_ARB_LOOKUP_FAILED_STR "Failed lookup on file"
+#define AFR_MSG_DICT_CREATE_FAILED_STR "Failed to create dict."
+#define AFR_MSG_THIN_ARB_XATTROP_FAILED_STR "Xattrop failed."
+#define AFR_MSG_THIN_ARB_LOC_POP_FAILED_STR \
+ "Failed to populate loc for thin-arbiter"
+#define AFR_MSG_GET_PEND_VAL_STR "Error getting value of pending"
+#define AFR_MSG_THIN_ARB_SKIP_SHD_STR "I am not the god shd. skipping."
+#define AFR_MSG_UNKNOWN_SET_STR "Unknown set"
+#define AFR_MSG_NO_XL_ID_STR "xl does not have id"
+#define AFR_MSG_SELF_HEAL_INFO_START_STR "starting full sweep on"
+#define AFR_MSG_SELF_HEAL_INFO_FINISH_STR "finished full sweep on"
+#define AFR_MSG_INCRE_COUNT_STR "Could not increment the counter."
+#define AFR_MSG_ADD_TO_OUTPUT_FAILED_STR "Could not add to output"
+#define AFR_MSG_SET_TIME_FAILED_STR "Could not set time"
+#define AFR_MSG_GFID_HEAL_MSG_STR "Error setting gfid-heal-msg dict"
+#define AFR_MSG_NO_MAJORITY_TO_RESOLVE_STR \
+ "No majority to resolve gfid split brain"
+#define AFR_MSG_GFID_MISMATCH_DETECTED_STR "Gfid mismatch dectected"
+#define AFR_MSG_SELF_HEAL_INFO_STR "performing selfheal"
+#define AFR_MSG_TYPE_MISMATCH_STR "TYPE mismatch"
+#define AFR_MSG_SIZE_POLICY_NOT_APPLICABLE_STR \
+ "Size policy is not applicable to directories."
+#define AFR_MSG_NO_CHILD_SELECTED_STR \
+ "No child selected by favorite-child policy"
+#define AFR_MSG_INVALID_CHILD_STR "Invalid child"
+#define AFR_MSG_RESOLVE_CONFLICTING_DATA_STR \
+ "selected as authentic to resolve conflicting data"
+#define SERROR_GETTING_SRC_BRICK_STR "Error getting the source brick"
+#define SNO_DIFF_IN_MTIME_STR "No difference in mtime"
+#define SNO_BIGGER_FILE_STR "No bigger file"
+#define SALL_BRICKS_UP_TO_RESOLVE_STR \
+ "All the bricks should be up to resolve the gfid split brain"
+#define AFR_MSG_UNLOCK_FAILED_STR "Failed to unlock"
+#define AFR_MSG_POST_OP_FAILED_STR "Post-op on thin-arbiter failed"
+#define AFR_MSG_TA_FRAME_CREATE_FAILED_STR "Failed to create ta_frame"
+#define AFR_MSG_SET_KEY_XATTROP_FAILED_STR "Could not set key during xattrop"
+#define AFR_MSG_BLOCKING_ENTRYLKS_FAILED_STR "Blocking entrylks failed"
+#define AFR_MSG_FSYNC_FAILED_STR "fsync failed"
+#define AFR_MSG_QUORUM_FAIL_STR "quorum is not met"
+#define AFR_MSG_FOP_FAILED_STR "Failing Fop"
+#define AFR_MSG_INVALID_SUBVOL_STR "not a subvolume"
+#define AFR_MSG_VOL_MISCONFIGURED_STR "Volume is dangling"
+#define AFR_MSG_CHILD_MISCONFIGURED_STR \
+ "replicate translator needs more than one subvolume defined"
+#define AFR_MSG_CLEAN_UP_FAILED_STR "Failed to clean up healer threads"
+#define AFR_MSG_QUORUM_OVERRIDE_STR "overriding quorum-count"
+#define AFR_MSG_UNABLE_TO_FETCH_STR \
+ "Unable to fetch afr-pending-xattr option from volfile. Falling back to " \
+ "using client translator names"
+#define AFR_MSG_NULL_DEREF_STR "possible NULL deref"
+#define AFR_MSG_XATTR_SET_FAILED_STR "Cannot set xattr cookie key"
+#define AFR_MSG_SPLIT_BRAIN_STATUS_STR "Failed to create synctask"
+#define AFR_MSG_SUBVOLS_DOWN_STR "All subvolumes are not up"
+#define AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR_STR \
+ "Failed to cancel split-brain choice"
+#define AFR_MSG_SPLIT_BRAIN_REPLICA_STR \
+ "Cannot set replica. File is not in data/metadata split-brain"
+#define AFR_MSG_INODE_CTX_FAILED_STR "Failed to get inode_ctx"
+#define AFR_MSG_READ_SUBVOL_ERROR_STR "no read subvols"
+#define AFR_MSG_LOCAL_CHILD_STR "selecting local read-child"
+#define AFR_MSG_LOOKUP_FAILED_STR "Failed to lookup/create thin-arbiter id file"
+#define AFR_MSG_TIMER_CREATE_FAIL_STR \
+ "Cannot create timer for delayed initialization"
+#define AFR_MSG_SUBVOL_UP_STR "Subvolume came back up; going online"
+#define AFR_MSG_ALL_SUBVOLS_DOWN_STR \
+ "All subvolumes are down. Going offline until atleast one of them is up"
+#define AFR_MSG_RELEASE_LOCK_FAILED_STR "Failed to release lock"
+#define AFR_MSG_INVALID_CHILD_UP_STR "Received child_up from invalid subvolume"
+#define AFR_MSG_QUORUM_MET_STR "Client-quorum is met"
+#define AFR_MSG_EXPUNGING_FILE_OR_DIR_STR "expunging file or dir"
+#define AFR_MSG_SELF_HEAL_FAILED_STR "Invalid"
+#define AFR_MSG_SPLIT_BRAIN_STR "Skipping conservative mergeon the file"
+#define AFR_MSG_CLEAR_TIME_SPLIT_BRAIN_STR "clear time split brain"
+#define AFR_MSG_READ_FAILED_STR "Failing read since good brick is down"
+#define AFR_MSG_LAUNCH_FAILED_STR "Failed to launch synctask"
+#define AFR_MSG_READ_SUBVOL_NOT_UP_STR \
+ "read subvolume in this generation is not up"
+#define AFR_MSG_INTERNAL_LKS_FAILED_STR \
+ "Unable to work with lk-owner while attempting fop"
+#define AFR_MSG_LOCK_XLATOR_NOT_LOADED_STR \
+ "subvolume does not support locking. please load features/locks xlator " \
+ "on server."
+#define AFR_MSG_FD_CTX_GET_FAILED_STR "unable to get fd ctx"
+#define AFR_MSG_INFO_COMMON_STR "fd not open on any subvolumes, aborting."
+#define AFR_MSG_REPLACE_BRICK_STATUS_STR "Couldn't acquire lock on any child."
+#define AFR_MSG_NEW_BRICK_STR "New brick"
+#define AFR_MSG_SPLIT_BRAIN_SET_FAILED_STR \
+ "Failed to set split-brain choice to -1"
+#define AFR_MSG_SPLIT_BRAIN_DETERMINE_FAILED_STR \
+ "Failed to determine split-brain. Aborting split-brain-choice set"
+#define AFR_MSG_OPEN_FAIL_STR "Failed to open subvolume"
+#define AFR_MSG_SET_PEND_XATTR_STR "Set of pending xattr"
+#define AFR_MSG_INTERNAL_ATTR_STR "is an internal extended attribute"
#endif /* !_AFR_MESSAGES_H_ */
diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c
index 1763dc0a4cc..64856042b65 100644
--- a/xlators/cluster/afr/src/afr-open.c
+++ b/xlators/cluster/afr/src/afr-open.c
@@ -8,32 +8,22 @@
cases as published by the Free Software Foundation.
*/
-#include <libgen.h>
#include <unistd.h>
-#include <fnmatch.h>
#include <sys/time.h>
#include <stdlib.h>
#include <signal.h>
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
-#include "statedump.h"
-
-#include "afr-inode-read.h"
-#include "afr-inode-write.h"
-#include "afr-dir-read.h"
-#include "afr-dir-write.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/statedump.h>
+
#include "afr-transaction.h"
gf_boolean_t
@@ -73,6 +63,10 @@ afr_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
local = frame->local;
fd_ctx = local->fd_ctx;
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+
LOCK(&frame->lock);
{
if (op_ret == -1) {
@@ -84,13 +78,16 @@ afr_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
if (!local->xdata_rsp && xdata)
local->xdata_rsp = dict_ref(xdata);
}
+ call_count = --local->call_count;
}
UNLOCK(&frame->lock);
- call_count = afr_frame_return(frame);
-
if (call_count == 0) {
- if ((fd_ctx->flags & O_TRUNC) && (local->op_ret >= 0)) {
+ afr_handle_replies_quorum(frame, this);
+ if (local->op_ret == -1) {
+ AFR_STACK_UNWIND(open, frame, local->op_ret, local->op_errno, NULL,
+ NULL);
+ } else if (fd_ctx->flags & O_TRUNC) {
STACK_WIND(frame, afr_open_ftruncate_cbk, this,
this->fops->ftruncate, fd, 0, NULL);
} else {
@@ -140,7 +137,7 @@ afr_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int spb_choice = 0;
+ int spb_subvol = 0;
int event_generation = 0;
int ret = 0;
int32_t op_errno = 0;
@@ -161,6 +158,11 @@ afr_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
goto out;
}
+ if (priv->quorum_count && !afr_has_quorum(local->child_up, this, NULL)) {
+ op_errno = afr_quorum_errno(priv);
+ goto out;
+ }
+
if (!afr_is_consistent_io_possible(local, priv, &op_errno))
goto out;
@@ -177,9 +179,9 @@ afr_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
ret = afr_inode_get_readable(frame, local->inode, this, NULL,
&event_generation, AFR_DATA_TRANSACTION);
if ((ret < 0) &&
- (afr_inode_split_brain_choice_get(local->inode, this, &spb_choice) ==
- 0) &&
- spb_choice < 0) {
+ (afr_split_brain_read_subvol_get(local->inode, this, NULL,
+ &spb_subvol) == 0) &&
+ spb_subvol < 0) {
afr_inode_refresh(frame, this, local->inode, local->inode->gfid,
afr_open_continue);
} else {
@@ -213,11 +215,9 @@ afr_openfd_fix_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
"successfully on subvolume %s",
local->loc.path, priv->children[child_index]->name);
} else {
- gf_msg(this->name, fop_log_level(GF_FOP_OPEN, op_errno), op_errno,
- AFR_MSG_OPEN_FAIL,
- "Failed to open %s on "
- "subvolume %s",
- local->loc.path, priv->children[child_index]->name);
+ gf_smsg(this->name, fop_log_level(GF_FOP_OPEN, op_errno), op_errno,
+ AFR_MSG_OPEN_FAIL, "path=%s", local->loc.path, "subvolume=%s",
+ priv->children[child_index]->name, NULL);
}
fd_ctx = local->fd_ctx;
diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c
index 1cd5c2eee3b..6fc2c75145c 100644
--- a/xlators/cluster/afr/src/afr-read-txn.c
+++ b/xlators/cluster/afr/src/afr-read-txn.c
@@ -30,27 +30,6 @@ afr_pending_read_decrement(afr_private_t *priv, int child_index)
GF_ATOMIC_DEC(priv->pending_reads[child_index]);
}
-static gf_boolean_t
-afr_ta_dict_contains_pending_xattr(dict_t *dict, afr_private_t *priv, int child)
-{
- int *pending = NULL;
- int ret = 0;
- int i = 0;
-
- ret = dict_get_ptr(dict, priv->pending_key[child], (void *)&pending);
- if (ret == 0) {
- for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++) {
- /* Not doing a ntoh32(pending) as we just want to check
- * if it is non-zero or not. */
- if (pending[i]) {
- return _gf_true;
- }
- }
- }
-
- return _gf_false;
-}
-
void
afr_read_txn_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
@@ -114,7 +93,7 @@ afr_ta_read_txn(void *opaque)
call_frame_t *frame = NULL;
xlator_t *this = NULL;
int read_subvol = -1;
- int up_child = AFR_CHILD_UNKNOWN;
+ int query_child = AFR_CHILD_UNKNOWN;
int possible_bad_child = AFR_CHILD_UNKNOWN;
int ret = 0;
int op_errno = ENOMEM;
@@ -134,18 +113,18 @@ afr_ta_read_txn(void *opaque)
this = frame->this;
local = frame->local;
priv = this->private;
+ query_child = local->read_txn_query_child;
- if (local->child_up[AFR_CHILD_ZERO]) {
- up_child = AFR_CHILD_ZERO;
+ if (query_child == AFR_CHILD_ZERO) {
possible_bad_child = AFR_CHILD_ONE;
- } else if (local->child_up[AFR_CHILD_ONE]) {
- up_child = AFR_CHILD_ONE;
+ } else if (query_child == AFR_CHILD_ONE) {
possible_bad_child = AFR_CHILD_ZERO;
+ } else {
+ /*read_txn_query_child is AFR_CHILD_UNKNOWN*/
+ goto out;
}
- GF_ASSERT(up_child != AFR_CHILD_UNKNOWN);
-
- /* Query the up_child to see if it blames the down one. */
+ /* Ask the query_child to see if it blames the possibly bad one. */
xdata_req = dict_new();
if (!xdata_req)
goto out;
@@ -159,30 +138,33 @@ afr_ta_read_txn(void *opaque)
goto out;
if (local->fd) {
- ret = syncop_fxattrop(priv->children[up_child], local->fd,
+ ret = syncop_fxattrop(priv->children[query_child], local->fd,
GF_XATTROP_ADD_ARRAY, xdata_req, NULL, &xdata_rsp,
NULL);
} else {
- ret = syncop_xattrop(priv->children[up_child], &local->loc,
+ ret = syncop_xattrop(priv->children[query_child], &local->loc,
GF_XATTROP_ADD_ARRAY, xdata_req, NULL, &xdata_rsp,
NULL);
}
if (ret || !xdata_rsp) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
"Failed xattrop for gfid %s on %s",
- uuid_utoa(local->inode->gfid), priv->children[up_child]->name);
+ uuid_utoa(local->inode->gfid),
+ priv->children[query_child]->name);
op_errno = -ret;
goto out;
}
if (afr_ta_dict_contains_pending_xattr(xdata_rsp, priv,
possible_bad_child)) {
- read_subvol = up_child;
+ read_subvol = query_child;
goto out;
}
dict_unref(xdata_rsp);
- /* Query thin-arbiter to see if it blames any data brick. */
- ret = afr_fill_ta_loc(this, &loc);
+ xdata_rsp = NULL;
+
+ /* It doesn't. So query thin-arbiter to see if it blames any data brick. */
+ ret = afr_fill_ta_loc(this, &loc, _gf_true);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
"Failed to populate thin-arbiter loc for: %s.", loc.name);
@@ -211,8 +193,8 @@ afr_ta_read_txn(void *opaque)
goto unlock;
}
- if (!afr_ta_dict_contains_pending_xattr(xdata_rsp, priv, up_child)) {
- read_subvol = up_child;
+ if (!afr_ta_dict_contains_pending_xattr(xdata_rsp, priv, query_child)) {
+ read_subvol = query_child;
} else {
gf_msg(this->name, GF_LOG_ERROR, EIO, AFR_MSG_THIN_ARB,
"Failing read for gfid %s since good brick %s is down",
@@ -290,7 +272,7 @@ afr_read_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
int read_subvol = -1;
inode_t *inode = NULL;
int ret = -1;
- int spb_choice = -1;
+ int spb_subvol = -1;
local = frame->local;
inode = local->inode;
@@ -321,9 +303,9 @@ afr_read_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
local->read_attempted[read_subvol] = 1;
readfn:
if (read_subvol == -1) {
- ret = afr_inode_split_brain_choice_get(inode, this, &spb_choice);
- if ((ret == 0) && spb_choice >= 0)
- read_subvol = spb_choice;
+ ret = afr_split_brain_read_subvol_get(inode, this, frame, &spb_subvol);
+ if ((ret == 0) && spb_subvol >= 0)
+ read_subvol = spb_subvol;
}
if (read_subvol == -1) {
@@ -431,7 +413,7 @@ afr_read_txn(call_frame_t *frame, xlator_t *this, inode_t *inode,
local->is_read_txn = _gf_true;
local->transaction.type = type;
- if (priv->quorum_count && !afr_has_quorum(local->child_up, this)) {
+ if (priv->quorum_count && !afr_has_quorum(local->child_up, this, NULL)) {
local->op_ret = -1;
local->op_errno = afr_quorum_errno(priv);
goto read;
@@ -450,6 +432,11 @@ afr_read_txn(call_frame_t *frame, xlator_t *this, inode_t *inode,
if (priv->thin_arbiter_count &&
AFR_COUNT(local->child_up, priv->child_count) != priv->child_count) {
+ if (local->child_up[0]) {
+ local->read_txn_query_child = AFR_CHILD_ZERO;
+ } else if (local->child_up[1]) {
+ local->read_txn_query_child = AFR_CHILD_ONE;
+ }
afr_ta_read_txn_synctask(frame, this);
return 0;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 8635b3e9e06..a580a1584cc 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -10,10 +10,10 @@
#include "afr.h"
#include "afr-self-heal.h"
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
#include "protocol-common.h"
#include "afr-messages.h"
-#include "events.h"
+#include <glusterfs/events.h>
void
afr_heal_synctask(xlator_t *this, afr_local_t *local);
@@ -21,7 +21,7 @@ afr_heal_synctask(xlator_t *this, afr_local_t *local);
int
afr_lookup_and_heal_gfid(xlator_t *this, inode_t *parent, const char *name,
inode_t *inode, struct afr_reply *replies, int source,
- unsigned char *sources, void *gfid)
+ unsigned char *sources, void *gfid, int *gfid_idx)
{
afr_private_t *priv = NULL;
call_frame_t *frame = NULL;
@@ -37,30 +37,54 @@ afr_lookup_and_heal_gfid(xlator_t *this, inode_t *parent, const char *name,
priv = this->private;
wind_on = alloca0(priv->child_count);
- ia_type = replies[source].poststat.ia_type;
- if ((ia_type == IA_INVAL) &&
- (AFR_COUNT(sources, priv->child_count) == priv->child_count)) {
- /* If a file is present on some bricks of the replica but parent
- * dir does not have pending xattrs, all bricks are sources and
- * the 'source' we selected earlier might be one where the file
- * is not actually present. Hence check if file is present in
- * any of the sources.*/
- for (i = 0; i < priv->child_count; i++) {
+ if (source >= 0 && replies[source].valid && replies[source].op_ret == 0)
+ ia_type = replies[source].poststat.ia_type;
+
+ if (ia_type != IA_INVAL)
+ goto heal;
+
+ /* If ia_type is still invalid, it means either
+ * (a)'source' was -1, i.e. parent dir pending xattrs are in split-brain
+ * (or) (b) The parent dir pending xattrs are all zeroes (i.e. all bricks
+ * are sources) and the 'source' we selected earlier might be the one where
+ * the file is not actually present.
+ *
+ * In both cases, let us pick a brick with a successful reply and use its
+ * ia_type.
+ * */
+ for (i = 0; i < priv->child_count; i++) {
+ if (source == -1) {
+ /* case (a) above. */
+ if (replies[i].valid && replies[i].op_ret == 0 &&
+ replies[i].poststat.ia_type != IA_INVAL) {
+ ia_type = replies[i].poststat.ia_type;
+ break;
+ }
+ } else {
+ /* case (b) above. */
if (i == source)
continue;
- if (sources[i] && replies[i].valid && replies[i].op_ret == 0) {
+ if (sources[i] && replies[i].valid && replies[i].op_ret == 0 &&
+ replies[i].poststat.ia_type != IA_INVAL) {
ia_type = replies[i].poststat.ia_type;
break;
}
}
}
+heal:
/* gfid heal on those subvolumes that do not have gfid associated
* with the inode and update those replies.
*/
for (i = 0; i < priv->child_count; i++) {
if (!replies[i].valid || replies[i].op_ret != 0)
continue;
+
+ if (gf_uuid_is_null(gfid) &&
+ !gf_uuid_is_null(replies[i].poststat.ia_gfid) &&
+ replies[i].poststat.ia_type == ia_type)
+ gfid = replies[i].poststat.ia_gfid;
+
if (!gf_uuid_is_null(replies[i].poststat.ia_gfid) ||
replies[i].poststat.ia_type != ia_type)
continue;
@@ -103,7 +127,22 @@ afr_lookup_and_heal_gfid(xlator_t *this, inode_t *parent, const char *name,
afr_reply_wipe(&replies[i]);
afr_reply_copy(&replies[i], &local->replies[i]);
}
+ if (gfid_idx && (*gfid_idx == -1)) {
+ /*Pick a brick where the gifd heal was successful.*/
+ for (i = 0; i < priv->child_count; i++) {
+ if (!wind_on[i])
+ continue;
+ if (replies[i].valid && replies[i].op_ret == 0 &&
+ !gf_uuid_is_null(replies[i].poststat.ia_gfid)) {
+ *gfid_idx = i;
+ break;
+ }
+ }
+ }
out:
+ if (gfid_idx && (*gfid_idx == -1) && (ret == 0) && local) {
+ ret = -afr_final_errno(local, priv);
+ }
loc_wipe(&loc);
if (frame)
AFR_STACK_DESTROY(frame);
@@ -136,27 +175,23 @@ afr_selfheal_gfid_mismatch_by_majority(struct afr_reply *replies,
{
int j = 0;
int i = 0;
- int src = -1;
- int votes[child_count];
+ int votes;
for (i = 0; i < child_count; i++) {
if (!replies[i].valid || replies[i].op_ret == -1)
continue;
- votes[i] = 1;
+ votes = 1;
for (j = i + 1; j < child_count; j++) {
if ((!gf_uuid_compare(replies[i].poststat.ia_gfid,
replies[j].poststat.ia_gfid)))
- votes[i]++;
- if (votes[i] > child_count / 2) {
- src = i;
- goto out;
- }
+ votes++;
+ if (votes > child_count / 2)
+ return i;
}
}
-out:
- return src;
+ return -1;
}
int
@@ -232,10 +267,8 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies,
"All the bricks should be up to resolve the gfid split "
"barin");
if (xdata) {
- ret = dict_set_str(xdata, "gfid-heal-msg",
- "All the "
- "bricks should be up to resolve the"
- " gfid split barin");
+ ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
+ SALL_BRICKS_UP_TO_RESOLVE);
if (ret)
gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_SET_FAILED,
"Error setting"
@@ -245,7 +278,7 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies,
}
if (xdata) {
- ret = dict_get_int32(xdata, "heal-op", &heal_op);
+ ret = dict_get_int32_sizen(xdata, "heal-op", &heal_op);
if (ret)
goto fav_child;
} else {
@@ -258,10 +291,10 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies,
priv->child_count);
if (*src == -1) {
gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
- "No bigger file");
+ SNO_BIGGER_FILE);
if (xdata) {
- ret = dict_set_str(xdata, "gfid-heal-msg",
- "No bigger file");
+ ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
+ SNO_BIGGER_FILE);
if (ret)
gf_msg(this->name, GF_LOG_ERROR, 0,
AFR_MSG_DICT_SET_FAILED,
@@ -276,10 +309,10 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies,
priv->child_count);
if (*src == -1) {
gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
- "No difference in mtime");
+ SNO_DIFF_IN_MTIME);
if (xdata) {
- ret = dict_set_str(xdata, "gfid-heal-msg",
- "No difference in mtime");
+ ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
+ SNO_DIFF_IN_MTIME);
if (ret)
gf_msg(this->name, GF_LOG_ERROR, 0,
AFR_MSG_DICT_SET_FAILED,
@@ -290,7 +323,7 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies,
break;
case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK:
- ret = dict_get_str(xdata, "child-name", &src_brick);
+ ret = dict_get_str_sizen(xdata, "child-name", &src_brick);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
"Error getting the source "
@@ -301,12 +334,10 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies,
src_brick);
if (*src == -1) {
gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
- "Error getting the source "
- "brick");
+ SERROR_GETTING_SRC_BRICK);
if (xdata) {
- ret = dict_set_str(xdata, "gfid-heal-msg",
- "Error getting the source "
- "brick");
+ ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
+ SERROR_GETTING_SRC_BRICK);
if (ret)
gf_msg(this->name, GF_LOG_ERROR, 0,
AFR_MSG_DICT_SET_FAILED,
@@ -360,11 +391,12 @@ out:
uuid_utoa_r(replies[src_idx].poststat.ia_gfid, g2),
priv->children[src_idx]->name);
gf_event(EVENT_AFR_SPLIT_BRAIN,
+ "client-pid=%d;"
"subvol=%s;type=gfid;file="
"<gfid:%s>/%s>;count=2;child-%d=%s;gfid-%d=%s;"
"child-%d=%s;gfid-%d=%s",
- this->name, uuid_utoa(pargfid), bname, child_idx,
- priv->children[child_idx]->name, child_idx,
+ this->ctx->cmd_args.client_pid, this->name, uuid_utoa(pargfid),
+ bname, child_idx, priv->children[child_idx]->name, child_idx,
uuid_utoa_r(replies[child_idx].poststat.ia_gfid, g1), src_idx,
priv->children[src_idx]->name, src_idx,
uuid_utoa_r(replies[src_idx].poststat.ia_gfid, g2));
@@ -481,7 +513,8 @@ afr_selfheal_restore_time(call_frame_t *frame, xlator_t *this, inode_t *inode,
AFR_ONLIST(healed_sinks, frame, afr_sh_generic_fop_cbk, setattr, &loc,
&replies[source].poststat,
- (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME), NULL);
+ (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME | GF_SET_ATTR_CTIME),
+ NULL);
loc_wipe(&loc);
@@ -878,7 +911,7 @@ afr_dict_contains_heal_op(call_frame_t *frame)
local = frame->local;
xdata_req = local->xdata_req;
- ret = dict_get_int32(xdata_req, "heal-op", &heal_op);
+ ret = dict_get_int32_sizen(xdata_req, "heal-op", &heal_op);
if (ret)
return _gf_false;
if (local->xdata_rsp == NULL) {
@@ -886,8 +919,8 @@ afr_dict_contains_heal_op(call_frame_t *frame)
if (!local->xdata_rsp)
return _gf_true;
}
- ret = dict_set_str(local->xdata_rsp, "sh-fail-msg",
- "File not in split-brain");
+ ret = dict_set_sizen_str_sizen(local->xdata_rsp, "sh-fail-msg",
+ SFILE_NOT_IN_SPLIT_BRAIN);
return _gf_true;
}
@@ -941,7 +974,8 @@ afr_mark_split_brain_source_sinks_by_heal_op(
xdata_rsp = local->xdata_rsp;
if (!afr_can_decide_split_brain_source_sinks(replies, priv->child_count)) {
- ret = dict_set_str(xdata_rsp, "sh-fail-msg", SBRAIN_HEAL_NO_GO_MSG);
+ ret = dict_set_sizen_str_sizen(xdata_rsp, "sh-fail-msg",
+ SBRAIN_HEAL_NO_GO_MSG);
ret = -1;
goto out;
}
@@ -952,16 +986,16 @@ afr_mark_split_brain_source_sinks_by_heal_op(
switch (heal_op) {
case GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE:
if (type == AFR_METADATA_TRANSACTION) {
- ret = dict_set_str(xdata_rsp, "sh-fail-msg",
- "Use source-brick option to"
- " heal metadata split-brain");
+ ret = dict_set_sizen_str_sizen(xdata_rsp, "sh-fail-msg",
+ SUSE_SOURCE_BRICK_TO_HEAL);
if (!ret)
ret = -1;
goto out;
}
afr_mark_largest_file_as_source(this, sources, replies);
if (AFR_COUNT(sources, priv->child_count) != 1) {
- ret = dict_set_str(xdata_rsp, "sh-fail-msg", "No bigger file");
+ ret = dict_set_sizen_str_sizen(xdata_rsp, "sh-fail-msg",
+ SNO_BIGGER_FILE);
if (!ret)
ret = -1;
goto out;
@@ -969,36 +1003,36 @@ afr_mark_split_brain_source_sinks_by_heal_op(
break;
case GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME:
if (type == AFR_METADATA_TRANSACTION) {
- ret = dict_set_str(xdata_rsp, "sh-fail-msg",
- "Use source-brick option to"
- " heal metadata split-brain");
+ ret = dict_set_sizen_str_sizen(xdata_rsp, "sh-fail-msg",
+ SUSE_SOURCE_BRICK_TO_HEAL);
if (!ret)
ret = -1;
goto out;
}
afr_mark_latest_mtime_file_as_source(this, sources, replies);
if (AFR_COUNT(sources, priv->child_count) != 1) {
- ret = dict_set_str(xdata_rsp, "sh-fail-msg",
- "No difference in mtime");
+ ret = dict_set_sizen_str_sizen(xdata_rsp, "sh-fail-msg",
+ SNO_DIFF_IN_MTIME);
if (!ret)
ret = -1;
goto out;
}
break;
case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK:
- ret = dict_get_str(xdata_req, "child-name", &name);
+ ret = dict_get_str_sizen(xdata_req, "child-name", &name);
if (ret)
goto out;
source = afr_get_child_index_from_name(this, name);
if (source < 0) {
- ret = dict_set_str(xdata_rsp, "sh-fail-msg",
- "Invalid brick name");
+ ret = dict_set_sizen_str_sizen(xdata_rsp, "sh-fail-msg",
+ SINVALID_BRICK_NAME);
if (!ret)
ret = -1;
goto out;
}
if (locked_on[source] != 1) {
- ret = dict_set_str(xdata_rsp, "sh-fail-msg", "Brick is not up");
+ ret = dict_set_sizen_str_sizen(xdata_rsp, "sh-fail-msg",
+ SBRICK_IS_NOT_UP);
if (!ret)
ret = -1;
goto out;
@@ -1139,7 +1173,8 @@ afr_sh_fav_by_ctime(xlator_t *this, struct afr_reply *replies, inode_t *inode)
}
/*
- * afr_sh_fav_by_size: Choose favorite child by size.
+ * afr_sh_fav_by_size: Choose favorite child by size
+ * when not all files are of zero size.
*/
int
afr_sh_fav_by_size(xlator_t *this, struct afr_reply *replies, inode_t *inode)
@@ -1150,19 +1185,32 @@ afr_sh_fav_by_size(xlator_t *this, struct afr_reply *replies, inode_t *inode)
uint64_t cmp_sz = 0;
priv = this->private;
-
for (i = 0; i < priv->child_count; i++) {
- if (replies[i].valid == 1) {
- gf_msg_debug(this->name, 0,
- "Child:%s file size = %" PRIu64 " for gfid %s",
- priv->children[i]->name, replies[i].poststat.ia_size,
- uuid_utoa(inode->gfid));
- if (replies[i].poststat.ia_size > cmp_sz) {
- cmp_sz = replies[i].poststat.ia_size;
- fav_child = i;
- }
+ if (!replies[i].valid) {
+ continue;
+ }
+ gf_msg_debug(this->name, 0,
+ "Child:%s file size = %" PRIu64 " for gfid %s",
+ priv->children[i]->name, replies[i].poststat.ia_size,
+ uuid_utoa(inode->gfid));
+ if (replies[i].poststat.ia_type == IA_IFDIR) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SBRAIN_FAV_CHILD_POLICY,
+ "Cannot perform selfheal on %s. "
+ "Size policy is not applicable to directories.",
+ uuid_utoa(inode->gfid));
+ break;
+ }
+ if (replies[i].poststat.ia_size > cmp_sz) {
+ cmp_sz = replies[i].poststat.ia_size;
+ fav_child = i;
+ } else if (replies[i].poststat.ia_size == cmp_sz) {
+ fav_child = -1;
}
}
+ if (fav_child == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ "No bigger file");
+ }
return fav_child;
}
@@ -1228,7 +1276,10 @@ afr_mark_split_brain_source_sinks_by_policy(
priv = this->private;
fav_child = afr_sh_get_fav_by_policy(this, replies, inode, &policy_str);
- if (fav_child > priv->child_count - 1) {
+ if (fav_child == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SBRAIN_FAV_CHILD_POLICY,
+ "No child selected by favorite-child policy.");
+ } else if (fav_child > priv->child_count - 1) {
gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SBRAIN_FAV_CHILD_POLICY,
"Invalid child (%d) "
"selected by policy %s.",
@@ -1359,7 +1410,7 @@ afr_mark_split_brain_source_sinks(
if (source >= 0)
return source;
- ret = dict_get_int32(xdata_req, "heal-op", &heal_op);
+ ret = dict_get_int32_sizen(xdata_req, "heal-op", &heal_op);
if (ret)
goto autoheal;
@@ -1375,7 +1426,7 @@ autoheal:
frame, this, inode, sources, sinks, healed_sinks, locked_on,
replies, type);
if (source != -1) {
- ret = dict_set_int32(xdata_req, "fav-child-policy", 1);
+ ret = dict_set_int32_sizen(xdata_req, "fav-child-policy", 1);
if (ret)
return -1;
}
@@ -1406,7 +1457,7 @@ _afr_fav_child_reset_sink_xattrs(call_frame_t *frame, xlator_t *this,
priv = this->private;
local = frame->local;
- if (!dict_get(local->xdata_req, "fav-child-policy"))
+ if (!dict_get_sizen(local->xdata_req, "fav-child-policy"))
return 0;
xdata = dict_new();
@@ -1524,7 +1575,6 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,
char *accused = NULL; /* Accused others without any self-accusal */
char *pending = NULL; /* Have pending operations on others */
char *self_accused = NULL; /* Accused itself */
- int min_participants = -1;
priv = this->private;
@@ -1548,12 +1598,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,
}
}
- if (type == AFR_DATA_TRANSACTION) {
- min_participants = priv->child_count;
- } else {
- min_participants = AFR_SH_MIN_PARTICIPANTS;
- }
- if (afr_success_count(replies, priv->child_count) < min_participants) {
+ if (afr_success_count(replies, priv->child_count) < priv->child_count) {
/* Treat this just like locks not being acquired */
return -ENOTCONN;
}
@@ -1614,7 +1659,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,
}
}
- if (type == AFR_DATA_TRANSACTION)
+ if (type == AFR_DATA_TRANSACTION || type == AFR_METADATA_TRANSACTION)
afr_selfheal_post_op_failure_accounting(priv, accused, sources,
locked_on);
@@ -1722,11 +1767,9 @@ afr_selfheal_discover_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (xdata) {
local->replies[i].xdata = dict_ref(xdata);
ret = dict_get_int8(xdata, "link-count", &need_heal);
- local->replies[i].need_heal = need_heal;
- } else {
- local->replies[i].need_heal = need_heal;
}
+ local->replies[i].need_heal = need_heal;
syncbarrier_wake(&local->barrier);
return 0;
@@ -1782,10 +1825,41 @@ afr_selfheal_unlocked_lookup_on(call_frame_t *frame, inode_t *parent,
return inode;
}
+static int
+afr_set_multi_dom_lock_count_request(xlator_t *this, dict_t *dict)
+{
+ int ret = 0;
+ afr_private_t *priv = NULL;
+ char *key1 = NULL;
+ char *key2 = NULL;
+
+ priv = this->private;
+ key1 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 +
+ strlen(this->name));
+ key2 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 +
+ strlen(priv->sh_domain));
+
+ ret = dict_set_uint32(dict, GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS, 1);
+ if (ret)
+ return ret;
+
+ sprintf(key1, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, this->name);
+ ret = dict_set_uint32(dict, key1, 1);
+ if (ret)
+ return ret;
+
+ sprintf(key2, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, priv->sh_domain);
+ ret = dict_set_uint32(dict, key2, 1);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
int
afr_selfheal_unlocked_discover_on(call_frame_t *frame, inode_t *inode,
uuid_t gfid, struct afr_reply *replies,
- unsigned char *discover_on)
+ unsigned char *discover_on, dict_t *dict)
{
loc_t loc = {
0,
@@ -1800,12 +1874,19 @@ afr_selfheal_unlocked_discover_on(call_frame_t *frame, inode_t *inode,
xattr_req = dict_new();
if (!xattr_req)
return -ENOMEM;
+ if (dict)
+ dict_copy(dict, xattr_req);
if (afr_xattr_req_prepare(frame->this, xattr_req) != 0) {
dict_unref(xattr_req);
return -ENOMEM;
}
+ if (afr_set_multi_dom_lock_count_request(frame->this, xattr_req)) {
+ dict_unref(xattr_req);
+ return -1;
+ }
+
loc.inode = inode_ref(inode);
gf_uuid_copy(loc.gfid, gfid);
@@ -1824,12 +1905,16 @@ int
afr_selfheal_unlocked_discover(call_frame_t *frame, inode_t *inode, uuid_t gfid,
struct afr_reply *replies)
{
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ dict_t *dict = NULL;
- priv = frame->this->private;
+ local = frame->local;
+
+ if (local->xattr_req)
+ dict = local->xattr_req;
return afr_selfheal_unlocked_discover_on(frame, inode, gfid, replies,
- priv->child_up);
+ local->child_up, dict);
}
unsigned int
@@ -2196,7 +2281,8 @@ int
afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
inode_t **link_inode, gf_boolean_t *data_selfheal,
gf_boolean_t *metadata_selfheal,
- gf_boolean_t *entry_selfheal)
+ gf_boolean_t *entry_selfheal,
+ struct afr_reply *replies_dst)
{
afr_private_t *priv = NULL;
inode_t *inode = NULL;
@@ -2255,11 +2341,13 @@ afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
priv->children[i]->name,
uuid_utoa(replies[i].poststat.ia_gfid));
gf_event(EVENT_AFR_SPLIT_BRAIN,
+ "client-pid=%d;"
"subvol=%s;"
"type=file;gfid=%s;"
"ia_type-%d=%s;ia_type-%d=%s",
- this->name, uuid_utoa(replies[i].poststat.ia_gfid),
- first_idx, gf_inode_type_to_str(first.ia_type), i,
+ this->ctx->cmd_args.client_pid, this->name,
+ uuid_utoa(replies[i].poststat.ia_gfid), first_idx,
+ gf_inode_type_to_str(first.ia_type), i,
gf_inode_type_to_str(replies[i].poststat.ia_type));
ret = -EIO;
goto out;
@@ -2330,6 +2418,8 @@ afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
ret = 0;
out:
+ if (replies && replies_dst)
+ afr_replies_copy(replies_dst, replies, priv->child_count);
if (inode)
inode_unref(inode);
if (replies)
@@ -2369,8 +2459,11 @@ afr_frame_create(xlator_t *this, int32_t *op_errno)
pid_t pid = GF_CLIENT_PID_SELF_HEALD;
frame = create_frame(this, this->ctx->pool);
- if (!frame)
+ if (!frame) {
+ if (op_errno)
+ *op_errno = ENOMEM;
return NULL;
+ }
local = AFR_FRAME_INIT(frame, (*op_errno));
if (!local) {
@@ -2441,17 +2534,12 @@ afr_selfheal_do(call_frame_t *frame, xlator_t *this, uuid_t gfid)
gf_boolean_t metadata_selfheal = _gf_false;
gf_boolean_t entry_selfheal = _gf_false;
afr_private_t *priv = NULL;
- gf_boolean_t dataheal_enabled = _gf_false;
priv = this->private;
- ret = gf_string2boolean(priv->data_self_heal, &dataheal_enabled);
- if (ret)
- goto out;
-
ret = afr_selfheal_unlocked_inspect(frame, this, gfid, &inode,
&data_selfheal, &metadata_selfheal,
- &entry_selfheal);
+ &entry_selfheal, NULL);
if (ret)
goto out;
@@ -2468,7 +2556,7 @@ afr_selfheal_do(call_frame_t *frame, xlator_t *this, uuid_t gfid)
}
}
- if (data_selfheal && dataheal_enabled)
+ if (data_selfheal && priv->data_self_heal)
data_ret = afr_selfheal_data(frame, this, fd);
if (metadata_selfheal && priv->metadata_self_heal)
@@ -2662,3 +2750,185 @@ afr_choose_source_by_policy(afr_private_t *priv, unsigned char *sources,
out:
return source;
}
+
+static int
+afr_anon_inode_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ afr_local_t *local = frame->local;
+ int i = (long)cookie;
+
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+ if (op_ret == 0) {
+ local->op_ret = 0;
+ local->replies[i].poststat = *buf;
+ local->replies[i].preparent = *preparent;
+ local->replies[i].postparent = *postparent;
+ }
+ if (xdata) {
+ local->replies[i].xdata = dict_ref(xdata);
+ }
+
+ syncbarrier_wake(&local->barrier);
+ return 0;
+}
+
+int
+afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode)
+{
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = this->private;
+ unsigned char *mkdir_on = alloca0(priv->child_count);
+ unsigned char *lookup_on = alloca0(priv->child_count);
+ loc_t loc = {0};
+ int32_t op_errno = 0;
+ int32_t child_op_errno = 0;
+ struct iatt iatt = {0};
+ dict_t *xdata = NULL;
+ uuid_t anon_inode_gfid = {0};
+ int mkdir_count = 0;
+ int i = 0;
+
+ /*Try to mkdir everywhere and return success if the dir exists on 'child'
+ */
+
+ if (!priv->use_anon_inode) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ frame = afr_frame_create(this, &op_errno);
+ if (op_errno) {
+ goto out;
+ }
+ local = frame->local;
+ if (!local->child_up[child]) {
+ /*Other bricks may need mkdir so don't error out yet*/
+ child_op_errno = ENOTCONN;
+ }
+ gf_uuid_parse(priv->anon_gfid_str, anon_inode_gfid);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->child_up[i])
+ continue;
+
+ if (priv->anon_inode[i]) {
+ mkdir_on[i] = 0;
+ } else {
+ mkdir_on[i] = 1;
+ mkdir_count++;
+ }
+ }
+
+ if (mkdir_count == 0) {
+ *linked_inode = inode_find(this->itable, anon_inode_gfid);
+ if (*linked_inode) {
+ op_errno = 0;
+ goto out;
+ }
+ }
+
+ loc.parent = inode_ref(this->itable->root);
+ loc.name = priv->anon_inode_name;
+ loc.inode = inode_new(this->itable);
+ if (!loc.inode) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ xdata = dict_new();
+ if (!xdata) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ op_errno = -dict_set_gfuuid(xdata, "gfid-req", anon_inode_gfid, _gf_true);
+ if (op_errno) {
+ goto out;
+ }
+
+ if (mkdir_count == 0) {
+ memcpy(lookup_on, local->child_up, priv->child_count);
+ goto lookup;
+ }
+
+ AFR_ONLIST(mkdir_on, frame, afr_anon_inode_mkdir_cbk, mkdir, &loc, 0755, 0,
+ xdata);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!mkdir_on[i]) {
+ continue;
+ }
+
+ if (local->replies[i].op_ret == 0) {
+ priv->anon_inode[i] = 1;
+ iatt = local->replies[i].poststat;
+ } else if (local->replies[i].op_ret < 0 &&
+ local->replies[i].op_errno == EEXIST) {
+ lookup_on[i] = 1;
+ } else if (i == child) {
+ child_op_errno = local->replies[i].op_errno;
+ }
+ }
+
+ if (AFR_COUNT(lookup_on, priv->child_count) == 0) {
+ goto link;
+ }
+
+lookup:
+ AFR_ONLIST(lookup_on, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ xdata);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!lookup_on[i]) {
+ continue;
+ }
+
+ if (local->replies[i].op_ret == 0) {
+ if (gf_uuid_compare(anon_inode_gfid,
+ local->replies[i].poststat.ia_gfid) == 0) {
+ priv->anon_inode[i] = 1;
+ iatt = local->replies[i].poststat;
+ } else {
+ if (i == child)
+ child_op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_DATA,
+ "%s has gfid: %s", priv->anon_inode_name,
+ uuid_utoa(local->replies[i].poststat.ia_gfid));
+ }
+ } else if (i == child) {
+ child_op_errno = local->replies[i].op_errno;
+ }
+ }
+link:
+ if (!gf_uuid_is_null(iatt.ia_gfid)) {
+ *linked_inode = inode_link(loc.inode, loc.parent, loc.name, &iatt);
+ if (*linked_inode) {
+ op_errno = 0;
+ inode_lookup(*linked_inode);
+ } else {
+ op_errno = ENOMEM;
+ }
+ goto out;
+ }
+
+out:
+ if (xdata)
+ dict_unref(xdata);
+ loc_wipe(&loc);
+ /*child_op_errno takes precedence*/
+ if (child_op_errno == 0) {
+ child_op_errno = op_errno;
+ }
+
+ if (child_op_errno && *linked_inode) {
+ inode_unref(*linked_inode);
+ *linked_inode = NULL;
+ }
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ return -child_op_errno;
+}
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 8cd79c73aa4..37bcc2b3f9e 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -10,15 +10,10 @@
#include "afr.h"
#include "afr-self-heal.h"
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
#include "protocol-common.h"
#include "afr-messages.h"
-#include "events.h"
-
-enum {
- AFR_SELFHEAL_DATA_FULL = 0,
- AFR_SELFHEAL_DATA_DIFF,
-};
+#include <glusterfs/events.h>
#define HAS_HOLES(i) ((i->ia_blocks * 512) < (i->ia_size))
static int
@@ -73,7 +68,7 @@ __afr_can_skip_data_block_heal(call_frame_t *frame, xlator_t *this, fd_t *fd,
xdata = dict_new();
if (!xdata)
goto out;
- if (dict_set_int32(xdata, "check-zero-filled", 1)) {
+ if (dict_set_int32_sizen(xdata, "check-zero-filled", 1)) {
dict_unref(xdata);
goto out;
}
@@ -230,24 +225,40 @@ __afr_selfheal_data_read_write(call_frame_t *frame, xlator_t *this, fd_t *fd,
return ret;
}
+static gf_boolean_t
+afr_source_sinks_locked(xlator_t *this, unsigned char *locked_on, int source,
+ unsigned char *healed_sinks)
+{
+ afr_private_t *priv = this->private;
+ int i = 0;
+
+ if (!locked_on[source])
+ return _gf_false;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (healed_sinks[i] && locked_on[i])
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
+
static int
afr_selfheal_data_block(call_frame_t *frame, xlator_t *this, fd_t *fd,
int source, unsigned char *healed_sinks, off_t offset,
size_t size, int type, struct afr_reply *replies)
{
int ret = -1;
- int sink_count = 0;
afr_private_t *priv = NULL;
unsigned char *data_lock = NULL;
priv = this->private;
- sink_count = AFR_COUNT(healed_sinks, priv->child_count);
data_lock = alloca0(priv->child_count);
ret = afr_selfheal_inodelk(frame, this, fd->inode, this->name, offset, size,
data_lock);
{
- if (ret < sink_count) {
+ if (!afr_source_sinks_locked(this, data_lock, source, healed_sinks)) {
ret = -ENOTCONN;
goto unlock;
}
@@ -301,7 +312,7 @@ afr_data_self_heal_type_get(afr_private_t *priv, unsigned char *healed_sinks,
int type = AFR_SELFHEAL_DATA_FULL;
int i = 0;
- if (priv->data_self_heal_algorithm == NULL) {
+ if (priv->data_self_heal_algorithm == AFR_SELFHEAL_DATA_DYNAMIC) {
type = AFR_SELFHEAL_DATA_FULL;
for (i = 0; i < priv->child_count; i++) {
if (!healed_sinks[i] && i != source)
@@ -311,10 +322,8 @@ afr_data_self_heal_type_get(afr_private_t *priv, unsigned char *healed_sinks,
break;
}
}
- } else if (strcmp(priv->data_self_heal_algorithm, "full") == 0) {
- type = AFR_SELFHEAL_DATA_FULL;
- } else if (strcmp(priv->data_self_heal_algorithm, "diff") == 0) {
- type = AFR_SELFHEAL_DATA_DIFF;
+ } else {
+ type = priv->data_self_heal_algorithm;
}
return type;
}
@@ -331,6 +340,9 @@ afr_selfheal_data_do(call_frame_t *frame, xlator_t *this, fd_t *fd, int source,
call_frame_t *iter_frame = NULL;
unsigned char arbiter_sink_status = 0;
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SELF_HEAL_INFO,
+ "performing data selfheal on %s", uuid_utoa(fd->inode->gfid));
+
priv = this->private;
if (priv->arbiter_count) {
arbiter_sink_status = healed_sinks[ARBITER_BRICK_INDEX];
@@ -382,17 +394,18 @@ __afr_selfheal_truncate_sinks(call_frame_t *frame, xlator_t *this, fd_t *fd,
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
- unsigned char arbiter_sink_status = 0;
int i = 0;
local = frame->local;
priv = this->private;
- if (priv->arbiter_count) {
- arbiter_sink_status = healed_sinks[ARBITER_BRICK_INDEX];
- healed_sinks[ARBITER_BRICK_INDEX] = 0;
- }
-
+ /* This will send truncate on the arbiter brick as well if it is marked as
+ * sink. If changelog is enabled on the volume it captures truncate as a
+ * data transactions on the arbiter brick. This will help geo-rep to
+ * properly sync the data from master to slave if arbiter is the ACTIVE
+ * brick during syncing and which had got some entries healed for data as
+ * part of self heal.
+ */
AFR_ONLIST(healed_sinks, frame, afr_sh_generic_fop_cbk, ftruncate, fd, size,
NULL);
@@ -403,8 +416,6 @@ __afr_selfheal_truncate_sinks(call_frame_t *frame, xlator_t *this, fd_t *fd,
*/
healed_sinks[i] = 0;
- if (arbiter_sink_status)
- healed_sinks[ARBITER_BRICK_INDEX] = arbiter_sink_status;
return 0;
}
@@ -545,9 +556,11 @@ __afr_selfheal_data_finalize_source(
replies, AFR_DATA_TRANSACTION);
if (source < 0) {
gf_event(EVENT_AFR_SPLIT_BRAIN,
+ "client-pid=%d;"
"subvol=%s;type=data;"
"file=%s",
- this->name, uuid_utoa(inode->gfid));
+ this->ctx->cmd_args.client_pid, this->name,
+ uuid_utoa(inode->gfid));
return -EIO;
}
@@ -700,19 +713,18 @@ __afr_selfheal_data(call_frame_t *frame, xlator_t *this, fd_t *fd,
goto unlock;
}
- if (priv->arbiter_count &&
- AFR_COUNT(healed_sinks, priv->child_count) == 1 &&
- healed_sinks[ARBITER_BRICK_INDEX]) {
- is_arbiter_the_only_sink = _gf_true;
- goto restore_time;
- }
-
ret = __afr_selfheal_truncate_sinks(
frame, this, fd, healed_sinks,
locked_replies[source].poststat.ia_size);
if (ret < 0)
goto unlock;
+ if (priv->arbiter_count &&
+ AFR_COUNT(healed_sinks, priv->child_count) == 1 &&
+ healed_sinks[ARBITER_BRICK_INDEX]) {
+ is_arbiter_the_only_sink = _gf_true;
+ goto restore_time;
+ }
ret = 0;
}
unlock:
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 619558e94b7..64893f441e3 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -10,60 +10,176 @@
#include "afr.h"
#include "afr-self-heal.h"
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
#include "afr-transaction.h"
#include "afr-messages.h"
-#include "syncop-utils.h"
-#include "events.h"
+#include <glusterfs/syncop-utils.h>
+#include <glusterfs/events.h>
-static int
-afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
- inode_t *inode, int child, struct afr_reply *replies)
+int
+afr_selfheal_entry_anon_inode(xlator_t *this, inode_t *dir, const char *name,
+ inode_t *inode, int child,
+ struct afr_reply *replies,
+ gf_boolean_t *anon_inode)
{
afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
xlator_t *subvol = NULL;
int ret = 0;
+ int i = 0;
+ char g[64] = {0};
+ unsigned char *lookup_success = NULL;
+ call_frame_t *frame = NULL;
+ loc_t loc2 = {
+ 0,
+ };
loc_t loc = {
0,
};
- char g[64];
priv = this->private;
-
subvol = priv->children[child];
+ lookup_success = alloca0(priv->child_count);
+ uuid_utoa_r(replies[child].poststat.ia_gfid, g);
+ loc.inode = inode_new(inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (replies[child].poststat.ia_type == IA_IFDIR) {
+ /* This directory may have sub-directory hierarchy which may need to
+ * be preserved for subsequent heals. So unconditionally move the
+ * directory to anonymous-inode directory*/
+ *anon_inode = _gf_true;
+ goto anon_inode;
+ }
+
+ frame = afr_frame_create(this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+ local = frame->local;
+ gf_uuid_copy(loc.gfid, replies[child].poststat.ia_gfid);
+ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ NULL);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == 0) {
+ lookup_success[i] = 1;
+ } else if (local->replies[i].op_errno != ENOENT &&
+ local->replies[i].op_errno != ESTALE) {
+ ret = -local->replies[i].op_errno;
+ }
+ }
+
+ if (priv->quorum_count) {
+ if (afr_has_quorum(lookup_success, this, NULL)) {
+ *anon_inode = _gf_true;
+ }
+ } else if (AFR_COUNT(lookup_success, priv->child_count) > 1) {
+ *anon_inode = _gf_true;
+ } else if (ret) {
+ goto out;
+ }
+
+anon_inode:
+ if (!*anon_inode) {
+ ret = 0;
+ goto out;
+ }
loc.parent = inode_ref(dir);
gf_uuid_copy(loc.pargfid, dir->gfid);
loc.name = name;
- loc.inode = inode_ref(inode);
- if (replies[child].valid && replies[child].op_ret == 0) {
- switch (replies[child].poststat.ia_type) {
- case IA_IFDIR:
- gf_msg(this->name, GF_LOG_WARNING, 0,
- AFR_MSG_EXPUNGING_FILE_OR_DIR,
- "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid),
- name, uuid_utoa_r(replies[child].poststat.ia_gfid, g),
- subvol->name);
- ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL);
- break;
- default:
- gf_msg(this->name, GF_LOG_WARNING, 0,
- AFR_MSG_EXPUNGING_FILE_OR_DIR,
- "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid),
- name, uuid_utoa_r(replies[child].poststat.ia_gfid, g),
- subvol->name);
- ret = syncop_unlink(subvol, &loc, NULL, NULL);
- break;
- }
+ ret = afr_anon_inode_create(this, child, &loc2.parent);
+ if (ret < 0)
+ goto out;
+
+ loc2.name = g;
+ ret = syncop_rename(subvol, &loc, &loc2, NULL, NULL);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "Rename to %s dir %s/%s (%s) on %s failed",
+ priv->anon_inode_name, uuid_utoa(dir->gfid), name, g,
+ subvol->name);
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "Rename to %s dir %s/%s (%s) on %s successful",
+ priv->anon_inode_name, uuid_utoa(dir->gfid), name, g,
+ subvol->name);
}
+out:
loc_wipe(&loc);
+ loc_wipe(&loc2);
+ if (frame) {
+ AFR_STACK_DESTROY(frame);
+ }
return ret;
}
int
+afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
+ inode_t *inode, int child, struct afr_reply *replies)
+{
+ char g[64] = {0};
+ afr_private_t *priv = NULL;
+ xlator_t *subvol = NULL;
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
+ gf_boolean_t anon_inode = _gf_false;
+
+ priv = this->private;
+ subvol = priv->children[child];
+
+ if ((!replies[child].valid) || (replies[child].op_ret < 0)) {
+ /*Nothing to do*/
+ ret = 0;
+ goto out;
+ }
+
+ if (priv->use_anon_inode) {
+ ret = afr_selfheal_entry_anon_inode(this, dir, name, inode, child,
+ replies, &anon_inode);
+ if (ret < 0 || anon_inode)
+ goto out;
+ }
+
+ loc.parent = inode_ref(dir);
+ loc.inode = inode_new(inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ loc.name = name;
+ switch (replies[child].poststat.ia_type) {
+ case IA_IFDIR:
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid), name,
+ uuid_utoa_r(replies[child].poststat.ia_gfid, g),
+ subvol->name);
+ ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL);
+ break;
+ default:
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid),
+ name, uuid_utoa_r(replies[child].poststat.ia_gfid, g),
+ subvol->name);
+ ret = syncop_unlink(subvol, &loc, NULL, NULL);
+ break;
+ }
+
+out:
+ loc_wipe(&loc);
+ return ret;
+}
+
+int
afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
unsigned char *sources, inode_t *dir,
const char *name, inode_t *inode,
@@ -76,6 +192,9 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
loc_t srcloc = {
0,
};
+ loc_t anonloc = {
+ 0,
+ };
xlator_t *this = frame->this;
afr_private_t *priv = NULL;
dict_t *xdata = NULL;
@@ -86,15 +205,18 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
0,
};
unsigned char *newentry = NULL;
+ char iatt_uuid_str[64] = {0};
+ char dir_uuid_str[64] = {0};
priv = this->private;
iatt = &replies[source].poststat;
+ uuid_utoa_r(iatt->ia_gfid, iatt_uuid_str);
if (iatt->ia_type == IA_INVAL || gf_uuid_is_null(iatt->ia_gfid)) {
gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SELF_HEAL_FAILED,
"Invalid ia_type (%d) or gfid(%s). source brick=%d, "
"pargfid=%s, name=%s",
- iatt->ia_type, uuid_utoa(iatt->ia_gfid), source,
- uuid_utoa(dir->gfid), name);
+ iatt->ia_type, iatt_uuid_str, source,
+ uuid_utoa_r(dir->gfid, dir_uuid_str), name);
ret = -EINVAL;
goto out;
}
@@ -119,14 +241,24 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
srcloc.inode = inode_ref(inode);
gf_uuid_copy(srcloc.gfid, iatt->ia_gfid);
- if (iatt->ia_type != IA_IFDIR)
- ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0);
- if (iatt->ia_type == IA_IFDIR || ret == -ENOENT || ret == -ESTALE) {
+ ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0);
+ if (ret == -ENOENT || ret == -ESTALE) {
newentry[dst] = 1;
ret = afr_selfheal_newentry_mark(frame, this, inode, source, replies,
sources, newentry);
if (ret)
goto out;
+ } else if (ret == 0 && iatt->ia_type == IA_IFDIR && priv->use_anon_inode) {
+ // Try rename from hidden directory
+ ret = afr_anon_inode_create(this, dst, &anonloc.parent);
+ if (ret < 0)
+ goto out;
+ anonloc.inode = inode_ref(inode);
+ anonloc.name = iatt_uuid_str;
+ ret = syncop_rename(priv->children[dst], &anonloc, &loc, NULL, NULL);
+ if (ret == -ENOENT || ret == -ESTALE)
+ ret = -1; /*This sets 'mismatch' to true*/
+ goto out;
}
mode = st_mode_from_ia(iatt->ia_prot, iatt->ia_type);
@@ -149,7 +281,7 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
}
break;
default:
- ret = dict_set_int32(xdata, GLUSTERFS_INTERNAL_FOP_KEY, 1);
+ ret = dict_set_int32_sizen(xdata, GLUSTERFS_INTERNAL_FOP_KEY, 1);
if (ret)
goto out;
ret = syncop_mknod(
@@ -165,6 +297,7 @@ out:
GF_FREE(linkname);
loc_wipe(&loc);
loc_wipe(&srcloc);
+ loc_wipe(&anonloc);
return ret;
}
@@ -192,7 +325,7 @@ __afr_selfheal_heal_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
if (replies[source].op_ret == 0) {
ret = afr_lookup_and_heal_gfid(this, fd->inode, name, inode, replies,
source, sources,
- &replies[source].poststat.ia_gfid);
+ &replies[source].poststat.ia_gfid, NULL);
if (ret)
return ret;
}
@@ -246,6 +379,19 @@ afr_selfheal_detect_gfid_and_type_mismatch(xlator_t *this,
if (replies[i].op_ret != 0)
continue;
+ if (gf_uuid_is_null(replies[i].poststat.ia_gfid))
+ continue;
+
+ if (replies[i].poststat.ia_type == IA_INVAL)
+ continue;
+
+ if (ia_type == IA_INVAL || gf_uuid_is_null(gfid)) {
+ src_idx = i;
+ ia_type = replies[src_idx].poststat.ia_type;
+ gfid = &replies[src_idx].poststat.ia_gfid;
+ continue;
+ }
+
if (gf_uuid_compare(gfid, replies[i].poststat.ia_gfid) &&
(ia_type == replies[i].poststat.ia_type)) {
ret = afr_gfid_split_brain_source(this, replies, inode, pargfid,
@@ -269,11 +415,12 @@ afr_selfheal_detect_gfid_and_type_mismatch(xlator_t *this,
gf_inode_type_to_str(replies[src_idx].poststat.ia_type),
priv->children[src_idx]->name);
gf_event(EVENT_AFR_SPLIT_BRAIN,
+ "client-pid=%d;"
"subvol=%s;type=file;"
"file=<gfid:%s>/%s>;count=2;child-%d=%s;type-"
"%d=%s;child-%d=%s;type-%d=%s",
- this->name, uuid_utoa(pargfid), bname, i,
- priv->children[i]->name, i,
+ this->ctx->cmd_args.client_pid, this->name,
+ uuid_utoa(pargfid), bname, i, priv->children[i]->name, i,
gf_inode_type_to_str(replies[i].poststat.ia_type), src_idx,
priv->children[src_idx]->name, src_idx,
gf_inode_type_to_str(replies[src_idx].poststat.ia_type));
@@ -319,7 +466,7 @@ __afr_selfheal_merge_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
ret = afr_lookup_and_heal_gfid(this, fd->inode, name, inode, replies,
source, sources,
- &replies[source].poststat.ia_gfid);
+ &replies[source].poststat.ia_gfid, NULL);
if (ret)
return ret;
@@ -465,6 +612,7 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources,
afr_private_t *priv = NULL;
int source = -1;
int sources_count = 0;
+ int i = 0;
priv = this->private;
@@ -478,6 +626,20 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources,
}
source = afr_choose_source_by_policy(priv, sources, AFR_ENTRY_TRANSACTION);
+
+ /*If the selected source does not blame any other brick, then mark
+ * everything as sink to trigger conservative merge.
+ */
+ if (source != -1 && !AFR_COUNT(healed_sinks, priv->child_count)) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (locked_on[i]) {
+ sources[i] = 0;
+ healed_sinks[i] = 1;
+ }
+ }
+ return -1;
+ }
+
return source;
}
@@ -548,10 +710,15 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
priv = this->private;
+ if (afr_is_private_directory(priv, fd->inode->gfid, name,
+ GF_CLIENT_PID_SELF_HEALD)) {
+ return 0;
+ }
+
xattr = dict_new();
if (!xattr)
return -ENOMEM;
- ret = dict_set_int32(xattr, GF_GFIDLESS_LOOKUP, 1);
+ ret = dict_set_int32_sizen(xattr, GF_GFIDLESS_LOOKUP, 1);
if (ret) {
dict_unref(xattr);
return -1;
@@ -568,7 +735,7 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL,
locked_on);
{
- if (ret < AFR_SH_MIN_PARTICIPANTS) {
+ if (ret < priv->child_count) {
gf_msg_debug(this->name, 0,
"%s: Skipping "
"entry self-heal as only %d sub-volumes "
@@ -596,7 +763,7 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
replies);
if ((ret == 0) && (priv->esh_granular) && parent_idx_inode) {
- ret = afr_shd_index_purge(subvol, parent_idx_inode, name,
+ ret = afr_shd_entry_purge(subvol, parent_idx_inode, name,
inode->ia_type);
/* Why is ret force-set to 0? We do not care about
* index purge failing for full heal as it is quite
@@ -726,10 +893,6 @@ afr_selfheal_entry_do_subvol(call_frame_t *frame, xlator_t *this, fd_t *fd,
if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
continue;
- if (__is_root_gfid(fd->inode->gfid) &&
- !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR))
- continue;
-
ret = afr_selfheal_entry_dirent(iter_frame, this, fd, entry->d_name,
loc.inode, subvol,
local->need_full_crawl);
@@ -792,7 +955,7 @@ afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry,
/* The name indices under the pgfid index dir are guaranteed
* to be regular files. Hence the hardcoding.
*/
- afr_shd_index_purge(subvol, parent->inode, entry->d_name, IA_IFREG);
+ afr_shd_entry_purge(subvol, parent->inode, entry->d_name, IA_IFREG);
ret = 0;
goto out;
}
@@ -831,6 +994,8 @@ afr_selfheal_entry_granular(call_frame_t *frame, xlator_t *this, fd_t *fd,
subvol = priv->children[subvol_idx];
args.frame = afr_copy_frame(frame);
+ if (!args.frame)
+ goto out;
args.xl = this;
/* args.heal_fd represents the fd associated with the original directory
* on which entry heal is being attempted.
@@ -849,9 +1014,10 @@ afr_selfheal_entry_granular(call_frame_t *frame, xlator_t *this, fd_t *fd,
* do not treat heal as failure.
*/
if (is_src)
- return -errno;
+ ret = -errno;
else
- return 0;
+ ret = 0;
+ goto out;
}
ret = syncop_dir_scan(subvol, &loc, GF_CLIENT_PID_SELF_HEALD, &args,
@@ -861,7 +1027,9 @@ afr_selfheal_entry_granular(call_frame_t *frame, xlator_t *this, fd_t *fd,
if (args.mismatch == _gf_true)
ret = -1;
-
+out:
+ if (args.frame)
+ AFR_STACK_DESTROY(args.frame);
return ret;
}
@@ -957,7 +1125,7 @@ __afr_selfheal_entry(call_frame_t *frame, xlator_t *this, fd_t *fd,
ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL,
data_lock);
{
- if (ret < AFR_SH_MIN_PARTICIPANTS) {
+ if (ret < priv->child_count) {
gf_msg_debug(this->name, 0,
"%s: Skipping "
"entry self-heal as only %d sub-volumes could "
@@ -1013,6 +1181,8 @@ unlock:
goto postop_unlock;
}
+ afr_selfheal_restore_time(frame, this, fd->inode, source, healed_sinks,
+ locked_replies);
ret = afr_selfheal_undo_pending(
frame, this, fd->inode, sources, sinks, healed_sinks, undid_pending,
AFR_ENTRY_TRANSACTION, locked_replies, postop_lock);
@@ -1079,7 +1249,7 @@ afr_selfheal_entry(call_frame_t *frame, xlator_t *this, inode_t *inode)
ret = afr_selfheal_tie_breaker_entrylk(frame, this, inode, priv->sh_domain,
NULL, locked_on);
{
- if (ret < AFR_SH_MIN_PARTICIPANTS) {
+ if (ret < priv->child_count) {
gf_msg_debug(this->name, 0,
"%s: Skipping "
"entry self-heal as only %d sub-volumes could "
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index ea2a7bfd52f..03f43bad16e 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -10,9 +10,9 @@
#include "afr.h"
#include "afr-self-heal.h"
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
#include "protocol-common.h"
-#include "events.h"
+#include <glusterfs/events.h>
#define AFR_HEAL_ATTR (GF_SET_ATTR_UID | GF_SET_ATTR_GID | GF_SET_ATTR_MODE)
@@ -190,6 +190,59 @@ out:
return ret;
}
+static int
+__afr_selfheal_metadata_mark_pending_xattrs(call_frame_t *frame, xlator_t *this,
+ inode_t *inode,
+ struct afr_reply *replies,
+ unsigned char *sources)
+{
+ int ret = 0;
+ int i = 0;
+ int m_idx = 0;
+ afr_private_t *priv = NULL;
+ int raw[AFR_NUM_CHANGE_LOGS] = {0};
+ dict_t *xattr = NULL;
+
+ priv = this->private;
+ m_idx = afr_index_for_transaction_type(AFR_METADATA_TRANSACTION);
+ raw[m_idx] = 1;
+
+ xattr = dict_new();
+ if (!xattr)
+ return -ENOMEM;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i])
+ continue;
+ ret = dict_set_static_bin(xattr, priv->pending_key[i], raw,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ ret = afr_selfheal_post_op(frame, this, inode, i, xattr, NULL);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_INFO, -ret, AFR_MSG_SELF_HEAL_INFO,
+ "Failed to set pending metadata xattr on child %d for %s", i,
+ uuid_utoa(inode->gfid));
+ goto out;
+ }
+ }
+
+ afr_replies_wipe(replies, priv->child_count);
+ ret = afr_selfheal_unlocked_discover(frame, inode, inode->gfid, replies);
+
+out:
+ if (xattr)
+ dict_unref(xattr);
+ return ret;
+}
+
/*
* Look for mismatching uid/gid or mode or user xattrs even if
* AFR xattrs don't say so, and pick one arbitrarily as winner. */
@@ -210,6 +263,7 @@ __afr_selfheal_metadata_finalize_source(call_frame_t *frame, xlator_t *this,
};
int source = -1;
int sources_count = 0;
+ int ret = 0;
priv = this->private;
@@ -242,9 +296,11 @@ __afr_selfheal_metadata_finalize_source(call_frame_t *frame, xlator_t *this,
if (!priv->metadata_splitbrain_forced_heal) {
gf_event(EVENT_AFR_SPLIT_BRAIN,
+ "client-pid=%d;"
"subvol=%s;"
"type=metadata;file=%s",
- this->name, uuid_utoa(inode->gfid));
+ this->ctx->cmd_args.client_pid, this->name,
+ uuid_utoa(inode->gfid));
return -EIO;
}
@@ -298,7 +354,13 @@ __afr_selfheal_metadata_finalize_source(call_frame_t *frame, xlator_t *this,
healed_sinks[i] = 1;
}
}
-
+ if ((sources_count == priv->child_count) && (source > -1) &&
+ (AFR_COUNT(healed_sinks, priv->child_count) != 0)) {
+ ret = __afr_selfheal_metadata_mark_pending_xattrs(frame, this, inode,
+ replies, sources);
+ if (ret < 0)
+ return ret;
+ }
out:
afr_mark_active_sinks(this, sources, locked_on, healed_sinks);
return source;
@@ -396,7 +458,7 @@ afr_selfheal_metadata(call_frame_t *frame, xlator_t *this, inode_t *inode)
ret = afr_selfheal_inodelk(frame, this, inode, this->name, LLONG_MAX - 1, 0,
data_lock);
{
- if (ret < AFR_SH_MIN_PARTICIPANTS) {
+ if (ret < priv->child_count) {
ret = -ENOTCONN;
goto unlock;
}
@@ -419,12 +481,8 @@ afr_selfheal_metadata(call_frame_t *frame, xlator_t *this, inode_t *inode)
if (ret)
goto unlock;
- /* Restore atime/mtime for files that don't need data heal as
- * restoring timestamps happens only as a part of data-heal.
- */
- if (!IA_ISREG(locked_replies[source].poststat.ia_type))
- afr_selfheal_restore_time(frame, this, inode, source, healed_sinks,
- locked_replies);
+ afr_selfheal_restore_time(frame, this, inode, source, healed_sinks,
+ locked_replies);
ret = afr_selfheal_undo_pending(
frame, this, inode, sources, sinks, healed_sinks, undid_pending,
diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
index 39aacee6ecf..834aac86d48 100644
--- a/xlators/cluster/afr/src/afr-self-heal-name.c
+++ b/xlators/cluster/afr/src/afr-self-heal-name.c
@@ -8,7 +8,7 @@
cases as published by the Free Software Foundation.
*/
-#include "events.h"
+#include <glusterfs/events.h>
#include "afr.h"
#include "afr-self-heal.h"
#include "afr-messages.h"
@@ -18,7 +18,8 @@ __afr_selfheal_assign_gfid(xlator_t *this, inode_t *parent, uuid_t pargfid,
const char *bname, inode_t *inode,
struct afr_reply *replies, void *gfid,
unsigned char *locked_on, int source,
- unsigned char *sources, gf_boolean_t is_gfid_absent)
+ unsigned char *sources, gf_boolean_t is_gfid_absent,
+ int *gfid_idx)
{
int ret = 0;
int up_count = 0;
@@ -46,8 +47,8 @@ __afr_selfheal_assign_gfid(xlator_t *this, inode_t *parent, uuid_t pargfid,
}
}
- afr_lookup_and_heal_gfid(this, parent, bname, inode, replies, source,
- sources, gfid);
+ ret = afr_lookup_and_heal_gfid(this, parent, bname, inode, replies, source,
+ sources, gfid, gfid_idx);
out:
return ret;
@@ -97,21 +98,12 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid,
const char *bname, inode_t *inode,
struct afr_reply *replies)
{
- loc_t loc = {
- 0,
- };
int i = 0;
afr_private_t *priv = NULL;
- char g[64];
int ret = 0;
priv = this->private;
- loc.parent = inode_ref(parent);
- gf_uuid_copy(loc.pargfid, pargfid);
- loc.name = bname;
- loc.inode = inode_ref(inode);
-
for (i = 0; i < priv->child_count; i++) {
if (!replies[i].valid)
continue;
@@ -119,62 +111,13 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid,
if (replies[i].op_ret)
continue;
- switch (replies[i].poststat.ia_type) {
- case IA_IFDIR:
- gf_msg(this->name, GF_LOG_WARNING, 0,
- AFR_MSG_EXPUNGING_FILE_OR_DIR,
- "expunging dir %s/%s (%s) on %s", uuid_utoa(pargfid),
- bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g),
- priv->children[i]->name);
-
- ret |= syncop_rmdir(priv->children[i], &loc, 1, NULL, NULL);
- break;
- default:
- gf_msg(this->name, GF_LOG_WARNING, 0,
- AFR_MSG_EXPUNGING_FILE_OR_DIR,
- "expunging file %s/%s (%s) on %s", uuid_utoa(pargfid),
- bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g),
- priv->children[i]->name);
-
- ret |= syncop_unlink(priv->children[i], &loc, NULL, NULL);
- break;
- }
+ ret |= afr_selfheal_entry_delete(this, parent, bname, inode, i,
+ replies);
}
- loc_wipe(&loc);
-
return ret;
}
-/* This function is to be called after ensuring that there is no gfid mismatch
- * for the inode across multiple sources
- */
-static int
-afr_selfheal_gfid_idx_get(xlator_t *this, struct afr_reply *replies,
- unsigned char *sources)
-{
- int i = 0;
- int gfid_idx = -1;
- afr_private_t *priv = NULL;
-
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!replies[i].valid || replies[i].op_ret != 0)
- continue;
-
- if (!sources[i])
- continue;
-
- if (gf_uuid_is_null(replies[i].poststat.ia_gfid))
- continue;
-
- gfid_idx = i;
- break;
- }
- return gfid_idx;
-}
-
static gf_boolean_t
afr_selfheal_name_need_heal_check(xlator_t *this, struct afr_reply *replies)
{
@@ -250,13 +193,14 @@ afr_selfheal_name_type_mismatch_check(xlator_t *this, struct afr_reply *replies,
gf_inode_type_to_str(inode_type),
priv->children[type_idx]->name);
gf_event(EVENT_AFR_SPLIT_BRAIN,
+ "client-pid=%d;"
"subvol=%s;type=file;"
"file=<gfid:%s>/%s;count=2;"
"child-%d=%s;type-%d=%s;child-%d=%s;"
"type-%d=%s",
- this->name, uuid_utoa(pargfid), bname, i,
- priv->children[i]->name, i,
- gf_inode_type_to_str(inode_type1), type_idx,
+ this->ctx->cmd_args.client_pid, this->name,
+ uuid_utoa(pargfid), bname, i, priv->children[i]->name,
+ i, gf_inode_type_to_str(inode_type1), type_idx,
priv->children[type_idx]->name, type_idx,
gf_inode_type_to_str(inode_type));
return -EIO;
@@ -305,9 +249,8 @@ afr_selfheal_name_gfid_mismatch_check(xlator_t *this, struct afr_reply *replies,
bname, gfid_idx_iter, i,
locked_on, gfid_idx, xdata);
if (!ret && *gfid_idx >= 0) {
- ret = dict_set_str(xdata, "gfid-heal-msg",
- "GFID split-brain "
- "resolved");
+ ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
+ "GFID split-brain resolved");
if (ret)
gf_msg(this->name, GF_LOG_ERROR, 0,
AFR_MSG_DICT_SET_FAILED,
@@ -400,21 +343,18 @@ __afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent,
gfid = gfid_req;
} else {
gfid = &replies[gfid_idx].poststat.ia_gfid;
+ if (source == -1)
+ /* Either entry split-brain or dirty xattrs are present on parent.*/
+ source = gfid_idx;
}
is_gfid_absent = (gfid_idx == -1) ? _gf_true : _gf_false;
ret = __afr_selfheal_assign_gfid(this, parent, pargfid, bname, inode,
replies, gfid, locked_on, source, sources,
- is_gfid_absent);
- if (ret)
+ is_gfid_absent, &gfid_idx);
+ if (ret || (gfid_idx < 0))
return ret;
- if (gfid_idx == -1) {
- gfid_idx = afr_selfheal_gfid_idx_get(this, replies, sources);
- if (gfid_idx == -1)
- return -1;
- }
-
ret = __afr_selfheal_name_impunge(frame, this, parent, pargfid, bname,
inode, replies, gfid_idx);
if (ret == -EIO)
@@ -527,7 +467,7 @@ afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent,
if (!xattr)
return -ENOMEM;
- ret = dict_set_int32(xattr, GF_GFIDLESS_LOOKUP, 1);
+ ret = dict_set_int32_sizen(xattr, GF_GFIDLESS_LOOKUP, 1);
if (ret) {
dict_unref(xattr);
return -1;
@@ -545,7 +485,7 @@ afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent,
ret = afr_selfheal_entrylk(frame, this, parent, this->name, bname,
locked_on);
{
- if (ret < AFR_SH_MIN_PARTICIPANTS) {
+ if (ret < priv->child_count) {
ret = -ENOTCONN;
goto unlock;
}
@@ -591,13 +531,15 @@ afr_selfheal_name_unlocked_inspect(call_frame_t *frame, xlator_t *this,
struct afr_reply *replies = NULL;
inode_t *inode = NULL;
int first_idx = -1;
+ afr_local_t *local = NULL;
priv = this->private;
+ local = frame->local;
replies = alloca0(sizeof(*replies) * priv->child_count);
inode = afr_selfheal_unlocked_lookup_on(frame, parent, bname, replies,
- priv->child_up, NULL);
+ local->child_up, NULL);
if (!inode)
return -ENOMEM;
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index 9c7418c7169..48e6dbcfb18 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -11,8 +11,6 @@
#ifndef _AFR_SELFHEAL_H
#define _AFR_SELFHEAL_H
-#define AFR_SH_MIN_PARTICIPANTS 2
-
/* Perform fop on all UP subvolumes and wait for all callbacks to return */
#define AFR_ONALL(frame, rfn, fop, args...) \
@@ -20,9 +18,8 @@
afr_local_t *__local = frame->local; \
afr_private_t *__priv = frame->this->private; \
int __i = 0, __count = 0; \
- unsigned char *__child_up = NULL; \
+ unsigned char *__child_up = alloca(__priv->child_count); \
\
- __child_up = alloca0(__priv->child_count); \
memcpy(__child_up, __priv->child_up, \
sizeof(*__child_up) * __priv->child_count); \
__count = AFR_COUNT(__child_up, __priv->child_count); \
@@ -48,13 +45,16 @@
afr_local_t *__local = frame->local; \
afr_private_t *__priv = frame->this->private; \
int __i = 0; \
- int __count = AFR_COUNT(list, __priv->child_count); \
+ int __count = 0; \
+ unsigned char *__list = alloca(__priv->child_count); \
\
+ memcpy(__list, list, sizeof(*__list) * __priv->child_count); \
+ __count = AFR_COUNT(__list, __priv->child_count); \
__local->barrier.waitfor = __count; \
afr_local_replies_wipe(__local, __priv); \
\
for (__i = 0; __i < __priv->child_count; __i++) { \
- if (!list[__i]) \
+ if (!__list[__i]) \
continue; \
STACK_WIND_COOKIE(frame, rfn, (void *)(long)__i, \
__priv->children[__i], \
@@ -83,9 +83,9 @@
#define ALLOC_MATRIX(n, type) \
({ \
- type **__ptr = NULL; \
int __i; \
- __ptr = alloca0(n * sizeof(type *)); \
+ type **__ptr = alloca(n * sizeof(type *)); \
+ \
for (__i = 0; __i < n; __i++) \
__ptr[__i] = alloca0(n * sizeof(type)); \
__ptr; \
@@ -97,6 +97,27 @@
#define SBRAIN_HEAL_NO_GO_MSG \
"Failed to obtain replies from all bricks of " \
"the replica (are they up?). Cannot resolve split-brain."
+#define SFILE_NOT_IN_SPLIT_BRAIN "File not in split-brain"
+#define SNO_BIGGER_FILE "No bigger file"
+#define SNO_DIFF_IN_MTIME "No difference in mtime"
+#define SUSE_SOURCE_BRICK_TO_HEAL \
+ "Use source-brick option to heal metadata" \
+ " split-brain"
+#define SINVALID_BRICK_NAME "Invalid brick name"
+#define SBRICK_IS_NOT_UP "Brick is not up"
+#define SBRICK_NOT_CONNECTED "Brick is not connected"
+#define SLESS_THAN2_BRICKS_in_REP "< 2 bricks in replica are up"
+#define SBRICK_IS_REMOTE "Brick is remote"
+#define SSTARTED_SELF_HEAL "Started self-heal"
+#define SOP_NOT_SUPPORTED "Operation Not Supported"
+#define SFILE_NOT_UNDER_DATA \
+ "The file is not under data or metadata " \
+ "split-brain"
+#define SFILE_NOT_IN_SPLIT_BRAIN "File not in split-brain"
+#define SALL_BRICKS_UP_TO_RESOLVE \
+ "All the bricks should be up to resolve the" \
+ " gfid split brain"
+#define SERROR_GETTING_SRC_BRICK "Error getting the source brick"
int
afr_selfheal(xlator_t *this, uuid_t gfid);
@@ -119,7 +140,7 @@ afr_selfheal_entry(call_frame_t *frame, xlator_t *this, inode_t *inode);
int
afr_lookup_and_heal_gfid(xlator_t *this, inode_t *parent, const char *name,
inode_t *inode, struct afr_reply *replies, int source,
- unsigned char *sources, void *gfid);
+ unsigned char *sources, void *gfid, int *gfid_idx);
int
afr_selfheal_inodelk(call_frame_t *frame, xlator_t *this, inode_t *inode,
@@ -166,7 +187,7 @@ afr_selfheal_unlocked_discover(call_frame_t *frame, inode_t *inode, uuid_t gfid,
int
afr_selfheal_unlocked_discover_on(call_frame_t *frame, inode_t *inode,
uuid_t gfid, struct afr_reply *replies,
- unsigned char *discover_on);
+ unsigned char *discover_on, dict_t *dict);
inode_t *
afr_selfheal_unlocked_lookup_on(call_frame_t *frame, inode_t *parent,
const char *name, struct afr_reply *replies,
@@ -305,7 +326,8 @@ int
afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
inode_t **link_inode, gf_boolean_t *data_selfheal,
gf_boolean_t *metadata_selfheal,
- gf_boolean_t *entry_selfheal);
+ gf_boolean_t *entry_selfheal,
+ struct afr_reply *replies);
int
afr_selfheal_do(call_frame_t *frame, xlator_t *this, uuid_t gfid);
@@ -347,4 +369,9 @@ gf_boolean_t
afr_is_file_empty_on_all_children(afr_private_t *priv,
struct afr_reply *replies);
+int
+afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
+ inode_t *inode, int child, struct afr_reply *replies);
+int
+afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode);
#endif /* !_AFR_SELFHEAL_H */
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index e2de77a9c45..109fd4b7421 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -12,11 +12,10 @@
#include "afr-self-heal.h"
#include "afr-self-heald.h"
#include "protocol-common.h"
-#include "syncop-utils.h"
+#include <glusterfs/syncop-utils.h>
#include "afr-messages.h"
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
-#define SHD_INODE_LRU_LIMIT 2048
#define AFR_EH_SPLIT_BRAIN_LIMIT 1024
#define AFR_STATISTICS_HISTORY_SIZE 50
@@ -95,7 +94,7 @@ __afr_shd_healer_wait(struct subvol_healer *healer)
priv = healer->this->private;
disabled_loop:
- wait_till.tv_sec = time(NULL) + priv->shd.timeout;
+ wait_till.tv_sec = gf_time() + priv->shd.timeout;
while (!healer->rerun) {
ret = pthread_cond_timedwait(&healer->cond, &healer->mutex, &wait_till);
@@ -223,7 +222,7 @@ out:
}
int
-afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name,
+afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name,
ia_type_t type)
{
int ret = 0;
@@ -372,8 +371,9 @@ afr_shd_sweep_prepare(struct subvol_healer *healer)
event->split_brain_count = 0;
event->heal_failed_count = 0;
- time(&event->start_time);
+ event->start_time = gf_time();
event->end_time = 0;
+ _mask_cancellation();
}
void
@@ -386,8 +386,8 @@ afr_shd_sweep_done(struct subvol_healer *healer)
event = &healer->crawl_event;
shd = &(((afr_private_t *)healer->this->private)->shd);
- time(&event->end_time);
- history = memdup(event, sizeof(*event));
+ event->end_time = gf_time();
+ history = gf_memdup(event, sizeof(*event));
event->start_time = 0;
if (!history)
@@ -395,6 +395,7 @@ afr_shd_sweep_done(struct subvol_healer *healer)
if (eh_save_history(shd->statistics[healer->subvol], history) < 0)
GF_FREE(history);
+ _unmask_cancellation();
}
int
@@ -423,7 +424,7 @@ afr_shd_index_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
ret = afr_shd_selfheal(healer, healer->subvol, gfid);
if (ret == -ENOENT || ret == -ESTALE)
- afr_shd_index_purge(subvol, parent->inode, entry->d_name, val);
+ afr_shd_entry_purge(subvol, parent->inode, entry->d_name, val);
if (ret == 2)
/* If bricks crashed in pre-op after creating indices/xattrop
@@ -465,7 +466,7 @@ afr_shd_index_sweep(struct subvol_healer *healer, char *vgfid)
}
xdata = dict_new();
- if (!xdata || dict_set_int32(xdata, "get-gfid-type", 1)) {
+ if (!xdata || dict_set_int32_sizen(xdata, "get-gfid-type", 1)) {
ret = -ENOMEM;
goto out;
}
@@ -523,6 +524,11 @@ afr_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
afr_private_t *priv = NULL;
priv = this->private;
+
+ if (this->cleanup_starting) {
+ return -ENOTCONN;
+ }
+
if (!priv->shd.enabled)
return -EBUSY;
@@ -591,7 +597,9 @@ _afr_shd_ta_get_xattrs(xlator_t *this, loc_t *loc, dict_t **xdata)
{
afr_private_t *priv = NULL;
dict_t *xattr = NULL;
- int *raw = NULL;
+ int raw[AFR_NUM_CHANGE_LOGS] = {
+ 0,
+ };
int ret = -1;
int i = 0;
@@ -603,18 +611,11 @@ _afr_shd_ta_get_xattrs(xlator_t *this, loc_t *loc, dict_t **xdata)
"Failed to create dict.");
goto out;
}
-
for (i = 0; i < priv->child_count; i++) {
- raw = GF_CALLOC(AFR_NUM_CHANGE_LOGS, sizeof(int), gf_afr_mt_int32_t);
- if (!raw)
- goto out;
-
- ret = dict_set_bin(xattr, priv->pending_key[i], raw,
- AFR_NUM_CHANGE_LOGS * sizeof(int));
- if (ret) {
- GF_FREE(raw);
+ ret = dict_set_static_bin(xattr, priv->pending_key[i], &raw,
+ AFR_NUM_CHANGE_LOGS * sizeof(int));
+ if (ret)
goto out;
- }
}
ret = syncop_xattrop(priv->children[THIN_ARBITER_BRICK_INDEX], loc,
@@ -641,6 +642,7 @@ afr_shd_ta_get_xattrs(xlator_t *this, loc_t *loc, struct subvol_healer *healer,
if (afr_shd_fill_ta_loc(this, loc)) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
"Failed to populate thin-arbiter loc for: %s.", loc->name);
+ ret = -1;
goto out;
}
@@ -722,9 +724,9 @@ afr_shd_ta_unset_xattrs(xlator_t *this, loc_t *loc, dict_t **xdata, int healer)
}
ret = dict_set_bin(xattr, priv->pending_key[i], raw,
- AFR_NUM_CHANGE_LOGS * sizeof (int));
+ AFR_NUM_CHANGE_LOGS * sizeof(int));
if (ret) {
- GF_FREE (raw);
+ GF_FREE(raw);
goto out;
}
@@ -799,6 +801,218 @@ afr_bricks_available_for_heal(afr_private_t *priv)
return _gf_true;
}
+static gf_boolean_t
+afr_shd_ta_needs_heal(xlator_t *this, struct subvol_healer *healer)
+{
+ dict_t *xdata = NULL;
+ afr_private_t *priv = NULL;
+ loc_t loc = {
+ 0,
+ };
+ int ret = -1;
+ int i = 0;
+ gf_boolean_t need_heal = _gf_false;
+
+ priv = this->private;
+
+ ret = afr_shd_fill_ta_loc(this, &loc);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to populate thin-arbiter loc for: %s.", loc.name);
+ healer->rerun = 1;
+ goto out;
+ }
+
+ if (_afr_shd_ta_get_xattrs(this, &loc, &xdata)) {
+ healer->rerun = 1;
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (afr_ta_dict_contains_pending_xattr(xdata, priv, i)) {
+ need_heal = _gf_true;
+ break;
+ }
+ }
+
+out:
+ if (xdata)
+ dict_unref(xdata);
+ loc_wipe(&loc);
+
+ return need_heal;
+}
+
+static int
+afr_shd_anon_inode_cleaner(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ void *data)
+{
+ struct subvol_healer *healer = data;
+ afr_private_t *priv = healer->this->private;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ int ret = 0;
+ loc_t loc = {0};
+ int count = 0;
+ int i = 0;
+ int op_errno = 0;
+ struct iatt *iatt = NULL;
+ gf_boolean_t multiple_links = _gf_false;
+ unsigned char *gfid_present = alloca0(priv->child_count);
+ unsigned char *entry_present = alloca0(priv->child_count);
+ char *type = "file";
+
+ frame = afr_frame_create(healer->this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+ local = frame->local;
+ if (AFR_COUNT(local->child_up, priv->child_count) != priv->child_count) {
+ gf_msg_debug(healer->this->name, 0,
+ "Not all bricks are up. Skipping "
+ "cleanup of %s on %s",
+ entry->d_name, subvol->name);
+ ret = 0;
+ goto out;
+ }
+
+ loc.inode = inode_new(parent->inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ret = gf_uuid_parse(entry->d_name, loc.gfid);
+ if (ret) {
+ ret = 0;
+ goto out;
+ }
+ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ NULL);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == 0) {
+ count++;
+ gfid_present[i] = 1;
+ iatt = &local->replies[i].poststat;
+ if (iatt->ia_type == IA_IFDIR) {
+ type = "dir";
+ }
+
+ if (i == healer->subvol) {
+ if (local->replies[i].poststat.ia_nlink > 1) {
+ multiple_links = _gf_true;
+ }
+ }
+ } else if (local->replies[i].op_errno != ENOENT &&
+ local->replies[i].op_errno != ESTALE) {
+ /*We don't have complete view. Skip the entry*/
+ gf_msg_debug(healer->this->name, local->replies[i].op_errno,
+ "Skipping cleanup of %s on %s", entry->d_name,
+ subvol->name);
+ ret = 0;
+ goto out;
+ }
+ }
+
+ /*Inode is deleted from subvol*/
+ if (count == 1 || (iatt->ia_type != IA_IFDIR && multiple_links)) {
+ gf_msg(healer->this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_EXPUNGING_FILE_OR_DIR, "expunging %s %s/%s on %s", type,
+ priv->anon_inode_name, entry->d_name, subvol->name);
+ ret = afr_shd_entry_purge(subvol, parent->inode, entry->d_name,
+ iatt->ia_type);
+ if (ret == -ENOENT || ret == -ESTALE)
+ ret = 0;
+ } else if (count > 1) {
+ loc_wipe(&loc);
+ loc.parent = inode_ref(parent->inode);
+ loc.name = entry->d_name;
+ loc.inode = inode_new(parent->inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup,
+ &loc, NULL);
+ count = 0;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == 0) {
+ count++;
+ entry_present[i] = 1;
+ iatt = &local->replies[i].poststat;
+ } else if (local->replies[i].op_errno != ENOENT &&
+ local->replies[i].op_errno != ESTALE) {
+ /*We don't have complete view. Skip the entry*/
+ gf_msg_debug(healer->this->name, local->replies[i].op_errno,
+ "Skipping cleanup of %s on %s", entry->d_name,
+ subvol->name);
+ ret = 0;
+ goto out;
+ }
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (gfid_present[i] && !entry_present[i]) {
+ /*Entry is not anonymous on at least one subvol*/
+ gf_msg_debug(healer->this->name, 0,
+ "Valid entry present on %s "
+ "Skipping cleanup of %s on %s",
+ priv->children[i]->name, entry->d_name,
+ subvol->name);
+ ret = 0;
+ goto out;
+ }
+ }
+
+ gf_msg(healer->this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "expunging %s %s/%s on all subvols", type, priv->anon_inode_name,
+ entry->d_name);
+ ret = 0;
+ for (i = 0; i < priv->child_count; i++) {
+ op_errno = -afr_shd_entry_purge(priv->children[i], loc.parent,
+ entry->d_name, iatt->ia_type);
+ if (op_errno != ENOENT && op_errno != ESTALE) {
+ ret |= -op_errno;
+ }
+ }
+ }
+
+out:
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ loc_wipe(&loc);
+ return ret;
+}
+
+static void
+afr_cleanup_anon_inode_dir(struct subvol_healer *healer)
+{
+ int ret = 0;
+ call_frame_t *frame = NULL;
+ afr_private_t *priv = healer->this->private;
+ loc_t loc = {0};
+
+ ret = afr_anon_inode_create(healer->this, healer->subvol, &loc.inode);
+ if (ret)
+ goto out;
+
+ frame = afr_frame_create(healer->this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+
+ ret = syncop_mt_dir_scan(frame, priv->children[healer->subvol], &loc,
+ GF_CLIENT_PID_SELF_HEALD, healer,
+ afr_shd_anon_inode_cleaner, NULL,
+ priv->shd.max_threads, priv->shd.wait_qlength);
+out:
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ loc_wipe(&loc);
+ return;
+}
+
void *
afr_shd_index_healer(void *data)
{
@@ -825,7 +1039,8 @@ afr_shd_index_healer(void *data)
priv->local[healer->subvol] = healer->local;
if (priv->thin_arbiter_count) {
- afr_shd_ta_get_xattrs(this, &loc, healer, &pre_crawl_xdata);
+ if (afr_shd_ta_needs_heal(this, healer))
+ afr_shd_ta_get_xattrs(this, &loc, healer, &pre_crawl_xdata);
}
do {
@@ -855,9 +1070,17 @@ afr_shd_index_healer(void *data)
sleep(1);
} while (ret > 0);
- if (pre_crawl_xdata && !healer->crawl_event.heal_failed_count) {
+ if (ret == 0) {
+ afr_cleanup_anon_inode_dir(healer);
+ }
+
+ if (ret == 0 && pre_crawl_xdata &&
+ !healer->crawl_event.heal_failed_count) {
afr_shd_ta_check_and_unset_xattrs(this, &loc, healer,
pre_crawl_xdata);
+ }
+
+ if (pre_crawl_xdata) {
dict_unref(pre_crawl_xdata);
pre_crawl_xdata = NULL;
}
@@ -975,7 +1198,9 @@ afr_shd_dict_add_crawl_event(xlator_t *this, dict_t *output,
{
int ret = 0;
uint64_t count = 0;
- char key[256] = {0};
+ char key[128] = {0};
+ int keylen = 0;
+ char suffix[64] = {0};
int xl_id = 0;
uint64_t healed_count = 0;
uint64_t split_brain_count = 0;
@@ -1010,8 +1235,8 @@ afr_shd_dict_add_crawl_event(xlator_t *this, dict_t *output,
snprintf(key, sizeof(key), "statistics-%d-%d-count", xl_id, child);
ret = dict_get_uint64(output, key, &count);
- snprintf(key, sizeof(key), "statistics_healed_cnt-%d-%d-%" PRIu64, xl_id,
- child, count);
+ snprintf(suffix, sizeof(suffix), "%d-%d-%" PRIu64, xl_id, child, count);
+ snprintf(key, sizeof(key), "statistics_healed_cnt-%s", suffix);
ret = dict_set_uint64(output, key, healed_count);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
@@ -1019,8 +1244,7 @@ afr_shd_dict_add_crawl_event(xlator_t *this, dict_t *output,
goto out;
}
- snprintf(key, sizeof(key), "statistics_sb_cnt-%d-%d-%" PRIu64, xl_id, child,
- count);
+ snprintf(key, sizeof(key), "statistics_sb_cnt-%s", suffix);
ret = dict_set_uint64(output, key, split_brain_count);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
@@ -1028,17 +1252,15 @@ afr_shd_dict_add_crawl_event(xlator_t *this, dict_t *output,
goto out;
}
- snprintf(key, sizeof(key), "statistics_crawl_type-%d-%d-%" PRIu64, xl_id,
- child, count);
- ret = dict_set_str(output, key, crawl_type);
+ keylen = snprintf(key, sizeof(key), "statistics_crawl_type-%s", suffix);
+ ret = dict_set_strn(output, key, keylen, crawl_type);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
"Could not add statistics_crawl_type to output");
goto out;
}
- snprintf(key, sizeof(key), "statistics_heal_failed_cnt-%d-%d-%" PRIu64,
- xl_id, child, count);
+ snprintf(key, sizeof(key), "statistics_heal_failed_cnt-%s", suffix);
ret = dict_set_uint64(output, key, heal_failed_count);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
@@ -1046,9 +1268,8 @@ afr_shd_dict_add_crawl_event(xlator_t *this, dict_t *output,
goto out;
}
- snprintf(key, sizeof(key), "statistics_strt_time-%d-%d-%" PRIu64, xl_id,
- child, count);
- ret = dict_set_dynstr(output, key, start_time_str);
+ keylen = snprintf(key, sizeof(key), "statistics_strt_time-%s", suffix);
+ ret = dict_set_dynstrn(output, key, keylen, start_time_str);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
"Could not add statistics_crawl_start_time to output");
@@ -1062,11 +1283,10 @@ afr_shd_dict_add_crawl_event(xlator_t *this, dict_t *output,
else
progress = 0;
- snprintf(key, sizeof(key), "statistics_end_time-%d-%d-%" PRIu64, xl_id,
- child, count);
+ keylen = snprintf(key, sizeof(key), "statistics_end_time-%s", suffix);
if (!end_time_str)
end_time_str = gf_strdup("Could not determine the end time");
- ret = dict_set_dynstr(output, key, end_time_str);
+ ret = dict_set_dynstrn(output, key, keylen, end_time_str);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
"Could not add statistics_crawl_end_time to output");
@@ -1075,10 +1295,9 @@ afr_shd_dict_add_crawl_event(xlator_t *this, dict_t *output,
end_time_str = NULL;
}
- snprintf(key, sizeof(key), "statistics_inprogress-%d-%d-%" PRIu64, xl_id,
- child, count);
+ keylen = snprintf(key, sizeof(key), "statistics_inprogress-%s", suffix);
- ret = dict_set_int32(output, key, progress);
+ ret = dict_set_int32n(output, key, keylen, progress);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
"Could not add statistics_inprogress to output");
@@ -1104,7 +1323,9 @@ afr_shd_dict_add_path(xlator_t *this, dict_t *output, int child, char *path,
{
int ret = -1;
uint64_t count = 0;
- char key[256] = {0};
+ char key[64] = {0};
+ int keylen = 0;
+ char xl_id_child_str[32] = {0};
int xl_id = 0;
ret = dict_get_int32(output, this->name, &xl_id);
@@ -1114,11 +1335,12 @@ afr_shd_dict_add_path(xlator_t *this, dict_t *output, int child, char *path,
goto out;
}
- snprintf(key, sizeof(key), "%d-%d-count", xl_id, child);
+ snprintf(xl_id_child_str, sizeof(xl_id_child_str), "%d-%d", xl_id, child);
+ snprintf(key, sizeof(key), "%s-count", xl_id_child_str);
ret = dict_get_uint64(output, key, &count);
- snprintf(key, sizeof(key), "%d-%d-%" PRIu64, xl_id, child, count);
- ret = dict_set_dynstr(output, key, path);
+ keylen = snprintf(key, sizeof(key), "%s-%" PRIu64, xl_id_child_str, count);
+ ret = dict_set_dynstrn(output, key, keylen, path);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
@@ -1127,7 +1349,7 @@ afr_shd_dict_add_path(xlator_t *this, dict_t *output, int child, char *path,
}
if (tv) {
- snprintf(key, sizeof(key), "%d-%d-%" PRIu64 "-time", xl_id, child,
+ snprintf(key, sizeof(key), "%s-%" PRIu64 "-time", xl_id_child_str,
count);
ret = dict_set_uint32(output, key, tv->tv_sec);
if (ret) {
@@ -1137,7 +1359,7 @@ afr_shd_dict_add_path(xlator_t *this, dict_t *output, int child, char *path,
}
}
- snprintf(key, sizeof(key), "%d-%d-count", xl_id, child);
+ snprintf(key, sizeof(key), "%s-count", xl_id_child_str);
ret = dict_set_uint64(output, key, count + 1);
if (ret) {
@@ -1210,10 +1432,6 @@ afr_selfheal_daemon_init(xlator_t *this)
priv = this->private;
shd = &priv->shd;
- this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this);
- if (!this->itable)
- goto out;
-
shd->index_healers = GF_CALLOC(sizeof(*shd->index_healers),
priv->child_count,
gf_afr_mt_subvol_healer_t);
@@ -1264,12 +1482,18 @@ out:
return ret;
}
-int
-afr_selfheal_childup(xlator_t *this, int subvol)
+void
+afr_selfheal_childup(xlator_t *this, afr_private_t *priv)
{
- afr_shd_index_healer_spawn(this, subvol);
+ int subvol = 0;
- return 0;
+ if (!priv->shd.iamshd)
+ return;
+ for (subvol = 0; subvol < priv->child_count; subvol++)
+ if (priv->child_up[subvol])
+ afr_shd_index_healer_spawn(this, subvol);
+
+ return;
}
int
@@ -1319,41 +1543,78 @@ afr_xl_op(xlator_t *this, dict_t *input, dict_t *output)
struct subvol_healer *healer = NULL;
int i = 0;
char key[64];
+ int keylen = 0;
+ int this_name_len = 0;
int op_ret = 0;
uint64_t cnt = 0;
+#define AFR_SET_DICT_AND_LOG(name, output, key, keylen, dict_str, \
+ dict_str_len) \
+ { \
+ int ret; \
+ \
+ ret = dict_set_nstrn(output, key, keylen, dict_str, dict_str_len); \
+ if (ret) { \
+ gf_smsg(name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED, \
+ "key=%s", key, "value=%s", dict_str, NULL); \
+ } \
+ }
+
priv = this->private;
shd = &priv->shd;
- ret = dict_get_int32(input, "xl-op", (int32_t *)&op);
- if (ret)
+ ret = dict_get_int32_sizen(input, "xl-op", (int32_t *)&op);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED,
+ "key=xl-op", NULL);
goto out;
- ret = dict_get_int32(input, this->name, &xl_id);
- if (ret)
+ }
+ this_name_len = strlen(this->name);
+ ret = dict_get_int32n(input, this->name, this_name_len, &xl_id);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED,
+ "key=%s", this->name, NULL);
goto out;
- ret = dict_set_int32(output, this->name, xl_id);
- if (ret)
+ }
+ ret = dict_set_int32n(output, this->name, this_name_len, xl_id);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "key=%s", this->name, NULL);
goto out;
+ }
switch (op) {
case GF_SHD_OP_HEAL_INDEX:
op_ret = 0;
for (i = 0; i < priv->child_count; i++) {
healer = &shd->index_healers[i];
- snprintf(key, sizeof(key), "%d-%d-status", xl_id, i);
+ keylen = snprintf(key, sizeof(key), "%d-%d-status", xl_id, i);
if (!priv->child_up[i]) {
- ret = dict_set_str(output, key, "Brick is not connected");
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SBRICK_NOT_CONNECTED,
+ SLEN(SBRICK_NOT_CONNECTED));
op_ret = -1;
} else if (AFR_COUNT(priv->child_up, priv->child_count) < 2) {
- ret = dict_set_str(output, key,
- "< 2 bricks in replica are up");
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SLESS_THAN2_BRICKS_in_REP,
+ SLEN(SLESS_THAN2_BRICKS_in_REP));
op_ret = -1;
} else if (!afr_shd_is_subvol_local(this, healer->subvol)) {
- ret = dict_set_str(output, key, "Brick is remote");
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SBRICK_IS_REMOTE,
+ SLEN(SBRICK_IS_REMOTE));
} else {
- ret = dict_set_str(output, key, "Started self-heal");
- afr_shd_index_healer_spawn(this, i);
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SSTARTED_SELF_HEAL,
+ SLEN(SSTARTED_SELF_HEAL));
+
+ ret = afr_shd_index_healer_spawn(this, i);
+
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_HEALER_SPAWN_FAILED, NULL);
+ }
}
}
break;
@@ -1362,18 +1623,31 @@ afr_xl_op(xlator_t *this, dict_t *input, dict_t *output)
for (i = 0; i < priv->child_count; i++) {
healer = &shd->full_healers[i];
- snprintf(key, sizeof(key), "%d-%d-status", xl_id, i);
+ keylen = snprintf(key, sizeof(key), "%d-%d-status", xl_id, i);
if (!priv->child_up[i]) {
- ret = dict_set_str(output, key, "Brick is not connected");
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SBRICK_NOT_CONNECTED,
+ SLEN(SBRICK_NOT_CONNECTED));
} else if (AFR_COUNT(priv->child_up, priv->child_count) < 2) {
- ret = dict_set_str(output, key,
- "< 2 bricks in replica are up");
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SLESS_THAN2_BRICKS_in_REP,
+ SLEN(SLESS_THAN2_BRICKS_in_REP));
} else if (!afr_shd_is_subvol_local(this, healer->subvol)) {
- ret = dict_set_str(output, key, "Brick is remote");
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SBRICK_IS_REMOTE,
+ SLEN(SBRICK_IS_REMOTE));
} else {
- ret = dict_set_str(output, key, "Started self-heal");
- afr_shd_full_healer_spawn(this, i);
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SSTARTED_SELF_HEAL,
+ SLEN(SSTARTED_SELF_HEAL));
+
+ ret = afr_shd_full_healer_spawn(this, i);
+
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_HEALER_SPAWN_FAILED, NULL);
+ }
op_ret = 0;
}
}
@@ -1381,25 +1655,25 @@ afr_xl_op(xlator_t *this, dict_t *input, dict_t *output)
case GF_SHD_OP_INDEX_SUMMARY:
/* this case has been handled in glfs-heal.c */
break;
- case GF_SHD_OP_HEALED_FILES:
- case GF_SHD_OP_HEAL_FAILED_FILES:
- for (i = 0; i < priv->child_count; i++) {
- snprintf(key, sizeof(key), "%d-%d-status", xl_id, i);
- ret = dict_set_str(output, key,
- "Operation Not "
- "Supported");
- }
- break;
case GF_SHD_OP_SPLIT_BRAIN_FILES:
eh_dump(shd->split_brain, output, afr_add_shd_event);
break;
case GF_SHD_OP_STATISTICS:
for (i = 0; i < priv->child_count; i++) {
eh_dump(shd->statistics[i], output, afr_add_crawl_event);
- afr_shd_dict_add_crawl_event(
+ ret = afr_shd_dict_add_crawl_event(
this, output, &shd->index_healers[i].crawl_event);
- afr_shd_dict_add_crawl_event(this, output,
- &shd->full_healers[i].crawl_event);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_ADD_CRAWL_EVENT_FAILED, NULL);
+ }
+
+ ret = afr_shd_dict_add_crawl_event(
+ this, output, &shd->full_healers[i].crawl_event);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_ADD_CRAWL_EVENT_FAILED, NULL);
+ }
}
break;
case GF_SHD_OP_STATISTICS_HEAL_COUNT:
@@ -1408,14 +1682,21 @@ afr_xl_op(xlator_t *this, dict_t *input, dict_t *output)
for (i = 0; i < priv->child_count; i++) {
if (!priv->child_up[i]) {
- snprintf(key, sizeof(key), "%d-%d-status", xl_id, i);
- ret = dict_set_str(output, key, "Brick is not connected");
+ keylen = snprintf(key, sizeof(key), "%d-%d-status", xl_id,
+ i);
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SBRICK_NOT_CONNECTED,
+ SLEN(SBRICK_NOT_CONNECTED));
} else {
snprintf(key, sizeof(key), "%d-%d-hardlinks", xl_id, i);
ret = afr_shd_get_index_count(this, i, &cnt);
if (ret == 0) {
ret = dict_set_uint64(output, key, cnt);
}
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_DICT_SET_FAILED, NULL);
+ }
op_ret = 0;
}
}
@@ -1423,11 +1704,13 @@ afr_xl_op(xlator_t *this, dict_t *input, dict_t *output)
break;
default:
- gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_ARG,
- "Unknown set op %d", op);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_ARG, "op=%d",
+ op, NULL);
break;
}
out:
- dict_del(output, this->name);
+ dict_deln(output, this->name, this_name_len);
return op_ret;
+
+#undef AFR_SET_DICT_AND_LOG
}
diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h
index 7de7c431460..18db728ea7b 100644
--- a/xlators/cluster/afr/src/afr-self-heald.h
+++ b/xlators/cluster/afr/src/afr-self-heald.h
@@ -14,12 +14,11 @@
#include <pthread.h>
typedef struct {
- int child;
char *path;
+ int child;
} shd_event_t;
typedef struct {
- int child;
uint64_t healed_count;
uint64_t split_brain_count;
uint64_t heal_failed_count;
@@ -31,38 +30,36 @@ typedef struct {
cralwer is in progress */
time_t end_time;
char *crawl_type;
+ int child;
} crawl_event_t;
struct subvol_healer {
xlator_t *this;
- int subvol;
- gf_boolean_t local;
- gf_boolean_t running;
- gf_boolean_t rerun;
crawl_event_t crawl_event;
pthread_mutex_t mutex;
pthread_cond_t cond;
pthread_t thread;
+ int subvol;
+ gf_boolean_t local;
+ gf_boolean_t running;
+ gf_boolean_t rerun;
};
typedef struct {
- gf_boolean_t iamshd;
- gf_boolean_t enabled;
- int timeout;
struct subvol_healer *index_healers;
struct subvol_healer *full_healers;
eh_t *split_brain;
eh_t **statistics;
+ int timeout;
uint32_t max_threads;
uint32_t wait_qlength;
uint32_t halo_max_latency_msec;
+ gf_boolean_t iamshd;
+ gf_boolean_t enabled;
} afr_self_heald_t;
int
-afr_selfheal_childup(xlator_t *this, int subvol);
-
-int
afr_selfheal_daemon_init(xlator_t *this);
int
@@ -73,6 +70,6 @@ afr_shd_gfid_to_path(xlator_t *this, xlator_t *subvol, uuid_t gfid,
char **path_p);
int
-afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name,
+afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name,
ia_type_t type);
#endif /* !_AFR_SELF_HEALD_H */
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index fb78c198d9c..a51f79b1f43 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -8,10 +8,10 @@
cases as published by the Free Software Foundation.
*/
-#include "dict.h"
-#include "byte-order.h"
-#include "common-utils.h"
-#include "timer.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/timer.h>
#include "afr.h"
#include "afr-transaction.h"
@@ -28,6 +28,12 @@ typedef enum {
static void
afr_lock_resume_shared(struct list_head *list);
+static void
+afr_post_op_handle_success(call_frame_t *frame, xlator_t *this);
+
+static void
+afr_post_op_handle_failure(call_frame_t *frame, xlator_t *this, int op_errno);
+
void
__afr_transaction_wake_shared(afr_local_t *local, struct list_head *shared);
@@ -68,6 +74,14 @@ afr_changelog_post_op_done(call_frame_t *frame, xlator_t *this);
static void
afr_changelog_post_op_fail(call_frame_t *frame, xlator_t *this, int op_errno);
+void
+afr_ta_locked_priv_invalidate(afr_private_t *priv)
+{
+ priv->ta_bad_child_index = AFR_CHILD_UNKNOWN;
+ priv->release_ta_notify_dom_lock = _gf_false;
+ priv->ta_notify_dom_lock_offset = 0;
+}
+
static void
afr_ta_process_waitq(xlator_t *this)
{
@@ -110,9 +124,9 @@ afr_release_notify_lock_for_ta(void *opaque)
this = (xlator_t *)opaque;
priv = this->private;
- ret = afr_fill_ta_loc(this, &loc);
+ ret = afr_fill_ta_loc(this, &loc, _gf_true);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB,
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
"Failed to populate loc for thin-arbiter.");
goto out;
}
@@ -121,17 +135,16 @@ afr_release_notify_lock_for_ta(void *opaque)
flock.l_len = 1;
ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
AFR_TA_DOM_NOTIFY, &loc, F_SETLK, &flock, NULL, NULL);
- if (!ret) {
- LOCK(&priv->lock);
- priv->ta_bad_child_index = AFR_CHILD_UNKNOWN;
- priv->release_ta_notify_dom_lock = _gf_false;
- priv->ta_notify_dom_lock_offset = 0;
- UNLOCK(&priv->lock);
- } else {
+ if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
"Failed to unlock AFR_TA_DOM_NOTIFY lock.");
}
+ LOCK(&priv->lock);
+ {
+ afr_ta_locked_priv_invalidate(priv);
+ }
+ UNLOCK(&priv->lock);
out:
loc_wipe(&loc);
return ret;
@@ -242,7 +255,7 @@ afr_changelog_has_quorum(afr_local_t *local, xlator_t *this)
}
}
- if (afr_has_quorum(success_children, this)) {
+ if (afr_has_quorum(success_children, this, NULL)) {
return _gf_true;
}
@@ -366,6 +379,8 @@ afr_transaction_done(call_frame_t *frame, xlator_t *this)
}
local->transaction.unwind(frame, this);
+ GF_ASSERT(list_empty(&local->transaction.owner_list));
+ GF_ASSERT(list_empty(&local->transaction.wait_list));
AFR_STACK_DESTROY(frame);
return 0;
@@ -387,7 +402,7 @@ afr_lock_fail_shared(afr_local_t *local, struct list_head *list)
}
static void
-afr_handle_lock_acquire_failure(afr_local_t *local, gf_boolean_t locked)
+afr_handle_lock_acquire_failure(afr_local_t *local)
{
struct list_head shared;
afr_lock_t *lock = NULL;
@@ -408,13 +423,8 @@ afr_handle_lock_acquire_failure(afr_local_t *local, gf_boolean_t locked)
afr_lock_fail_shared(local, &shared);
local->transaction.do_eager_unlock = _gf_true;
out:
- if (locked) {
- local->internal_lock.lock_cbk = afr_transaction_done;
- afr_unlock(local->transaction.frame, local->transaction.frame->this);
- } else {
- afr_transaction_done(local->transaction.frame,
- local->transaction.frame->this);
- }
+ local->internal_lock.lock_cbk = afr_transaction_done;
+ afr_unlock(local->transaction.frame, local->transaction.frame->this);
}
call_frame_t *
@@ -511,42 +521,6 @@ afr_compute_pre_op_sources(call_frame_t *frame, xlator_t *this)
local->transaction.pre_op_sources[j] = 0;
}
-gf_boolean_t
-afr_has_arbiter_fop_cbk_quorum(call_frame_t *frame)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- xlator_t *this = NULL;
- gf_boolean_t fop_failed = _gf_false;
- unsigned char *pre_op_sources = NULL;
- int i = 0;
-
- local = frame->local;
- this = frame->this;
- priv = this->private;
- pre_op_sources = local->transaction.pre_op_sources;
-
- /* If the fop failed on the brick, it is not a source. */
- for (i = 0; i < priv->child_count; i++)
- if (local->transaction.failed_subvols[i])
- pre_op_sources[i] = 0;
-
- switch (AFR_COUNT(pre_op_sources, priv->child_count)) {
- case 1:
- if (pre_op_sources[ARBITER_BRICK_INDEX])
- fop_failed = _gf_true;
- break;
- case 0:
- fop_failed = _gf_true;
- break;
- }
-
- if (fop_failed)
- return _gf_false;
-
- return _gf_true;
-}
-
void
afr_txn_arbitrate_fop(call_frame_t *frame, xlator_t *this)
{
@@ -613,7 +587,7 @@ afr_transaction_perform_fop(call_frame_t *frame, xlator_t *this)
failure_count = AFR_COUNT(local->transaction.failed_subvols,
priv->child_count);
if (failure_count == priv->child_count) {
- afr_handle_lock_acquire_failure(local, _gf_true);
+ afr_handle_lock_acquire_failure(local);
return 0;
} else {
lock = &local->inode_ctx->lock[local->transaction.type];
@@ -660,8 +634,9 @@ afr_set_pending_dict(afr_private_t *priv, dict_t *xattr, int **pending)
return ret;
}
+
static void
-afr_ta_dom_lock_check_and_release(afr_local_t *local, xlator_t *this)
+afr_ta_dom_lock_check_and_release(afr_ta_fop_state_t fop_state, xlator_t *this)
{
afr_private_t *priv = this->private;
unsigned int inmem_count = 0;
@@ -671,17 +646,25 @@ afr_ta_dom_lock_check_and_release(afr_local_t *local, xlator_t *this)
LOCK(&priv->lock);
{
/*Once we get notify lock release upcall notification,
- if two fop states are non empty/non zero, we will
- not release lock.
- 1 - If anything in memory txn
- 2 - If anything in onwire or onwireq
+ if any of the fop state counters are non-zero, we will
+ not release the lock.
*/
- if (local->fop_state == TA_INFO_IN_MEMORY_SUCCESS) {
- inmem_count = --priv->ta_in_mem_txn_count;
- } else {
- inmem_count = priv->ta_in_mem_txn_count;
- }
onwire_count = priv->ta_on_wire_txn_count;
+ inmem_count = priv->ta_in_mem_txn_count;
+ switch (fop_state) {
+ case TA_GET_INFO_FROM_TA_FILE:
+ onwire_count = --priv->ta_on_wire_txn_count;
+ break;
+ case TA_INFO_IN_MEMORY_SUCCESS:
+ case TA_INFO_IN_MEMORY_FAILED:
+ inmem_count = --priv->ta_in_mem_txn_count;
+ break;
+ case TA_WAIT_FOR_NOTIFY_LOCK_REL:
+ GF_ASSERT(0);
+ break;
+ case TA_SUCCESS:
+ break;
+ }
release = priv->release_ta_notify_dom_lock;
}
UNLOCK(&priv->lock);
@@ -693,7 +676,7 @@ afr_ta_dom_lock_check_and_release(afr_local_t *local, xlator_t *this)
}
static void
-afr_ta_process_onwireq(afr_local_t *local, xlator_t *this)
+afr_ta_process_onwireq(afr_ta_fop_state_t fop_state, xlator_t *this)
{
afr_private_t *priv = this->private;
afr_local_t *entry = NULL;
@@ -706,15 +689,6 @@ afr_ta_process_onwireq(afr_local_t *local, xlator_t *this)
LOCK(&priv->lock);
{
- if (--priv->ta_on_wire_txn_count == 0) {
- UNLOCK(&priv->lock);
- /*Only one write fop came and after taking notify
- *lock and before doing xattrop, it has received
- *lock contention upcall, so this is the only place
- *to find this out and release the lock*/
- afr_ta_dom_lock_check_and_release(local, this);
- return;
- }
bad_child = priv->ta_bad_child_index;
if (bad_child == AFR_CHILD_UNKNOWN) {
/*The previous on-wire ta_post_op was a failure. Just dequeue
@@ -735,13 +709,10 @@ afr_ta_process_onwireq(afr_local_t *local, xlator_t *this)
while (!list_empty(&onwireq)) {
entry = list_entry(onwireq.next, afr_local_t, ta_onwireq);
list_del_init(&entry->ta_onwireq);
- LOCK(&priv->lock);
- --priv->ta_on_wire_txn_count;
- UNLOCK(&priv->lock);
if (entry->ta_failed_subvol == bad_child) {
- afr_changelog_post_op_do(entry->transaction.frame, this);
+ afr_post_op_handle_success(entry->transaction.frame, this);
} else {
- afr_changelog_post_op_fail(entry->transaction.frame, this, EIO);
+ afr_post_op_handle_failure(entry->transaction.frame, this, EIO);
}
}
}
@@ -760,7 +731,7 @@ afr_changelog_post_op_done(call_frame_t *frame, xlator_t *this)
if (priv->thin_arbiter_count) {
/*fop should not come here with TA_WAIT_FOR_NOTIFY_LOCK_REL state */
- afr_ta_dom_lock_check_and_release(frame->local, this);
+ afr_ta_dom_lock_check_and_release(local->fop_state, this);
}
/* Fail the FOP if post-op did not succeed on quorum no. of bricks. */
@@ -797,21 +768,9 @@ afr_changelog_post_op_fail(call_frame_t *frame, xlator_t *this, int op_errno)
unsigned char *
afr_locked_nodes_get(afr_transaction_type type, afr_internal_lock_t *int_lock)
{
- unsigned char *locked_nodes = NULL;
- switch (type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
- locked_nodes = int_lock->locked_nodes;
- break;
-
- case AFR_ENTRY_TRANSACTION:
- case AFR_ENTRY_RENAME_TRANSACTION:
- /*Because same set of subvols participate in all lockee
- * entities*/
- locked_nodes = int_lock->lockee[0].locked_nodes;
- break;
- }
- return locked_nodes;
+ /*Because same set of subvols participate in all lockee
+ * entities*/
+ return int_lock->lockee[0].locked_nodes;
}
int
@@ -869,7 +828,7 @@ afr_handle_symmetric_errors(call_frame_t *frame, xlator_t *this)
}
gf_boolean_t
-afr_has_quorum(unsigned char *subvols, xlator_t *this)
+afr_has_quorum(unsigned char *subvols, xlator_t *this, call_frame_t *frame)
{
unsigned int quorum_count = 0;
afr_private_t *priv = NULL;
@@ -878,6 +837,9 @@ afr_has_quorum(unsigned char *subvols, xlator_t *this)
priv = this->private;
up_children_count = AFR_COUNT(subvols, priv->child_count);
+ if (afr_lookup_has_quorum(frame, up_children_count))
+ return _gf_true;
+
if (priv->quorum_count == AFR_QUORUM_AUTO) {
/*
* Special case for auto-quorum with an even number of nodes.
@@ -932,7 +894,7 @@ afr_has_fop_quorum(call_frame_t *frame)
locked_nodes = afr_locked_nodes_get(local->transaction.type,
&local->internal_lock);
- return afr_has_quorum(locked_nodes, this);
+ return afr_has_quorum(locked_nodes, this, NULL);
}
static gf_boolean_t
@@ -950,7 +912,7 @@ afr_has_fop_cbk_quorum(call_frame_t *frame)
success[i] = 1;
}
- return afr_has_quorum(success, this);
+ return afr_has_quorum(success, this, NULL);
}
gf_boolean_t
@@ -973,12 +935,8 @@ afr_need_dirty_marking(call_frame_t *frame, xlator_t *this)
priv->child_count)
return _gf_false;
- if (priv->arbiter_count) {
- if (!afr_has_arbiter_fop_cbk_quorum(frame))
- need_dirty = _gf_true;
- } else if (!afr_has_fop_cbk_quorum(frame)) {
+ if (!afr_has_fop_cbk_quorum(frame))
need_dirty = _gf_true;
- }
return need_dirty;
}
@@ -1028,12 +986,8 @@ afr_handle_quorum(call_frame_t *frame, xlator_t *this)
* no split-brain with the fix. The problem is eliminated completely.
*/
- if (priv->arbiter_count) {
- if (afr_has_arbiter_fop_cbk_quorum(frame))
- return;
- } else if (afr_has_fop_cbk_quorum(frame)) {
+ if (afr_has_fop_cbk_quorum(frame))
return;
- }
if (afr_need_dirty_marking(frame, this))
goto set_response;
@@ -1075,7 +1029,7 @@ set_response:
}
int
-afr_fill_ta_loc(xlator_t *this, loc_t *loc)
+afr_fill_ta_loc(xlator_t *this, loc_t *loc, gf_boolean_t is_gfid_based_fop)
{
afr_private_t *priv = NULL;
@@ -1083,6 +1037,11 @@ afr_fill_ta_loc(xlator_t *this, loc_t *loc)
loc->parent = inode_ref(priv->root_inode);
gf_uuid_copy(loc->pargfid, loc->parent->gfid);
loc->name = priv->pending_key[THIN_ARBITER_BRICK_INDEX];
+ if (is_gfid_based_fop && gf_uuid_is_null(priv->ta_gfid)) {
+ /* Except afr_ta_id_file_check() which is path based, all other gluster
+ * FOPS need gfid.*/
+ return -EINVAL;
+ }
gf_uuid_copy(loc->gfid, priv->ta_gfid);
loc->inode = inode_new(loc->parent->table);
if (!loc->inode) {
@@ -1092,99 +1051,76 @@ afr_fill_ta_loc(xlator_t *this, loc_t *loc)
return 0;
}
-int
-afr_changelog_thin_arbiter_post_op(xlator_t *this, afr_local_t *local)
+static int
+afr_ta_post_op_done(int ret, call_frame_t *frame, void *opaque)
{
- int ret = 0;
- afr_private_t *priv = NULL;
- dict_t *xattr = NULL;
- int failed_count = 0;
- struct gf_flock flock = {
- 0,
- };
- loc_t loc = {
- 0,
- };
- int i = 0;
-
- priv = this->private;
- if (!priv->thin_arbiter_count)
- return 0;
-
- failed_count = AFR_COUNT(local->transaction.failed_subvols,
- priv->child_count);
- if (!failed_count)
- return 0;
+ xlator_t *this = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *txn_frame = NULL;
+ afr_ta_fop_state_t fop_state;
- GF_ASSERT(failed_count == 1);
- ret = afr_fill_ta_loc(this, &loc);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
- "Failed to populate thin-arbiter loc for: %s.", loc.name);
- goto out;
- }
+ local = (afr_local_t *)opaque;
+ fop_state = local->fop_state;
+ txn_frame = local->transaction.frame;
+ this = frame->this;
- xattr = dict_new();
- if (!xattr) {
- ret = -ENOMEM;
- goto out;
- }
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_static_bin(xattr, priv->pending_key[i],
- local->pending[i],
- AFR_NUM_CHANGE_LOGS * sizeof(int));
- if (ret)
- goto out;
+ if (ret == 0) {
+ /*Mark pending xattrs on the up data brick.*/
+ afr_post_op_handle_success(txn_frame, this);
+ } else {
+ afr_post_op_handle_failure(txn_frame, this, -ret);
}
- flock.l_type = F_WRLCK;
- flock.l_start = 0;
- flock.l_len = 0;
-
- /*TODO: Convert to two domain locking. */
- ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
- AFR_TA_DOM_NOTIFY, &loc, F_SETLKW, &flock, NULL, NULL);
- if (ret)
- goto out;
+ STACK_DESTROY(frame->root);
+ afr_ta_process_onwireq(fop_state, this);
- ret = syncop_xattrop(priv->children[THIN_ARBITER_BRICK_INDEX], &loc,
- GF_XATTROP_ADD_ARRAY, xattr, NULL, NULL, NULL);
+ return 0;
+}
- if (ret == -EINVAL) {
- gf_msg(this->name, GF_LOG_INFO, -ret, AFR_MSG_THIN_ARB,
- "Thin-arbiter has denied post-op on %s for gfid %s.",
- priv->pending_key[THIN_ARBITER_BRICK_INDEX],
- uuid_utoa(local->inode->gfid));
+int **
+afr_set_changelog_xattr(afr_private_t *priv, unsigned char *pending,
+ dict_t *xattr, afr_local_t *local)
+{
+ int **changelog = NULL;
+ int idx = 0;
+ int ret = 0;
+ int i;
- } else if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
- "Post-op on thin-arbiter id file %s failed for gfid %s.",
- priv->pending_key[THIN_ARBITER_BRICK_INDEX],
- uuid_utoa(local->inode->gfid));
+ if (local->is_new_entry == _gf_true) {
+ changelog = afr_mark_pending_changelog(priv, pending, xattr,
+ local->cont.dir_fop.buf.ia_type);
+ } else {
+ idx = afr_index_for_transaction_type(local->transaction.type);
+ changelog = afr_matrix_create(priv->child_count, AFR_NUM_CHANGE_LOGS);
+ if (!changelog) {
+ goto out;
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.failed_subvols[i])
+ changelog[i][idx] = hton32(1);
+ }
+ ret = afr_set_pending_dict(priv, xattr, changelog);
+ if (ret < 0) {
+ afr_matrix_cleanup(changelog, priv->child_count);
+ return NULL;
+ }
}
- flock.l_type = F_UNLCK;
- syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX], AFR_TA_DOM_NOTIFY,
- &loc, F_SETLK, &flock, NULL, NULL);
-out:
- if (xattr)
- dict_unref(xattr);
- return ret;
+out:
+ return changelog;
}
-static int
-afr_ta_post_op_done(int ret, call_frame_t *frame, void *opaque)
+static void
+afr_ta_locked_xattrop_validate(afr_private_t *priv, afr_local_t *local,
+ gf_boolean_t *valid)
{
- xlator_t *this = NULL;
- afr_local_t *local = NULL;
-
- local = (afr_local_t *)opaque;
- this = frame->this;
-
- STACK_DESTROY(frame->root);
- afr_ta_process_onwireq(local, this);
-
- return 0;
+ if (priv->ta_event_gen > local->ta_event_gen) {
+ /* We can't trust the ta's response anymore.*/
+ afr_ta_locked_priv_invalidate(priv);
+ *valid = _gf_false;
+ return;
+ }
+ return;
}
static int
@@ -1193,27 +1129,25 @@ afr_ta_post_op_do(void *opaque)
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
xlator_t *this = NULL;
- call_frame_t *txn_frame = NULL;
dict_t *xattr = NULL;
- int **pending = NULL;
+ unsigned char *pending = NULL;
+ int **changelog = NULL;
int failed_subvol = -1;
int success_subvol = -1;
loc_t loc = {
0,
};
- int idx = 0;
int i = 0;
int ret = 0;
+ gf_boolean_t valid = _gf_true;
local = (afr_local_t *)opaque;
- txn_frame = local->transaction.frame;
- this = txn_frame->this;
+ this = local->transaction.frame->this;
priv = this->private;
- idx = afr_index_for_transaction_type(local->transaction.type);
- ret = afr_fill_ta_loc(this, &loc);
+ ret = afr_fill_ta_loc(this, &loc, _gf_true);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB,
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
"Failed to populate loc for thin-arbiter.");
goto out;
}
@@ -1224,23 +1158,23 @@ afr_ta_post_op_do(void *opaque)
goto out;
}
- pending = afr_matrix_create(priv->child_count, AFR_NUM_CHANGE_LOGS);
- if (!pending) {
- ret = -ENOMEM;
- goto out;
- }
+ pending = alloca0(priv->child_count);
+
for (i = 0; i < priv->child_count; i++) {
if (local->transaction.failed_subvols[i]) {
- pending[i][idx] = hton32(1);
+ pending[i] = 1;
failed_subvol = i;
} else {
success_subvol = i;
}
}
- ret = afr_set_pending_dict(priv, xattr, pending);
- if (ret < 0)
+ changelog = afr_set_changelog_xattr(priv, pending, xattr, local);
+
+ if (!changelog) {
+ ret = -ENOMEM;
goto out;
+ }
ret = afr_ta_post_op_lock(this, &loc);
if (ret)
@@ -1248,22 +1182,31 @@ afr_ta_post_op_do(void *opaque)
ret = syncop_xattrop(priv->children[THIN_ARBITER_BRICK_INDEX], &loc,
GF_XATTROP_ADD_ARRAY, xattr, NULL, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Post-op on thin-arbiter id file %s failed for gfid %s.",
+ priv->pending_key[THIN_ARBITER_BRICK_INDEX],
+ uuid_utoa(local->inode->gfid));
+ }
LOCK(&priv->lock);
{
if (ret == 0) {
priv->ta_bad_child_index = failed_subvol;
} else if (ret == -EINVAL) {
priv->ta_bad_child_index = success_subvol;
+ ret = -EIO; /* TA failed the fop. Return EIO to application. */
}
+
+ afr_ta_locked_xattrop_validate(priv, local, &valid);
}
UNLOCK(&priv->lock);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
- "Post-op on thin-arbiter id file %s failed for gfid %s.",
+ if (valid == _gf_false) {
+ gf_msg(this->name, GF_LOG_ERROR, EIO, AFR_MSG_THIN_ARB,
+ "Post-op on thin-arbiter id file %s for gfid %s invalidated due "
+ "to event-gen mismatch.",
priv->pending_key[THIN_ARBITER_BRICK_INDEX],
uuid_utoa(local->inode->gfid));
- if (ret == -EINVAL)
- ret = -EIO; /* TA failed the fop. Return EIO to application. */
+ ret = -EIO;
}
afr_ta_post_op_unlock(this, &loc);
@@ -1271,17 +1214,11 @@ out:
if (xattr)
dict_unref(xattr);
- if (pending)
- afr_matrix_cleanup(pending, priv->child_count);
+ if (changelog)
+ afr_matrix_cleanup(changelog, priv->child_count);
loc_wipe(&loc);
- if (ret == 0) {
- /*Mark pending xattrs on the up data brick.*/
- afr_changelog_post_op_do(local->transaction.frame, this);
- } else {
- afr_changelog_post_op_fail(local->transaction.frame, this, -ret);
- }
return ret;
}
@@ -1340,6 +1277,7 @@ afr_ta_set_fop_state(afr_private_t *priv, afr_local_t *local,
/* Post-op on TA not needed as the fop succeeded only on the
* in-memory bad data brick and not the good one. Fail the fop.*/
local->fop_state = TA_INFO_IN_MEMORY_FAILED;
+ priv->ta_in_mem_txn_count++;
}
}
UNLOCK(&priv->lock);
@@ -1359,6 +1297,28 @@ afr_ta_fill_failed_subvol(afr_private_t *priv, afr_local_t *local)
}
static void
+afr_post_op_handle_success(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+ if (local->is_new_entry == _gf_true) {
+ afr_mark_new_entry_changelog(frame, this);
+ }
+ afr_changelog_post_op_do(frame, this);
+
+ return;
+}
+
+static void
+afr_post_op_handle_failure(call_frame_t *frame, xlator_t *this, int op_errno)
+{
+ afr_changelog_post_op_fail(frame, this, op_errno);
+
+ return;
+}
+
+static void
afr_ta_decide_post_op_state(call_frame_t *frame, xlator_t *this)
{
afr_private_t *priv = NULL;
@@ -1380,10 +1340,12 @@ afr_ta_decide_post_op_state(call_frame_t *frame, xlator_t *this)
/*Post releasing the notify lock, we will act on this queue*/
break;
case TA_INFO_IN_MEMORY_SUCCESS:
- afr_changelog_post_op_do(frame, this);
+ afr_post_op_handle_success(frame, this);
break;
case TA_INFO_IN_MEMORY_FAILED:
- afr_changelog_post_op_fail(frame, this, EIO);
+ afr_post_op_handle_failure(frame, this, EIO);
+ break;
+ default:
break;
}
return;
@@ -1713,6 +1675,7 @@ afr_changelog_populate_xdata(call_frame_t *frame, afr_xattrop_type_t op,
int i = 0;
int ret = 0;
char *key = NULL;
+ int keylen = 0;
const char *name = NULL;
dict_t *xdata1 = NULL;
dict_t *xdata2 = NULL;
@@ -1770,7 +1733,8 @@ afr_changelog_populate_xdata(call_frame_t *frame, afr_xattrop_type_t op,
}
if (need_entry_key_set) {
- ret = dict_set_str(xdata1, key, (char *)name);
+ keylen = strlen(key);
+ ret = dict_set_strn(xdata1, key, keylen, (char *)name);
if (ret)
gf_msg(THIS->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_SET_FAILED,
"%s/%s: Could not set %s key during xattrop",
@@ -1780,7 +1744,8 @@ afr_changelog_populate_xdata(call_frame_t *frame, afr_xattrop_type_t op,
if (!xdata2)
goto out;
- ret = dict_set_str(xdata2, key, (char *)local->newloc.name);
+ ret = dict_set_strn(xdata2, key, keylen,
+ (char *)local->newloc.name);
if (ret)
gf_msg(THIS->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_SET_FAILED,
"%s/%s: Could not set %s key during "
@@ -2042,7 +2007,7 @@ err:
local->op_ret = -1;
local->op_errno = op_errno;
- afr_handle_lock_acquire_failure(local, _gf_true);
+ afr_handle_lock_acquire_failure(local);
if (xdata_req)
dict_unref(xdata_req);
@@ -2051,7 +2016,7 @@ err:
}
int
-afr_post_nonblocking_inodelk_cbk(call_frame_t *frame, xlator_t *this)
+afr_post_nonblocking_lock_cbk(call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
@@ -2062,36 +2027,12 @@ afr_post_nonblocking_inodelk_cbk(call_frame_t *frame, xlator_t *this)
/* Initiate blocking locks if non-blocking has failed */
if (int_lock->lock_op_ret < 0) {
gf_msg_debug(this->name, 0,
- "Non blocking inodelks failed. Proceeding to blocking");
+ "Non blocking locks failed. Proceeding to blocking");
int_lock->lock_cbk = afr_internal_lock_finish;
afr_blocking_lock(frame, this);
} else {
gf_msg_debug(this->name, 0,
- "Non blocking inodelks done. Proceeding to FOP");
- afr_internal_lock_finish(frame, this);
- }
-
- return 0;
-}
-
-int
-afr_post_nonblocking_entrylk_cbk(call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- /* Initiate blocking locks if non-blocking has failed */
- if (int_lock->lock_op_ret < 0) {
- gf_msg_debug(this->name, 0,
- "Non blocking entrylks failed. Proceeding to blocking");
- int_lock->lock_cbk = afr_internal_lock_finish;
- afr_blocking_lock(frame, this);
- } else {
- gf_msg_debug(this->name, 0,
- "Non blocking entrylks done. Proceeding to FOP");
+ "Non blocking locks done. Proceeding to FOP");
afr_internal_lock_finish(frame, this);
}
@@ -2109,7 +2050,7 @@ afr_post_blocking_rename_cbk(call_frame_t *frame, xlator_t *this)
int_lock = &local->internal_lock;
if (int_lock->lock_op_ret < 0) {
- gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_BLOCKING_LKS_FAILED,
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_INTERNAL_LKS_FAILED,
"Blocking entrylks failed.");
afr_transaction_done(frame, this);
@@ -2140,25 +2081,26 @@ afr_post_lower_unlock_cbk(call_frame_t *frame, xlator_t *this)
}
int
-afr_set_transaction_flock(xlator_t *this, afr_local_t *local)
+afr_set_transaction_flock(xlator_t *this, afr_local_t *local,
+ afr_lockee_t *lockee)
{
- afr_internal_lock_t *int_lock = NULL;
afr_private_t *priv = NULL;
+ struct gf_flock *flock = NULL;
- int_lock = &local->internal_lock;
priv = this->private;
+ flock = &lockee->flock;
if ((priv->arbiter_count || local->transaction.eager_lock_on ||
priv->full_lock) &&
local->transaction.type == AFR_DATA_TRANSACTION) {
/*Lock entire file to avoid network split brains.*/
- int_lock->flock.l_len = 0;
- int_lock->flock.l_start = 0;
+ flock->l_len = 0;
+ flock->l_start = 0;
} else {
- int_lock->flock.l_len = local->transaction.len;
- int_lock->flock.l_start = local->transaction.start;
+ flock->l_len = local->transaction.len;
+ flock->l_start = local->transaction.start;
}
- int_lock->flock.l_type = F_WRLCK;
+ flock->l_type = F_WRLCK;
return 0;
}
@@ -2168,26 +2110,21 @@ afr_lock(call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
+ int i = 0;
local = frame->local;
int_lock = &local->internal_lock;
+ int_lock->lock_cbk = afr_post_nonblocking_lock_cbk;
int_lock->domain = this->name;
switch (local->transaction.type) {
case AFR_DATA_TRANSACTION:
case AFR_METADATA_TRANSACTION:
- afr_set_transaction_flock(this, local);
-
- int_lock->lock_cbk = afr_post_nonblocking_inodelk_cbk;
-
- afr_nonblocking_inodelk(frame, this);
- break;
-
- case AFR_ENTRY_RENAME_TRANSACTION:
+ for (i = 0; i < int_lock->lockee_count; i++) {
+ afr_set_transaction_flock(this, local, &int_lock->lockee[i]);
+ }
- int_lock->lock_cbk = afr_post_nonblocking_entrylk_cbk;
- afr_nonblocking_entrylk(frame, this);
break;
case AFR_ENTRY_TRANSACTION:
@@ -2196,11 +2133,11 @@ afr_lock(call_frame_t *frame, xlator_t *this)
int_lock->lk_loc = &local->transaction.parent_loc;
else
GF_ASSERT(local->fd);
-
- int_lock->lock_cbk = afr_post_nonblocking_entrylk_cbk;
- afr_nonblocking_entrylk(frame, this);
+ break;
+ case AFR_ENTRY_RENAME_TRANSACTION:
break;
}
+ afr_lock_nonblocking(frame, this);
return 0;
}
@@ -2271,17 +2208,19 @@ afr_has_lock_conflict(afr_local_t *local, gf_boolean_t waitlist_check)
/* }}} */
static void
afr_copy_inodelk_vars(afr_internal_lock_t *dst, afr_internal_lock_t *src,
- xlator_t *this)
+ xlator_t *this, int lockee_num)
{
afr_private_t *priv = this->private;
+ afr_lockee_t *sl = &src->lockee[lockee_num];
+ afr_lockee_t *dl = &dst->lockee[lockee_num];
dst->domain = src->domain;
- dst->flock.l_len = src->flock.l_len;
- dst->flock.l_start = src->flock.l_start;
- dst->flock.l_type = src->flock.l_type;
- dst->lock_count = src->lock_count;
- memcpy(dst->locked_nodes, src->locked_nodes,
- priv->child_count * sizeof(*dst->locked_nodes));
+ dl->flock.l_len = sl->flock.l_len;
+ dl->flock.l_start = sl->flock.l_start;
+ dl->flock.l_type = sl->flock.l_type;
+ dl->locked_count = sl->locked_count;
+ memcpy(dl->locked_nodes, sl->locked_nodes,
+ priv->child_count * sizeof(*dl->locked_nodes));
}
void
@@ -2302,7 +2241,7 @@ __afr_transaction_wake_shared(afr_local_t *local, struct list_head *shared)
if (conflict && !list_empty(&lock->owners))
return;
afr_copy_inodelk_vars(&each->internal_lock, &local->internal_lock,
- each->transaction.frame->this);
+ each->transaction.frame->this, 0);
list_move_tail(&each->transaction.wait_list, shared);
list_add_tail(&each->transaction.owner_list, &lock->owners);
}
@@ -2337,7 +2276,7 @@ afr_internal_lock_finish(call_frame_t *frame, xlator_t *this)
} else {
lock = &local->inode_ctx->lock[local->transaction.type];
if (local->internal_lock.lock_op_ret < 0) {
- afr_handle_lock_acquire_failure(local, _gf_false);
+ afr_handle_lock_acquire_failure(local);
} else {
lock->event_generation = local->event_generation;
afr_changelog_pre_op(frame, this);
@@ -2408,8 +2347,13 @@ afr_is_delayed_changelog_post_op_needed(call_frame_t *frame, xlator_t *this,
goto out;
}
- if ((local->op != GF_FOP_WRITE) && (local->op != GF_FOP_FXATTROP)) {
- /*Only allow writes but shard does [f]xattrops on writes, so
+ if (local->transaction.disable_delayed_post_op) {
+ goto out;
+ }
+
+ if ((local->op != GF_FOP_WRITE) && (local->op != GF_FOP_FXATTROP) &&
+ (local->op != GF_FOP_FSYNC)) {
+ /*Only allow writes/fsyncs but shard does [f]xattrops on writes, so
* they are fine too*/
goto out;
}
@@ -2536,8 +2480,10 @@ afr_changelog_fsync(call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
xdata = dict_new();
- if (xdata)
- ret = dict_set_int32(xdata, "batch-fsync", 1);
+ if (xdata) {
+ ret = dict_set_int32_sizen(xdata, "batch-fsync", 1);
+ ret = dict_set_str(xdata, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
+ }
for (i = 0; i < priv->child_count; i++) {
if (!local->transaction.pre_op[i])
@@ -2779,7 +2725,7 @@ __afr_eager_lock_handle(afr_local_t *local, gf_boolean_t *take_lock,
*timer_local = list_entry(lock->post_op.next, afr_local_t,
transaction.owner_list);
afr_copy_inodelk_vars(&local->internal_lock,
- &(*timer_local)->internal_lock, this);
+ &(*timer_local)->internal_lock, this, 0);
lock->delay_timer = NULL;
*do_pre_op = _gf_true;
list_add_tail(&local->transaction.owner_list, &lock->owners);
@@ -2796,7 +2742,7 @@ __afr_eager_lock_handle(afr_local_t *local, gf_boolean_t *take_lock,
owner_local = list_entry(lock->owners.next, afr_local_t,
transaction.owner_list);
afr_copy_inodelk_vars(&local->internal_lock,
- &owner_local->internal_lock, this);
+ &owner_local->internal_lock, this, 0);
*take_lock = _gf_false;
*do_pre_op = _gf_true;
}
@@ -2866,6 +2812,62 @@ fail:
}
int
+afr_transaction_lockee_init(call_frame_t *frame)
+{
+ afr_local_t *local = frame->local;
+ afr_internal_lock_t *int_lock = &local->internal_lock;
+ afr_private_t *priv = frame->this->private;
+ int ret = 0;
+
+ switch (local->transaction.type) {
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ ret = afr_add_inode_lockee(local, priv->child_count);
+ break;
+
+ case AFR_ENTRY_TRANSACTION:
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ ret = afr_add_entry_lockee(local, &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret) {
+ goto out;
+ }
+ if (local->op == GF_FOP_RENAME) {
+ ret = afr_add_entry_lockee(
+ local, &local->transaction.new_parent_loc,
+ local->transaction.new_basename, priv->child_count);
+ if (ret) {
+ goto out;
+ }
+
+ if (local->newloc.inode &&
+ IA_ISDIR(local->newloc.inode->ia_type)) {
+ ret = afr_add_entry_lockee(local, &local->newloc, NULL,
+ priv->child_count);
+ if (ret) {
+ goto out;
+ }
+ }
+ } else if (local->op == GF_FOP_RMDIR) {
+ ret = afr_add_entry_lockee(local, &local->loc, NULL,
+ priv->child_count);
+ if (ret) {
+ goto out;
+ }
+ }
+
+ if (int_lock->lockee_count > 1) {
+ qsort(int_lock->lockee, int_lock->lockee_count,
+ sizeof(*int_lock->lockee), afr_entry_lockee_cmp);
+ }
+ break;
+ }
+out:
+ return ret;
+}
+
+int
afr_transaction(call_frame_t *frame, xlator_t *this, afr_transaction_type type)
{
afr_local_t *local = NULL;
@@ -2879,7 +2881,7 @@ afr_transaction(call_frame_t *frame, xlator_t *this, afr_transaction_type type)
local->transaction.type = type;
- if (priv->quorum_count && !afr_has_quorum(local->child_up, this)) {
+ if (priv->quorum_count && !afr_has_quorum(local->child_up, this, NULL)) {
ret = -afr_quorum_errno(priv);
goto out;
}
@@ -2898,6 +2900,10 @@ afr_transaction(call_frame_t *frame, xlator_t *this, afr_transaction_type type)
if (ret < 0)
goto out;
+ ret = afr_transaction_lockee_init(frame);
+ if (ret)
+ goto out;
+
if (type != AFR_METADATA_TRANSACTION) {
goto txn_start;
}
diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h
index 35a922544bc..beefa26f4a6 100644
--- a/xlators/cluster/afr/src/afr-transaction.h
+++ b/xlators/cluster/afr/src/afr-transaction.h
@@ -48,7 +48,7 @@ afr_pending_read_decrement(afr_private_t *priv, int child_index);
call_frame_t *
afr_transaction_detach_fop_frame(call_frame_t *frame);
gf_boolean_t
-afr_has_quorum(unsigned char *subvols, xlator_t *this);
+afr_has_quorum(unsigned char *subvols, xlator_t *this, call_frame_t *frame);
gf_boolean_t
afr_needs_changelog_update(afr_local_t *local);
void
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 5d5e536ff60..df7366f0a65 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -87,7 +87,7 @@ static void
fix_quorum_options(xlator_t *this, afr_private_t *priv, char *qtype,
dict_t *options)
{
- if (dict_get(options, "quorum-type") == NULL) {
+ if (dict_get_sizen(options, "quorum-type") == NULL) {
/* If user doesn't configure anything enable auto-quorum if the
* replica has more than two subvolumes */
if (priv->child_count > 2)
@@ -120,23 +120,62 @@ afr_set_favorite_child_policy(afr_private_t *priv, char *policy)
return 0;
}
+
+static void
+set_data_self_heal_algorithm(afr_private_t *priv, char *algo)
+{
+ if (!algo) {
+ priv->data_self_heal_algorithm = AFR_SELFHEAL_DATA_DYNAMIC;
+ } else if (strcmp(algo, "full") == 0) {
+ priv->data_self_heal_algorithm = AFR_SELFHEAL_DATA_FULL;
+ } else if (strcmp(algo, "diff") == 0) {
+ priv->data_self_heal_algorithm = AFR_SELFHEAL_DATA_DIFF;
+ } else {
+ priv->data_self_heal_algorithm = AFR_SELFHEAL_DATA_DYNAMIC;
+ }
+}
+
+void
+afr_handle_anon_inode_options(afr_private_t *priv, dict_t *options)
+{
+ char *volfile_id_str = NULL;
+ uuid_t anon_inode_gfid = {0};
+
+ /*If volume id is not present don't enable anything*/
+ if (dict_get_str(options, "volume-id", &volfile_id_str))
+ return;
+ GF_ASSERT(strlen(AFR_ANON_DIR_PREFIX) + strlen(volfile_id_str) <= NAME_MAX);
+ /*anon_inode_name is not supposed to change once assigned*/
+ if (!priv->anon_inode_name[0]) {
+ snprintf(priv->anon_inode_name, sizeof(priv->anon_inode_name), "%s-%s",
+ AFR_ANON_DIR_PREFIX, volfile_id_str);
+ gf_uuid_parse(volfile_id_str, anon_inode_gfid);
+ /*Flip a bit to make sure volfile-id and anon-gfid are not same*/
+ anon_inode_gfid[0] ^= 1;
+ uuid_utoa_r(anon_inode_gfid, priv->anon_gfid_str);
+ }
+}
+
int
reconfigure(xlator_t *this, dict_t *options)
{
afr_private_t *priv = NULL;
xlator_t *read_subvol = NULL;
int read_subvol_index = -1;
+ int timeout_old = 0;
int ret = -1;
int index = -1;
char *qtype = NULL;
char *fav_child_policy = NULL;
+ char *data_self_heal = NULL;
+ char *data_self_heal_algorithm = NULL;
+ char *locking_scheme = NULL;
gf_boolean_t consistent_io = _gf_false;
gf_boolean_t choose_local_old = _gf_false;
+ gf_boolean_t enabled_old = _gf_false;
priv = this->private;
- GF_OPTION_RECONF("afr-dirty-xattr", priv->afr_dirty, options, str, out);
-
GF_OPTION_RECONF("metadata-splitbrain-forced-heal",
priv->metadata_splitbrain_forced_heal, options, bool, out);
@@ -149,7 +188,9 @@ reconfigure(xlator_t *this, dict_t *options)
GF_OPTION_RECONF("metadata-self-heal", priv->metadata_self_heal, options,
bool, out);
- GF_OPTION_RECONF("data-self-heal", priv->data_self_heal, options, str, out);
+ GF_OPTION_RECONF("data-self-heal", data_self_heal, options, str, out);
+ if (gf_string2boolean(data_self_heal, &priv->data_self_heal) == -1)
+ goto out;
GF_OPTION_RECONF("entry-self-heal", priv->entry_self_heal, options, bool,
out);
@@ -157,8 +198,9 @@ reconfigure(xlator_t *this, dict_t *options)
GF_OPTION_RECONF("data-self-heal-window-size",
priv->data_self_heal_window_size, options, uint32, out);
- GF_OPTION_RECONF("data-self-heal-algorithm", priv->data_self_heal_algorithm,
+ GF_OPTION_RECONF("data-self-heal-algorithm", data_self_heal_algorithm,
options, str, out);
+ set_data_self_heal_algorithm(priv, data_self_heal_algorithm);
GF_OPTION_RECONF("halo-enabled", priv->halo_enabled, options, bool, out);
@@ -214,16 +256,19 @@ reconfigure(xlator_t *this, dict_t *options)
}
GF_OPTION_RECONF("pre-op-compat", priv->pre_op_compat, options, bool, out);
- GF_OPTION_RECONF("locking-scheme", priv->locking_scheme, options, str, out);
+ GF_OPTION_RECONF("locking-scheme", locking_scheme, options, str, out);
+ priv->granular_locks = (strcmp(locking_scheme, "granular") == 0);
GF_OPTION_RECONF("full-lock", priv->full_lock, options, bool, out);
GF_OPTION_RECONF("granular-entry-heal", priv->esh_granular, options, bool,
out);
GF_OPTION_RECONF("eager-lock", priv->eager_lock, options, bool, out);
+ GF_OPTION_RECONF("optimistic-change-log", priv->optimistic_change_log,
+ options, bool, out);
GF_OPTION_RECONF("quorum-type", qtype, options, str, out);
GF_OPTION_RECONF("quorum-count", priv->quorum_count, options, uint32, out);
fix_quorum_options(this, priv, qtype, options);
- if (priv->quorum_count && !afr_has_quorum(priv->child_up, this))
+ if (priv->quorum_count && !afr_has_quorum(priv->child_up, this, NULL))
gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_QUORUM_FAIL,
"Client-quorum is not met");
@@ -236,11 +281,13 @@ reconfigure(xlator_t *this, dict_t *options)
GF_OPTION_RECONF("ensure-durability", priv->ensure_durability, options,
bool, out);
+ enabled_old = priv->shd.enabled;
GF_OPTION_RECONF("self-heal-daemon", priv->shd.enabled, options, bool, out);
GF_OPTION_RECONF("iam-self-heal-daemon", priv->shd.iamshd, options, bool,
out);
+ timeout_old = priv->shd.timeout;
GF_OPTION_RECONF("heal-timeout", priv->shd.timeout, options, int32, out);
GF_OPTION_RECONF("consistent-metadata", priv->consistent_metadata, options,
@@ -264,6 +311,16 @@ reconfigure(xlator_t *this, dict_t *options)
consistent_io = _gf_false;
priv->consistent_io = consistent_io;
+ afr_handle_anon_inode_options(priv, options);
+
+ GF_OPTION_RECONF("use-anonymous-inode", priv->use_anon_inode, options, bool,
+ out);
+ if (priv->shd.enabled) {
+ if ((priv->shd.enabled != enabled_old) ||
+ (timeout_old != priv->shd.timeout))
+ afr_selfheal_childup(this, priv);
+ }
+
ret = 0;
out:
return ret;
@@ -349,7 +406,7 @@ afr_ta_init(afr_private_t *priv)
priv->release_ta_notify_dom_lock = _gf_false;
INIT_LIST_HEAD(&priv->ta_waitq);
INIT_LIST_HEAD(&priv->ta_onwireq);
- *priv->ta_gfid = 0;
+ gf_uuid_clear(priv->ta_gfid);
}
int32_t
@@ -366,6 +423,9 @@ init(xlator_t *this)
char *qtype = NULL;
char *fav_child_policy = NULL;
char *thin_arbiter = NULL;
+ char *data_self_heal = NULL;
+ char *locking_scheme = NULL;
+ char *data_self_heal_algorithm = NULL;
if (!this->children) {
gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_CHILD_MISCONFIGURED,
@@ -385,6 +445,8 @@ init(xlator_t *this)
goto out;
priv = this->private;
+ INIT_LIST_HEAD(&priv->saved_locks);
+ INIT_LIST_HEAD(&priv->lk_healq);
LOCK_INIT(&priv->lock);
child_count = xlator_subvolume_count(this);
@@ -448,10 +510,13 @@ init(xlator_t *this)
GF_OPTION_INIT("heal-wait-queue-length", priv->heal_wait_qlen, uint32, out);
- GF_OPTION_INIT("data-self-heal", priv->data_self_heal, str, out);
+ GF_OPTION_INIT("data-self-heal", data_self_heal, str, out);
+ if (gf_string2boolean(data_self_heal, &priv->data_self_heal) == -1)
+ goto out;
- GF_OPTION_INIT("data-self-heal-algorithm", priv->data_self_heal_algorithm,
- str, out);
+ GF_OPTION_INIT("data-self-heal-algorithm", data_self_heal_algorithm, str,
+ out);
+ set_data_self_heal_algorithm(priv, data_self_heal_algorithm);
GF_OPTION_INIT("data-self-heal-window-size",
priv->data_self_heal_window_size, uint32, out);
@@ -479,7 +544,8 @@ init(xlator_t *this)
out);
GF_OPTION_INIT("pre-op-compat", priv->pre_op_compat, bool, out);
- GF_OPTION_INIT("locking-scheme", priv->locking_scheme, str, out);
+ GF_OPTION_INIT("locking-scheme", locking_scheme, str, out);
+ priv->granular_locks = (strcmp(locking_scheme, "granular") == 0);
GF_OPTION_INIT("full-lock", priv->full_lock, bool, out);
GF_OPTION_INIT("granular-entry-heal", priv->esh_granular, bool, out);
@@ -500,7 +566,9 @@ init(xlator_t *this)
GF_OPTION_INIT("consistent-metadata", priv->consistent_metadata, bool, out);
GF_OPTION_INIT("consistent-io", priv->consistent_io, bool, out);
+ afr_handle_anon_inode_options(priv, this->options);
+ GF_OPTION_INIT("use-anonymous-inode", priv->use_anon_inode, bool, out);
if (priv->quorum_count != 0)
priv->consistent_io = _gf_false;
@@ -512,13 +580,19 @@ init(xlator_t *this)
goto out;
}
+ priv->anon_inode = GF_CALLOC(sizeof(unsigned char), child_count,
+ gf_afr_mt_char);
+
priv->child_up = GF_CALLOC(sizeof(unsigned char), child_count,
gf_afr_mt_char);
priv->child_latency = GF_MALLOC(sizeof(*priv->child_latency) * child_count,
gf_afr_mt_child_latency_t);
+ priv->halo_child_up = GF_CALLOC(sizeof(unsigned char), child_count,
+ gf_afr_mt_char);
- if (!priv->child_up || !priv->child_latency) {
+ if (!priv->child_up || !priv->child_latency || !priv->halo_child_up ||
+ !priv->anon_inode) {
ret = -ENOMEM;
goto out;
}
@@ -559,12 +633,20 @@ init(xlator_t *this)
goto out;
}
- ret = afr_selfheal_daemon_init(this);
- if (ret) {
+ this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this);
+ if (!this->itable) {
ret = -ENOMEM;
goto out;
}
+ if (priv->shd.iamshd) {
+ ret = afr_selfheal_daemon_init(this);
+ if (ret) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
/* keep more local here as we may need them for self-heal etc */
this->local_pool = mem_pool_new(afr_local_t, 512);
if (!this->local_pool) {
@@ -578,24 +660,91 @@ init(xlator_t *this)
out:
return ret;
}
+void
+afr_destroy_healer_object(xlator_t *this, struct subvol_healer *healer)
+{
+ int ret = -1;
-int
+ if (!healer)
+ return;
+
+ if (healer->running) {
+ /*
+ * If there are any resources to cleanup, We need
+ * to do that gracefully using pthread_cleanup_push
+ */
+ ret = gf_thread_cleanup_xint(healer->thread);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SELF_HEAL_FAILED,
+ "Failed to clean up healer threads.");
+ healer->thread = 0;
+ }
+ pthread_cond_destroy(&healer->cond);
+ pthread_mutex_destroy(&healer->mutex);
+}
+
+void
+afr_selfheal_daemon_fini(xlator_t *this)
+{
+ struct subvol_healer *healer = NULL;
+ afr_self_heald_t *shd = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ priv = this->private;
+ if (!priv)
+ return;
+
+ shd = &priv->shd;
+ if (!shd->iamshd)
+ return;
+
+ for (i = 0; i < priv->child_count; i++) {
+ healer = &shd->index_healers[i];
+ afr_destroy_healer_object(this, healer);
+
+ healer = &shd->full_healers[i];
+ afr_destroy_healer_object(this, healer);
+
+ if (shd->statistics[i])
+ eh_destroy(shd->statistics[i]);
+ }
+ GF_FREE(shd->index_healers);
+ GF_FREE(shd->full_healers);
+ GF_FREE(shd->statistics);
+ if (shd->split_brain)
+ eh_destroy(shd->split_brain);
+}
+void
fini(xlator_t *this)
{
afr_private_t *priv = NULL;
priv = this->private;
+
+ afr_selfheal_daemon_fini(this);
+ GF_ASSERT(list_empty(&priv->saved_locks));
+
LOCK(&priv->lock);
if (priv->timer != NULL) {
gf_timer_call_cancel(this->ctx, priv->timer);
priv->timer = NULL;
}
UNLOCK(&priv->lock);
+
+ if (this->local_pool != NULL) {
+ mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
+ }
+
this->private = NULL;
afr_priv_destroy(priv);
- // if (this->itable);//I don't see any destroy func
+ if (this->itable) {
+ inode_table_destroy(this->itable);
+ this->itable = NULL;
+ }
- return 0;
+ return;
}
struct xlator_fops fops = {
@@ -619,6 +768,7 @@ struct xlator_fops fops = {
.getxattr = afr_getxattr,
.fgetxattr = afr_fgetxattr,
.readv = afr_readv,
+ .seek = afr_seek,
/* inode write */
.writev = afr_writev,
@@ -691,7 +841,7 @@ struct volume_options options[] = {
{.key = {"read-hash-mode"},
.type = GF_OPTION_TYPE_INT,
.min = 0,
- .max = 3,
+ .max = 5,
.default_value = "1",
.op_version = {2},
.flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
@@ -704,7 +854,10 @@ struct volume_options options[] = {
"1 = hash by GFID of file (all clients use "
"same subvolume).\n"
"2 = hash by GFID of file and client PID.\n"
- "3 = brick having the least outstanding read requests."},
+ "3 = brick having the least outstanding read requests.\n"
+ "4 = brick having the least network ping latency.\n"
+ "5 = Hybrid mode between 3 and 4, ie least value among "
+ "network-latency multiplied by outstanding-read-requests."},
{
.key = {"choose-local"},
.type = GF_OPTION_TYPE_BOOL,
@@ -797,7 +950,7 @@ struct volume_options options[] = {
.type = GF_OPTION_TYPE_STR,
.value = {"1", "on", "yes", "true", "enable", "0", "off", "no", "false",
"disable", "open"},
- .default_value = "on",
+ .default_value = "off",
.op_version = {1},
.flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
.tags = {"replicate"},
@@ -834,7 +987,7 @@ struct volume_options options[] = {
"process would be applied simultaneously."},
{.key = {"metadata-self-heal"},
.type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
+ .default_value = "off",
.op_version = {1},
.flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
.tags = {"replicate"},
@@ -844,7 +997,7 @@ struct volume_options options[] = {
"the file/directory."},
{.key = {"entry-self-heal"},
.type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
+ .default_value = "off",
.op_version = {1},
.flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
.tags = {"replicate"},
@@ -1164,5 +1317,28 @@ struct volume_options options[] = {
.tags = {"replicate"},
.description = "This option exists only for backward compatibility "
"and configuring it doesn't have any effect"},
+ {.key = {"use-anonymous-inode"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .op_version = {GD_OP_VERSION_8_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
+ .tags = {"replicate"},
+ .description = "Setting this option heals directory renames efficiently"},
+
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "replicate",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 6f8015380f0..d62f9a9caf2 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -11,17 +11,18 @@
#ifndef __AFR_H__
#define __AFR_H__
-#include "call-stub.h"
-#include "compat-errno.h"
+#include <glusterfs/call-stub.h>
+#include <glusterfs/compat-errno.h>
#include "afr-mem-types.h"
#include "libxlator.h"
-#include "timer.h"
-#include "syncop.h"
+#include <glusterfs/timer.h>
+#include <glusterfs/syncop.h>
#include "afr-self-heald.h"
#include "afr-messages.h"
+#define SHD_INODE_LRU_LIMIT 1
#define AFR_PATHINFO_HEADER "REPLICATE:"
#define AFR_SH_READDIR_SIZE_KEY "self-heal-readdir-size"
#define AFR_SH_DATA_DOMAIN_FMT "%s:self-heal"
@@ -38,7 +39,10 @@
#define AFR_TA_DOM_NOTIFY "afr.ta.dom-notify"
#define AFR_TA_DOM_MODIFY "afr.ta.dom-modify"
+#define AFR_LK_HEAL_DOM "afr.lock-heal.domain"
+
#define AFR_HALO_MAX_LATENCY 99999
+#define AFR_ANON_DIR_PREFIX ".glusterfs-anonymous-inode"
#define PFLAG_PENDING (1 << 0)
#define PFLAG_SBRAIN (1 << 1)
@@ -94,6 +98,25 @@ typedef int (*afr_changelog_resume_t)(call_frame_t *frame, xlator_t *this);
gf_fop_list[local->op], uuid_utoa(local->inode->gfid)); \
} while (0)
+#define AFR_ERROR_OUT_IF_FDCTX_INVALID(__fd, __this, __error, __label) \
+ do { \
+ afr_fd_ctx_t *__fd_ctx = NULL; \
+ __fd_ctx = afr_fd_ctx_get(__fd, __this); \
+ if (__fd_ctx && __fd_ctx->is_fd_bad) { \
+ __error = EBADF; \
+ goto __label; \
+ } \
+ } while (0)
+
+typedef enum {
+ AFR_READ_POLICY_FIRST_UP,
+ AFR_READ_POLICY_GFID_HASH,
+ AFR_READ_POLICY_GFID_PID_HASH,
+ AFR_READ_POLICY_LESS_LOAD,
+ AFR_READ_POLICY_LEAST_LATENCY,
+ AFR_READ_POLICY_LOAD_LATENCY_HYBRID,
+} afr_read_hash_mode_t;
+
typedef enum {
AFR_FAV_CHILD_NONE,
AFR_FAV_CHILD_BY_SIZE,
@@ -104,6 +127,12 @@ typedef enum {
} afr_favorite_child_policy;
typedef enum {
+ AFR_SELFHEAL_DATA_FULL = 0,
+ AFR_SELFHEAL_DATA_DIFF,
+ AFR_SELFHEAL_DATA_DYNAMIC,
+} afr_data_self_heal_type_t;
+
+typedef enum {
AFR_CHILD_UNKNOWN = -1,
AFR_CHILD_ZERO,
AFR_CHILD_ONE,
@@ -119,13 +148,27 @@ typedef enum {
*on BAD brick - Success*/
TA_INFO_IN_MEMORY_FAILED, /*Bad brick info is in memory and fop failed
*on GOOD brick - Failed*/
+ TA_SUCCESS, /*FOP succeeded on both data bricks.*/
} afr_ta_fop_state_t;
struct afr_nfsd {
- gf_boolean_t iamnfsd;
uint32_t halo_max_latency_msec;
+ gf_boolean_t iamnfsd;
};
+typedef struct _afr_lk_heal_info {
+ fd_t *fd;
+ int32_t cmd;
+ struct gf_flock flock;
+ dict_t *xdata_req;
+ unsigned char *locked_nodes;
+ struct list_head pos;
+ gf_lkowner_t lk_owner;
+ pid_t pid;
+ int32_t *child_up_event_gen;
+ int32_t *child_down_event_gen;
+} afr_lk_heal_info_t;
+
typedef struct _afr_private {
gf_lock_t lock; /* to guard access to child_count, etc */
unsigned int child_count; /* total number of children */
@@ -136,26 +179,27 @@ typedef struct _afr_private {
inode_t *root_inode;
+ int favorite_child; /* subvolume to be preferred in resolving
+ split-brain cases */
/* For thin-arbiter. */
- unsigned int thin_arbiter_count; /* 0 or 1 at the moment.*/
uuid_t ta_gfid;
- unsigned char ta_child_up;
+ unsigned int thin_arbiter_count; /* 0 or 1 at the moment.*/
int ta_bad_child_index;
- off_t ta_notify_dom_lock_offset;
- gf_boolean_t release_ta_notify_dom_lock;
+ int ta_event_gen;
unsigned int ta_in_mem_txn_count;
unsigned int ta_on_wire_txn_count;
struct list_head ta_waitq;
struct list_head ta_onwireq;
+ unsigned char *anon_inode;
unsigned char *child_up;
+ unsigned char *halo_child_up;
int64_t *child_latency;
unsigned char *local;
char **pending_key;
- char *data_self_heal; /* on/off/open */
- char *data_self_heal_algorithm; /* name of algorithm */
+ afr_data_self_heal_type_t data_self_heal_algorithm;
unsigned int data_self_heal_window_size; /* max number of pipelined
read/writes */
@@ -170,30 +214,31 @@ typedef struct _afr_private {
int32_t healers; /* No. of elements currently undergoing background
heal*/
+ gf_boolean_t release_ta_notify_dom_lock;
+
gf_boolean_t metadata_self_heal; /* on/off */
gf_boolean_t entry_self_heal; /* on/off */
gf_boolean_t metadata_splitbrain_forced_heal; /* on/off */
int read_child; /* read-subvolume */
- unsigned int hash_mode; /* for when read_child is not set */
gf_atomic_t *pending_reads; /*No. of pending read cbks per child.*/
- int favorite_child; /* subvolume to be preferred in resolving
- split-brain cases */
- afr_favorite_child_policy fav_child_policy; /*Policy to use for automatic
- resolution of split-brains.*/
+ gf_timer_t *timer; /* launched when parent up is received */
unsigned int wait_count; /* # of servers to wait for success */
- gf_timer_t *timer; /* launched when parent up is received */
-
+ unsigned char ta_child_up;
gf_boolean_t optimistic_change_log;
gf_boolean_t eager_lock;
gf_boolean_t pre_op_compat; /* on/off */
uint32_t post_op_delay_secs;
unsigned int quorum_count;
- char vol_uuid[UUID_SIZE + 1];
+ off_t ta_notify_dom_lock_offset;
+ afr_favorite_child_policy fav_child_policy; /*Policy to use for automatic
+ resolution of split-brains.*/
+ afr_read_hash_mode_t hash_mode; /* for when read_child is not set */
+
int32_t *last_event;
/* @event_generation: Keeps count of number of events received which can
@@ -206,33 +251,41 @@ typedef struct _afr_private {
important as we might have had a network split brain.
*/
uint32_t event_generation;
+ char vol_uuid[UUID_SIZE + 1];
gf_boolean_t choose_local;
gf_boolean_t did_discovery;
- uint64_t sh_readdir_size;
gf_boolean_t ensure_durability;
+ gf_boolean_t halo_enabled;
+ gf_boolean_t consistent_metadata;
+ gf_boolean_t need_heal;
+ gf_boolean_t granular_locks;
+ uint64_t sh_readdir_size;
char *sh_domain;
char *afr_dirty;
- gf_boolean_t halo_enabled;
- uint32_t halo_max_latency_msec;
- uint32_t halo_max_replicas;
- uint32_t halo_min_replicas;
+ uint64_t spb_choice_timeout;
afr_self_heald_t shd;
struct afr_nfsd nfsd;
- gf_boolean_t consistent_metadata;
- uint64_t spb_choice_timeout;
- gf_boolean_t need_heal;
+ uint32_t halo_max_latency_msec;
+ uint32_t halo_max_replicas;
+ uint32_t halo_min_replicas;
- /* pump dependencies */
- void *pump_private;
- gf_boolean_t use_afr_in_pump;
- char *locking_scheme;
gf_boolean_t full_lock;
gf_boolean_t esh_granular;
gf_boolean_t consistent_io;
+ gf_boolean_t data_self_heal; /* on/off */
+ gf_boolean_t use_anon_inode;
+
+ /*For lock healing.*/
+ struct list_head saved_locks;
+ struct list_head lk_healq;
+
+ /*For anon-inode handling */
+ char anon_inode_name[NAME_MAX + 1];
+ char anon_gfid_str[UUID_SIZE + 1];
} afr_private_t;
typedef enum {
@@ -281,12 +334,14 @@ afr_index_from_ia_type(ia_type_t type)
}
typedef struct {
+ struct gf_flock flock;
loc_t loc;
+ fd_t *fd;
char *basename;
unsigned char *locked_nodes;
int locked_count;
-} afr_entry_lockee_t;
+} afr_lockee_t;
int
afr_entry_lockee_cmp(const void *l1, const void *l2);
@@ -294,21 +349,17 @@ afr_entry_lockee_cmp(const void *l1, const void *l2);
typedef struct {
loc_t *lk_loc;
- int lockee_count;
- afr_entry_lockee_t lockee[AFR_LOCKEE_COUNT_MAX];
+ afr_lockee_t lockee[AFR_LOCKEE_COUNT_MAX];
- struct gf_flock flock;
const char *lk_basename;
const char *lower_basename;
const char *higher_basename;
- char lower_locked;
- char higher_locked;
- unsigned char *locked_nodes;
unsigned char *lower_locked_nodes;
- int32_t lock_count;
- int32_t entrylk_lock_count;
+ afr_lock_cbk_t lock_cbk;
+
+ int lockee_count;
int32_t lk_call_count;
int32_t lk_expected_count;
@@ -316,14 +367,15 @@ typedef struct {
int32_t lock_op_ret;
int32_t lock_op_errno;
- afr_lock_cbk_t lock_cbk;
char *domain; /* Domain on which inode/entry lock/unlock in progress.*/
+ int32_t lock_count;
+ char lower_locked;
+ char higher_locked;
} afr_internal_lock_t;
struct afr_reply {
int valid;
int32_t op_ret;
- int32_t op_errno;
dict_t *xattr; /*For xattrop*/
dict_t *xdata;
struct iatt poststat;
@@ -332,6 +384,7 @@ struct afr_reply {
struct iatt preparent;
struct iatt preparent2;
struct iatt postparent2;
+ int32_t op_errno;
/* For rchecksum */
uint8_t checksum[SHA256_DIGEST_LENGTH];
gf_boolean_t buf_has_zeroes;
@@ -355,6 +408,10 @@ typedef struct {
arrives, we continue to read off this subvol.
*/
int readdir_subvol;
+ /* lock-healing related members. */
+ gf_boolean_t is_fd_bad;
+ afr_lk_heal_info_t *lk_heal_info;
+
} afr_fd_ctx_t;
typedef enum {
@@ -371,8 +428,6 @@ typedef struct _afr_inode_lock_t {
*/
int32_t num_inodelks;
unsigned int event_generation;
- gf_boolean_t release;
- gf_boolean_t acquired;
gf_timer_t *delay_timer;
struct list_head owners; /*Transactions that are performing fop*/
struct list_head post_op; /*Transactions that are done with the fop
@@ -381,6 +436,8 @@ typedef struct _afr_inode_lock_t {
*conflicting transactions to complete*/
struct list_head frozen; /*Transactions that need to go as part of
* next batch of eager-lock*/
+ gf_boolean_t release;
+ gf_boolean_t acquired;
} afr_lock_t;
typedef struct _afr_inode_ctx {
@@ -389,15 +446,11 @@ typedef struct _afr_inode_ctx {
int lock_count;
int spb_choice;
gf_timer_t *timer;
- gf_boolean_t need_refresh;
unsigned int *pre_op_done[AFR_NUM_CHANGE_LOGS];
int inherited[AFR_NUM_CHANGE_LOGS];
int on_disk[AFR_NUM_CHANGE_LOGS];
-
- /* set if any write on this fd was a non stable write
- (i.e, without O_SYNC or O_DSYNC)
- */
- gf_boolean_t witnessed_unstable_write;
+ /*Only 2 types of transactions support eager-locks now. DATA/METADATA*/
+ afr_lock_t lock[2];
/* @open_fd_count:
Number of open FDs queried from the server, as queried through
@@ -405,8 +458,12 @@ typedef struct _afr_inode_ctx {
temporarily disabled.
*/
uint32_t open_fd_count;
- /*Only 2 types of transactions support eager-locks now. DATA/METADATA*/
- afr_lock_t lock[2];
+ gf_boolean_t need_refresh;
+
+ /* set if any write on this fd was a non stable write
+ (i.e, without O_SYNC or O_DSYNC)
+ */
+ gf_boolean_t witnessed_unstable_write;
} afr_inode_ctx_t;
typedef struct _afr_local {
@@ -420,19 +477,15 @@ typedef struct _afr_local {
unsigned int event_generation;
uint32_t open_fd_count;
- gf_boolean_t update_open_fd_count;
int32_t num_inodelks;
- gf_boolean_t update_num_inodelks;
-
- gf_lkowner_t saved_lk_owner;
int32_t op_ret;
int32_t op_errno;
- int32_t **pending;
-
int dirty[AFR_NUM_CHANGE_LOGS];
+ int32_t **pending;
+
loc_t loc;
loc_t newloc;
@@ -463,14 +516,6 @@ typedef struct _afr_local {
afr_read_txn_wind_t readfn;
- /* @refreshed:
-
- the inode was "refreshed" (i.e, pending xattrs from all subvols
- freshly inspected and inode ctx updated accordingly) as part of
- this transaction already.
- */
- gf_boolean_t refreshed;
-
/* @inode:
the inode on which the read txn is performed on. ref'ed and copied
@@ -495,8 +540,6 @@ typedef struct _afr_local {
unsigned char *readable;
unsigned char *readable2; /*For rename transaction*/
- int read_subvol; /* Current read subvolume */
-
afr_inode_refresh_cbk_t refreshfn;
/* @refreshinode:
@@ -505,9 +548,30 @@ typedef struct _afr_local {
*/
inode_t *refreshinode;
+ dict_t *xattr_req;
+
+ dict_t *dict;
+
+ int read_subvol; /* Current read subvolume */
+
+ int optimistic_change_log;
+
+ afr_internal_lock_t internal_lock;
+
/*To handle setattr/setxattr on yet to be linked inode from dht*/
uuid_t refreshgfid;
+ /* @refreshed:
+
+ the inode was "refreshed" (i.e, pending xattrs from all subvols
+ freshly inspected and inode ctx updated accordingly) as part of
+ this transaction already.
+ */
+ gf_boolean_t refreshed;
+
+ gf_boolean_t update_num_inodelks;
+ gf_boolean_t update_open_fd_count;
+
/*
@pre_op_compat:
@@ -517,14 +581,6 @@ typedef struct _afr_local {
gf_boolean_t pre_op_compat;
- dict_t *xattr_req;
-
- afr_internal_lock_t internal_lock;
-
- dict_t *dict;
-
- int optimistic_change_log;
-
/* Is the current writev() going to perform a stable write?
i.e, is fd->flags or @flags writev param have O_SYNC or
O_DSYNC?
@@ -543,25 +599,25 @@ typedef struct _afr_local {
struct {
struct {
- gf_boolean_t needs_fresh_lookup;
- uuid_t gfid_req;
- } lookup;
-
- struct {
- unsigned char buf_set;
struct statvfs buf;
+ unsigned char buf_set;
} statfs;
struct {
- int32_t flags;
fd_t *fd;
+ int32_t flags;
} open;
struct {
- int32_t cmd;
struct gf_flock user_flock;
struct gf_flock ret_flock;
unsigned char *locked_nodes;
+ int32_t cmd;
+ /*For lock healing only.*/
+ unsigned char *dom_locked_nodes;
+ int32_t *dom_lock_op_ret;
+ int32_t *dom_lock_op_errno;
+ struct gf_flock *getlk_rsp;
} lk;
/* inode read */
@@ -586,8 +642,8 @@ typedef struct _afr_local {
struct {
char *name;
- int last_index;
long xattr_len;
+ int last_index;
} getxattr;
struct {
@@ -600,11 +656,10 @@ typedef struct _afr_local {
/* dir read */
struct {
+ uint32_t *checksum;
int success_count;
int32_t op_ret;
int32_t op_errno;
-
- uint32_t *checksum;
} opendir;
struct {
@@ -613,8 +668,8 @@ typedef struct _afr_local {
size_t size;
off_t offset;
dict_t *dict;
- gf_boolean_t failed;
int last_index;
+ gf_boolean_t failed;
} readdir;
/* inode write */
@@ -624,12 +679,11 @@ typedef struct _afr_local {
} inode_wfop; // common structure for all inode-write-fops
struct {
- int32_t op_ret;
-
struct iovec *vector;
struct iobref *iobref;
- int32_t count;
off_t offset;
+ int32_t op_ret;
+ int32_t count;
uint32_t flags;
} writev;
@@ -689,29 +743,25 @@ typedef struct _afr_local {
} create;
struct {
+ dict_t *params;
dev_t dev;
mode_t mode;
- dict_t *params;
} mknod;
struct {
- int32_t mode;
dict_t *params;
+ int32_t mode;
} mkdir;
struct {
- int flags;
- } rmdir;
-
- struct {
dict_t *params;
char *linkpath;
} symlink;
struct {
- int32_t mode;
off_t offset;
size_t len;
+ int32_t mode;
} fallocate;
struct {
@@ -738,10 +788,10 @@ typedef struct _afr_local {
struct {
char *volume;
char *basename;
+ void *xdata;
entrylk_cmd in_cmd;
entrylk_cmd cmd;
entrylk_type type;
- void *xdata;
} entrylk;
struct {
@@ -750,31 +800,33 @@ typedef struct _afr_local {
} seek;
struct {
- int32_t datasync;
- } fsync;
-
- struct {
struct gf_lease user_lease;
struct gf_lease ret_lease;
unsigned char *locked_nodes;
} lease;
- } cont;
+ struct {
+ int flags;
+ } rmdir;
- struct {
- off_t start, len;
+ struct {
+ int32_t datasync;
+ } fsync;
- gf_boolean_t eager_lock_on;
- gf_boolean_t do_eager_unlock;
+ struct {
+ uuid_t gfid_req;
+ gf_boolean_t needs_fresh_lookup;
+ } lookup;
+
+ } cont;
+ struct {
char *basename;
char *new_basename;
loc_t parent_loc;
loc_t new_parent_loc;
- afr_transaction_type type;
-
/* stub to resume on destruction
of the transaction frame */
call_stub_t *resume_stub;
@@ -792,6 +844,30 @@ typedef struct _afr_local {
FOP failed. */
unsigned char *failed_subvols;
+ call_frame_t *main_frame; /*Fop frame*/
+ call_frame_t *frame; /*Transaction frame*/
+
+ int (*wind)(call_frame_t *frame, xlator_t *this, int subvol);
+
+ int (*unwind)(call_frame_t *frame, xlator_t *this);
+
+ off_t start, len;
+
+ afr_transaction_type type;
+
+ int32_t in_flight_sb_errno; /* This is where the cause of the
+ failure on the last good copy of
+ the file is stored.
+ */
+
+ /* @changelog_resume: function to be called after changlogging
+ (either pre-op or post-op) is done
+ */
+ afr_changelog_resume_t changelog_resume;
+
+ gf_boolean_t eager_lock_on;
+ gf_boolean_t do_eager_unlock;
+
/* @dirtied: flag which indicates whether we set dirty flag
in the OP. Typically true when we are performing operation
on more than one subvol and optimistic changelog is disabled
@@ -816,6 +892,10 @@ typedef struct _afr_local {
*/
gf_boolean_t no_uninherit;
+ gf_boolean_t in_flight_sb; /* Indicator for occurrence of
+ split-brain while in the middle of
+ a txn. */
+
/* @uninherit_done:
@uninherit_value:
@@ -828,27 +908,7 @@ typedef struct _afr_local {
gf_boolean_t uninherit_done;
gf_boolean_t uninherit_value;
- gf_boolean_t in_flight_sb; /* Indicator for occurrence of
- split-brain while in the middle of
- a txn. */
- int32_t in_flight_sb_errno; /* This is where the cause of the
- failure on the last good copy of
- the file is stored.
- */
-
- /* @changelog_resume: function to be called after changlogging
- (either pre-op or post-op) is done
- */
- afr_changelog_resume_t changelog_resume;
-
- call_frame_t *main_frame; /*Fop frame*/
- call_frame_t *frame; /*Transaction frame*/
-
- int (*wind)(call_frame_t *frame, xlator_t *this, int subvol);
-
- int (*unwind)(call_frame_t *frame, xlator_t *this);
-
- /* post-op hook */
+ gf_boolean_t disable_delayed_post_op;
} transaction;
syncbarrier_t barrier;
@@ -861,33 +921,36 @@ typedef struct _afr_local {
mode_t umask;
int xflag;
- gf_boolean_t do_discovery;
struct afr_reply *replies;
/* For client side background heals. */
struct list_head healer;
call_frame_t *heal_frame;
- gf_boolean_t need_full_crawl;
- afr_fop_lock_state_t fop_lock_state;
-
- gf_boolean_t is_read_txn;
afr_inode_ctx_t *inode_ctx;
/*For thin-arbiter transactions.*/
- unsigned char ta_child_up;
+ int ta_failed_subvol;
+ int ta_event_gen;
struct list_head ta_waitq;
struct list_head ta_onwireq;
afr_ta_fop_state_t fop_state;
- int ta_failed_subvol;
+ afr_fop_lock_state_t fop_lock_state;
+ gf_lkowner_t saved_lk_owner;
+ unsigned char read_txn_query_child;
+ unsigned char ta_child_up;
+ gf_boolean_t do_discovery;
+ gf_boolean_t need_full_crawl;
+ gf_boolean_t is_read_txn;
+ gf_boolean_t is_new_entry;
} afr_local_t;
typedef struct afr_spbc_timeout {
call_frame_t *frame;
- gf_boolean_t d_spb;
- gf_boolean_t m_spb;
loc_t *loc;
int spb_child_index;
+ gf_boolean_t d_spb;
+ gf_boolean_t m_spb;
} afr_spbc_timeout_t;
typedef struct afr_spb_status {
@@ -897,9 +960,9 @@ typedef struct afr_spb_status {
typedef struct afr_empty_brick_args {
call_frame_t *frame;
+ char *op_type;
loc_t loc;
int empty_index;
- char *op_type;
} afr_empty_brick_args_t;
typedef struct afr_read_subvol_args {
@@ -941,7 +1004,10 @@ afr_inode_read_subvol_set(inode_t *inode, xlator_t *this,
int event_generation);
int
-afr_inode_event_gen_reset(inode_t *inode, xlator_t *this);
+__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this);
+
+int
+afr_inode_need_refresh_set(inode_t *inode, xlator_t *this);
int
afr_read_subvol_select_by_policy(inode_t *inode, xlator_t *this,
@@ -973,11 +1039,14 @@ int
xattr_is_equal(dict_t *this, char *key1, data_t *value1, void *data);
int
-afr_init_entry_lockee(afr_entry_lockee_t *lockee, afr_local_t *local,
- loc_t *loc, char *basename, int child_count);
+afr_add_entry_lockee(afr_local_t *local, loc_t *loc, char *basename,
+ int child_count);
+
+int
+afr_add_inode_lockee(afr_local_t *local, int child_count);
void
-afr_entry_lockee_cleanup(afr_internal_lock_t *int_lock);
+afr_lockees_cleanup(afr_internal_lock_t *int_lock);
int
afr_attempt_lock_recovery(xlator_t *this, int32_t child_index);
@@ -995,10 +1064,7 @@ int32_t
afr_unlock(call_frame_t *frame, xlator_t *this);
int
-afr_nonblocking_entrylk(call_frame_t *frame, xlator_t *this);
-
-int
-afr_nonblocking_inodelk(call_frame_t *frame, xlator_t *this);
+afr_lock_nonblocking(call_frame_t *frame, xlator_t *this);
int
afr_blocking_lock(call_frame_t *frame, xlator_t *this);
@@ -1057,6 +1123,9 @@ afr_cleanup_fd_ctx(xlator_t *this, fd_t *fd);
if (__local && __local->is_read_txn) \
afr_pending_read_decrement(__this->private, \
__local->read_subvol); \
+ if (__local && __local->xdata_req && \
+ afr_is_lock_mode_mandatory(__local->xdata_req)) \
+ afr_dom_lock_release(frame); \
frame->local = NULL; \
} \
\
@@ -1084,8 +1153,8 @@ afr_cleanup_fd_ctx(xlator_t *this, fd_t *fd);
#define AFR_FRAME_INIT(frame, op_errno) \
({ \
frame->local = mem_get0(THIS->local_pool); \
- if (afr_local_init(frame->local, THIS->private, &op_errno)) { \
- afr_local_cleanup(frame->local, THIS); \
+ if (afr_local_init(frame->local, frame->this->private, &op_errno)) { \
+ afr_local_cleanup(frame->local, frame->this); \
mem_put(frame->local); \
frame->local = NULL; \
}; \
@@ -1209,8 +1278,8 @@ int
afr_inode_split_brain_choice_set(inode_t *inode, xlator_t *this,
int spb_choice);
int
-afr_inode_split_brain_choice_get(inode_t *inode, xlator_t *this,
- int *spb_choice);
+afr_split_brain_read_subvol_get(inode_t *inode, xlator_t *this,
+ call_frame_t *frame, int *spb_subvol);
int
afr_get_child_index_from_name(xlator_t *this, char *name);
@@ -1265,9 +1334,6 @@ afr_writev_copy_outvars(call_frame_t *src_frame, call_frame_t *dst_frame);
void
afr_update_uninodelk(afr_local_t *local, afr_internal_lock_t *int_lock,
int32_t child_index);
-int
-afr_is_inodelk_transaction(afr_transaction_type type);
-
afr_fd_ctx_t *
__afr_fd_ctx_get(fd_t *fd, xlator_t *this);
@@ -1298,7 +1364,7 @@ int
afr_set_inode_local(xlator_t *this, afr_local_t *local, inode_t *inode);
int
-afr_fill_ta_loc(xlator_t *this, loc_t *loc);
+afr_fill_ta_loc(xlator_t *this, loc_t *loc, gf_boolean_t is_gfid_based_fop);
int
afr_ta_post_op_lock(xlator_t *this, loc_t *loc);
@@ -1320,4 +1386,38 @@ afr_ta_has_quorum(afr_private_t *priv, afr_local_t *local);
void
afr_ta_lock_release_synctask(xlator_t *this);
+
+void
+afr_ta_locked_priv_invalidate(afr_private_t *priv);
+
+gf_boolean_t
+afr_lookup_has_quorum(call_frame_t *frame,
+ const unsigned int up_children_count);
+
+void
+afr_mark_new_entry_changelog(call_frame_t *frame, xlator_t *this);
+
+void
+afr_handle_replies_quorum(call_frame_t *frame, xlator_t *this);
+
+gf_boolean_t
+afr_ta_dict_contains_pending_xattr(dict_t *dict, afr_private_t *priv,
+ int child);
+
+void
+afr_selfheal_childup(xlator_t *this, afr_private_t *priv);
+
+gf_boolean_t
+afr_is_lock_mode_mandatory(dict_t *xdata);
+
+void
+afr_dom_lock_release(call_frame_t *frame);
+
+void
+afr_fill_success_replies(afr_local_t *local, afr_private_t *priv,
+ unsigned char *replies);
+
+gf_boolean_t
+afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name,
+ pid_t pid);
#endif /* __AFR_H__ */
diff --git a/xlators/cluster/dht/src/Makefile.am b/xlators/cluster/dht/src/Makefile.am
index caeb17b0f07..56f1f2ad7c8 100644
--- a/xlators/cluster/dht/src/Makefile.am
+++ b/xlators/cluster/dht/src/Makefile.am
@@ -14,19 +14,13 @@ dht_la_SOURCES = $(dht_common_source) dht.c
nufa_la_SOURCES = $(dht_common_source) nufa.c
switch_la_SOURCES = $(dht_common_source) switch.c
-dht_la_LDFLAGS = -module \
- -export-symbols $(top_srcdir)/xlators/cluster/dht/src/dht.sym \
- $(GF_XLATOR_LDFLAGS)
+dht_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
dht_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-nufa_la_LDFLAGS = -module \
- -export-symbols $(top_srcdir)/xlators/cluster/dht/src/nufa.sym \
- $(GF_XLATOR_LDFLAGS)
+nufa_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
nufa_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-switch_la_LDFLAGS = -module \
- -export-symbols $(top_srcdir)/xlators/cluster/dht/src/switch.sym \
- $(GF_XLATOR_LDFLAGS)
+switch_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
switch_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = dht-common.h dht-mem-types.h dht-messages.h \
@@ -41,8 +35,6 @@ AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
CLEANFILES =
-EXTRA_DIST = dht.sym nufa.sym switch.sym
-
uninstall-local:
rm -f $(DESTDIR)$(xlatordir)/distribute.so
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 24402399597..8ba0cc4c732 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -10,48 +10,52 @@
/* TODO: add NS locking */
-#include "glusterfs.h"
-#include "xlator.h"
#include "libxlator.h"
#include "dht-common.h"
#include "dht-lock.h"
-#include "defaults.h"
-#include "byte-order.h"
-#include "quota-common-utils.h"
-#include "upcall-utils.h"
+#include <glusterfs/byte-order.h>
+#include <glusterfs/quota-common-utils.h>
+#include <glusterfs/upcall-utils.h>
+#include "glusterfs/compat-errno.h" // for ENODATA on BSD
+#include <glusterfs/common-utils.h>
#include <sys/time.h>
#include <libgen.h>
#include <signal.h>
-int run_defrag = 0;
-
-int
-dht_link2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
+static int
+dht_rmdir_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata);
-int
-dht_removexattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame,
- int ret);
+static int
+dht_link2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
-int
-dht_setxattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
+static int
+dht_set_dir_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req);
-int
-dht_rmdir_readdirp_do(call_frame_t *readdirp_frame, xlator_t *this);
+static int
+dht_lookup_everywhere_done(call_frame_t *frame, xlator_t *this);
-int
-dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict,
- dict_t *xdata);
+static int
+dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata);
-int
-dht_set_file_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req);
+static int
+dht_rmdir_unlock(call_frame_t *frame, xlator_t *this);
-int
-dht_set_dir_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req);
+static const char *dht_dbg_vxattrs[] = {DHT_DBG_HASHED_SUBVOL_PATTERN, NULL};
-int
-dht_do_fresh_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc);
+/* Check the xdata to make sure EBADF has been set by client xlator */
+int32_t
+dht_check_remote_fd_failed_error(dht_local_t *local, int op_ret, int op_errno)
+{
+ if (op_ret == -1 && (op_errno == EBADF || op_errno == EBADFD) &&
+ !(local->fd_checked)) {
+ return 1;
+ }
+ return 0;
+}
/* Sets the blocks and size values to fixed values. This is to be called
* only for dirs. The caller is responsible for checking the type
@@ -67,67 +71,17 @@ dht_set_fixed_dir_stat(struct iatt *stat)
return -1;
}
-/* Set both DHT_IATT_IN_XDATA_KEY and DHT_MODE_IN_XDATA_KEY
- * Use DHT_MODE_IN_XDATA_KEY if available. Else fall back to
- * DHT_IATT_IN_XDATA_KEY
- */
-int
-dht_request_iatt_in_xdata(xlator_t *this, dict_t *xattr_req)
-{
- int ret = -1;
-
- ret = dict_set_int8(xattr_req, DHT_MODE_IN_XDATA_KEY, 1);
- ret = dict_set_int8(xattr_req, DHT_IATT_IN_XDATA_KEY, 1);
-
- /* At least one call succeeded */
- return ret;
-}
-
-/* Get both DHT_IATT_IN_XDATA_KEY and DHT_MODE_IN_XDATA_KEY
- * Use DHT_MODE_IN_XDATA_KEY if available, else fall back to
- * DHT_IATT_IN_XDATA_KEY
- * This will return a dummy iatt with only the mode and type set
- */
-int
-dht_read_iatt_from_xdata(xlator_t *this, dict_t *xdata, struct iatt *stbuf)
-{
- int ret = -1;
- int32_t mode = 0;
-
- ret = dict_get_int32(xdata, DHT_MODE_IN_XDATA_KEY, &mode);
-
- if (ret) {
- ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&stbuf);
- } else {
- stbuf->ia_prot = ia_prot_from_st_mode(mode);
- stbuf->ia_type = ia_type_from_st_mode(mode);
- }
-
- return ret;
-}
-
-int
-dht_rmdir_unlock(call_frame_t *frame, xlator_t *this);
-
-char *xattrs_to_heal[] = {"user.",
- POSIX_ACL_ACCESS_XATTR,
- POSIX_ACL_DEFAULT_XATTR,
- QUOTA_LIMIT_KEY,
- QUOTA_LIMIT_OBJECTS_KEY,
- GF_SELINUX_XATTR_KEY,
- NULL};
-
-char *dht_dbg_vxattrs[] = {DHT_DBG_HASHED_SUBVOL_PATTERN, NULL};
-
/* Return true if key exists in array
*/
static gf_boolean_t
dht_match_xattr(const char *key)
{
+ char **xattrs_to_heal = get_xattrs_to_heal();
+
return gf_get_index_by_elem(xattrs_to_heal, (char *)key) >= 0;
}
-int
+static int
dht_aggregate_quota_xattr(dict_t *dst, char *key, data_t *value)
{
int ret = -1;
@@ -190,7 +144,7 @@ out:
return ret;
}
-int
+static int
add_opt(char **optsp, const char *opt)
{
char *newopts = NULL;
@@ -268,7 +222,7 @@ out:
*/
-int
+static int
dht_aggregate_split_brain_xattr(dict_t *dst, char *key, data_t *value)
{
int ret = 0;
@@ -367,7 +321,7 @@ out:
return ret;
}
-int
+static int
dht_aggregate(dict_t *this, char *key, data_t *value, void *data)
{
dict_t *dst = NULL;
@@ -414,7 +368,7 @@ out:
return ret;
}
-void
+static void
dht_aggregate_xattr(dict_t *dst, dict_t *src)
{
if ((dst == NULL) || (src == NULL)) {
@@ -496,7 +450,7 @@ dht_inode_ctx_mdsvol_get(inode_t *inode, xlator_t *this, xlator_t **mdsvol)
- complete linkfile selfheal
*/
-int
+static int
dht_lookup_selfheal_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xdata)
{
@@ -538,7 +492,7 @@ out:
return ret;
}
-int
+static int
dht_discover_complete(xlator_t *this, call_frame_t *discover_frame)
{
dht_local_t *local = NULL;
@@ -659,13 +613,14 @@ dht_discover_complete(xlator_t *this, call_frame_t *discover_frame)
if (local->need_xattr_heal && !heal_path) {
local->need_xattr_heal = 0;
- ret = dht_dir_xattr_heal(this, local);
- if (ret)
- gf_msg(this->name, GF_LOG_ERROR, ret,
+ ret = dht_dir_xattr_heal(this, local, &op_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
DHT_MSG_DIR_XATTR_HEAL_FAILED,
"xattr heal failed for "
"directory gfid is %s ",
gfid_local);
+ }
}
}
@@ -726,7 +681,7 @@ out:
return ret;
}
-int
+static int
dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xdata)
{
@@ -735,6 +690,7 @@ dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int ret = -1;
dht_conf_t *conf = 0;
dht_layout_t *layout = NULL;
+ int32_t mds_heal_fresh_lookup = 0;
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
@@ -742,6 +698,7 @@ dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
conf = this->private;
layout = local->selfheal.layout;
+ mds_heal_fresh_lookup = local->mds_heal_fresh_lookup;
if (op_ret) {
gf_msg_debug(this->name, op_ret,
@@ -762,11 +719,63 @@ dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
layout);
}
out:
- if (local && local->mds_heal_fresh_lookup)
+ if (mds_heal_fresh_lookup)
DHT_STACK_DESTROY(frame);
return 0;
}
+static xlator_t *
+dht_inode_get_hashed_subvol(inode_t *inode, xlator_t *this, loc_t *loc)
+{
+ char *path = NULL;
+ loc_t populate_loc = {
+ 0,
+ };
+ char *name = NULL;
+ xlator_t *hash_subvol = NULL;
+
+ if (!inode)
+ return hash_subvol;
+
+ if (loc && loc->parent && loc->path) {
+ if (!loc->name) {
+ name = strrchr(loc->path, '/');
+ if (name) {
+ loc->name = name + 1;
+ } else {
+ goto out;
+ }
+ }
+ hash_subvol = dht_subvol_get_hashed(this, loc);
+ goto out;
+ }
+
+ if (!gf_uuid_is_null(inode->gfid)) {
+ populate_loc.inode = inode_ref(inode);
+ populate_loc.parent = inode_parent(populate_loc.inode, NULL, NULL);
+ inode_path(populate_loc.inode, NULL, &path);
+
+ if (!path)
+ goto out;
+
+ populate_loc.path = path;
+ if (!populate_loc.name && populate_loc.path) {
+ name = strrchr(populate_loc.path, '/');
+ if (name) {
+ populate_loc.name = name + 1;
+
+ } else {
+ goto out;
+ }
+ }
+ hash_subvol = dht_subvol_get_hashed(this, &populate_loc);
+ }
+out:
+ if (populate_loc.inode)
+ loc_wipe(&populate_loc);
+ return hash_subvol;
+}
+
/* Common function call by revalidate/selfheal code path to populate
internal xattr if it is not present, mark_during_fresh_lookup value
determines either function is call by revalidate_cbk(discover_complete)
@@ -801,9 +810,8 @@ dht_common_mark_mdsxattr(call_frame_t *frame, int *errst,
call_frame_t *xattr_frame = NULL;
gf_boolean_t vol_down = _gf_false;
- this = frame->this;
-
GF_VALIDATE_OR_GOTO("dht", frame, out);
+ this = frame->this;
GF_VALIDATE_OR_GOTO("dht", this, out);
GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
@@ -812,6 +820,7 @@ dht_common_mark_mdsxattr(call_frame_t *frame, int *errst,
conf = this->private;
layout = local->selfheal.layout;
local->mds_heal_fresh_lookup = mark_during_fresh_lookup;
+
gf_uuid_unparse(local->gfid, gfid_local);
/* Code to update hashed subvol consider as a mds subvol
@@ -852,7 +861,8 @@ dht_common_mark_mdsxattr(call_frame_t *frame, int *errst,
"Failed to get hashed subvol for path %s"
"gfid is %s ",
local->loc.path, gfid_local);
- (*errst) = 1;
+ if (errst)
+ (*errst) = 1;
ret = -1;
goto out;
}
@@ -923,7 +933,44 @@ out:
return ret;
}
-int
+/* Get the value of key from dict in the bytewise and save in array after
+ convert from network byte order to host byte order
+*/
+static int32_t
+dht_dict_get_array(dict_t *dict, char *key, int32_t value[], int32_t size,
+ int *errst)
+{
+ void *ptr = NULL;
+ int32_t len = -1;
+ int32_t vindex = -1;
+ int32_t err = -1;
+ int ret = 0;
+
+ if (dict == NULL) {
+ (*errst) = -1;
+ return -EINVAL;
+ }
+ err = dict_get_ptr_and_len(dict, key, &ptr, &len);
+ if (err != 0) {
+ (*errst) = -1;
+ return err;
+ }
+
+ if (len != (size * sizeof(int32_t))) {
+ (*errst) = -1;
+ return -EINVAL;
+ }
+
+ for (vindex = 0; vindex < size; vindex++) {
+ value[vindex] = ntoh32(*((int32_t *)ptr + vindex));
+ if (value[vindex] < 0)
+ ret = -1;
+ }
+
+ return ret;
+}
+
+static int
dht_discover_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, inode_t *inode, struct iatt *stbuf,
dict_t *xattr, struct iatt *postparent)
@@ -1018,7 +1065,7 @@ dht_discover_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
if (local->xattr == NULL) {
local->xattr = dict_ref(xattr);
} else {
- /* Don't aggregate for files. See BZ#1484113 */
+ /* Don't aggregate for files. See BZ#1484709 */
if (is_dir)
dht_aggregate_xattr(local->xattr, xattr);
}
@@ -1084,7 +1131,53 @@ out:
return 0;
}
-int
+static int
+dht_set_file_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req)
+{
+ int ret = -EINVAL;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf) {
+ goto err;
+ }
+
+ if (!xattr_req) {
+ goto err;
+ }
+
+ /* Used to check whether this is a linkto file.
+ */
+ ret = dict_set_uint32(xattr_req, conf->link_xattr_name, 256);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s for "
+ "path %s",
+ conf->link_xattr_name, loc->path);
+ goto err;
+ }
+
+ /* This is used to make sure we don't unlink linkto files
+ * which are the target of an ongoing file migration.
+ */
+ ret = dict_set_uint32(xattr_req, GLUSTERFS_OPEN_FD_COUNT, 4);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s for "
+ "path %s",
+ GLUSTERFS_OPEN_FD_COUNT, loc->path);
+ goto err;
+ }
+
+ ret = 0;
+err:
+ return ret;
+}
+
+/* This is a gfid based nameless lookup. Without a name, the hashed subvol
+ * cannot be calculated so a lookup is sent to all subvols.
+ */
+static int
dht_do_discover(call_frame_t *frame, xlator_t *this, loc_t *loc)
{
int ret;
@@ -1098,6 +1191,9 @@ dht_do_discover(call_frame_t *frame, xlator_t *this, loc_t *loc)
conf = this->private;
local = frame->local;
+ /* As we do not know if this is a file or directory, request
+ * both file and directory xattrs
+ */
ret = dht_set_file_xattr_req(this, loc, local->xattr_req);
if (ret) {
goto err;
@@ -1109,6 +1205,9 @@ dht_do_discover(call_frame_t *frame, xlator_t *this, loc_t *loc)
}
if (loc_is_root(loc)) {
+ /* Request the DHT commit hash xattr (trusted.glusterfs.dht.commithash)
+ * set on the brick root.
+ */
ret = dict_set_uint32(local->xattr_req, conf->commithash_xattr_name,
sizeof(uint32_t));
}
@@ -1150,48 +1249,11 @@ err:
return 0;
}
-/* Get the value of key from dict in the bytewise and save in array after
- convert from network byte order to host byte order
-*/
-int32_t
-dht_dict_get_array(dict_t *dict, char *key, int32_t value[], int32_t size,
- int *errst)
-{
- void *ptr = NULL;
- int32_t len = -1;
- int32_t vindex = -1;
- int32_t err = -1;
- int ret = 0;
-
- if (dict == NULL) {
- (*errst) = -1;
- return -EINVAL;
- }
- err = dict_get_ptr_and_len(dict, key, &ptr, &len);
- if (err != 0) {
- (*errst) = -1;
- return err;
- }
-
- if (len != (size * sizeof(int32_t))) {
- (*errst) = -1;
- return -EINVAL;
- }
-
- for (vindex = 0; vindex < size; vindex++) {
- value[vindex] = ntoh32(*((int32_t *)ptr + vindex));
- if (value[vindex] < 0)
- ret = -1;
- }
-
- return ret;
-}
-
/* Code to call syntask to heal custom xattr from hashed subvol
to non hashed subvol
*/
int
-dht_dir_xattr_heal(xlator_t *this, dht_local_t *local)
+dht_dir_xattr_heal(xlator_t *this, dht_local_t *local, int *op_errno)
{
dht_local_t *copy_local = NULL;
call_frame_t *copy = NULL;
@@ -1203,6 +1265,7 @@ dht_dir_xattr_heal(xlator_t *this, dht_local_t *local)
"No gfid exists for path %s "
"so healing xattr is not possible",
local->loc.path);
+ *op_errno = EIO;
goto out;
}
@@ -1216,6 +1279,7 @@ dht_dir_xattr_heal(xlator_t *this, dht_local_t *local)
"Memory allocation failed "
"for path %s gfid %s ",
local->loc.path, gfid_local);
+ *op_errno = ENOMEM;
DHT_STACK_DESTROY(copy);
} else {
copy_local->stbuf = local->stbuf;
@@ -1230,6 +1294,7 @@ dht_dir_xattr_heal(xlator_t *this, dht_local_t *local)
"Synctask creation failed to heal xattr "
"for path %s gfid %s ",
local->loc.path, gfid_local);
+ *op_errno = ENOMEM;
DHT_STACK_DESTROY(copy);
}
}
@@ -1238,6 +1303,51 @@ out:
return ret;
}
+static int
+dht_needs_selfheal(call_frame_t *frame, xlator_t *this)
+{
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int needs_selfheal = 0;
+ int ret = 0;
+
+ local = frame->local;
+ layout = local->layout;
+
+ if (local->need_attrheal || local->need_xattr_heal ||
+ local->need_selfheal) {
+ needs_selfheal = 1;
+ }
+
+ ret = dht_layout_normalize(this, &local->loc, layout);
+
+ if (ret != 0) {
+ gf_msg_debug(this->name, 0, "fixing assignment on %s", local->loc.path);
+ needs_selfheal = 1;
+ }
+ return needs_selfheal;
+}
+
+static int
+is_permission_different(ia_prot_t *prot1, ia_prot_t *prot2)
+{
+ if ((prot1->owner.read != prot2->owner.read) ||
+ (prot1->owner.write != prot2->owner.write) ||
+ (prot1->owner.exec != prot2->owner.exec) ||
+ (prot1->group.read != prot2->group.read) ||
+ (prot1->group.write != prot2->group.write) ||
+ (prot1->group.exec != prot2->group.exec) ||
+ (prot1->other.read != prot2->other.read) ||
+ (prot1->other.write != prot2->other.write) ||
+ (prot1->other.exec != prot2->other.exec) ||
+ (prot1->suid != prot2->suid) || (prot1->sgid != prot2->sgid) ||
+ (prot1->sticky != prot2->sticky)) {
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
int
dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf,
@@ -1255,8 +1365,6 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
char gfid_local[GF_UUID_BUF_SIZE] = {0};
char gfid_node[GF_UUID_BUF_SIZE] = {0};
int32_t mds_xattr_val[1] = {0};
- call_frame_t *copy = NULL;
- dht_local_t *copy_local = NULL;
GF_VALIDATE_OR_GOTO("dht", frame, out);
GF_VALIDATE_OR_GOTO("dht", this, out);
@@ -1269,7 +1377,11 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
conf = this->private;
layout = local->layout;
+ gf_msg_debug(this->name, op_errno,
+ "%s: lookup on %s returned with op_ret = %d, op_errno = %d",
+ local->loc.path, prev->name, op_ret, op_errno);
+ /* The first successful lookup*/
if (!op_ret && gf_uuid_is_null(local->gfid)) {
memcpy(local->gfid, stbuf->ia_gfid, 16);
}
@@ -1291,29 +1403,28 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
{
/* TODO: assert equal mode on stbuf->st_mode and
local->stbuf->st_mode
-
else mkdir/chmod/chown and fix
*/
ret = dht_layout_merge(this, layout, prev, op_ret, op_errno, xattr);
if (op_ret == -1) {
local->op_errno = op_errno;
- gf_msg_debug(this->name, op_errno,
- "lookup of %s on %s returned error", local->loc.path,
- prev->name);
+ /* The GFID is missing on this subvol. Force a heal. */
+ if (op_errno == ENODATA) {
+ local->need_lookup_everywhere = 1;
+ }
goto unlock;
}
is_dir = check_is_dir(inode, stbuf, xattr);
if (!is_dir) {
gf_msg_debug(this->name, 0,
- "lookup of %s on %s returned non"
- "dir 0%o"
+ "%s: lookup on %s returned non dir 0%o"
"calling lookup_everywhere",
local->loc.path, prev->name, stbuf->ia_type);
- local->need_selfheal = 1;
+ local->need_lookup_everywhere = 1;
goto unlock;
}
@@ -1324,19 +1435,31 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
dht_aggregate_xattr(local->xattr, xattr);
}
- if (dict_get(xattr, conf->mds_xattr_key)) {
- local->mds_subvol = prev;
- local->mds_stbuf.ia_gid = stbuf->ia_gid;
- local->mds_stbuf.ia_uid = stbuf->ia_uid;
- local->mds_stbuf.ia_prot = stbuf->ia_prot;
+ if (__is_root_gfid(stbuf->ia_gfid)) {
+ ret = dht_dir_has_layout(xattr, conf->xattr_name);
+ if (ret >= 0) {
+ if (is_greater_time(local->prebuf.ia_ctime,
+ local->prebuf.ia_ctime_nsec,
+ stbuf->ia_ctime, stbuf->ia_ctime_nsec)) {
+ /* Choose source */
+ local->prebuf.ia_gid = stbuf->ia_gid;
+ local->prebuf.ia_uid = stbuf->ia_uid;
+
+ local->prebuf.ia_ctime = stbuf->ia_ctime;
+ local->prebuf.ia_ctime_nsec = stbuf->ia_ctime_nsec;
+ local->prebuf.ia_prot = stbuf->ia_prot;
+ }
+ }
}
if (local->stbuf.ia_type != IA_INVAL) {
- if (!__is_root_gfid(stbuf->ia_gfid) &&
- ((local->stbuf.ia_gid != stbuf->ia_gid) ||
- (local->stbuf.ia_uid != stbuf->ia_uid) ||
- (is_permission_different(&local->stbuf.ia_prot,
- &stbuf->ia_prot)))) {
+ /* This is not the first subvol to respond
+ * Compare values to see if attrs need to be healed
+ */
+ if ((local->stbuf.ia_gid != stbuf->ia_gid) ||
+ (local->stbuf.ia_uid != stbuf->ia_uid) ||
+ (is_permission_different(&local->stbuf.ia_prot,
+ &stbuf->ia_prot))) {
local->need_attrheal = 1;
}
}
@@ -1349,125 +1472,99 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (!dict_get(xattr, conf->mds_xattr_key)) {
gf_msg_debug(this->name, 0,
- "Internal xattr %s is not present "
- " on path %s gfid is %s ",
- conf->mds_xattr_key, local->loc.path, gfid_local);
+ "%s: mds xattr %s is not present "
+ "on %s(gfid = %s)",
+ local->loc.path, conf->mds_xattr_key, prev->name,
+ gfid_local);
goto unlock;
- } else {
- /* Save mds subvol on inode ctx */
- ret = dht_inode_ctx_mdsvol_set(local->inode, this, prev);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- DHT_MSG_SET_INODE_CTX_FAILED,
- "Failed to set hashed subvol for %s vol is %s",
- local->loc.path, prev->name);
- }
+ }
+
+ /* Save the mds subvol info and stbuf. This is the value that will
+ * be used for healing
+ */
+ local->mds_subvol = prev;
+ local->mds_stbuf = *stbuf;
+
+ /* Save mds subvol on inode ctx */
+
+ ret = dht_inode_ctx_mdsvol_set(local->inode, this, prev);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "%s: Failed to set mds (%s)", local->loc.path, prev->name);
}
check_mds = dht_dict_get_array(xattr, conf->mds_xattr_key,
mds_xattr_val, 1, &errst);
if ((check_mds < 0) && !errst) {
+ /* Check if xattrs need to be healed on the directories */
local->mds_xattr = dict_ref(xattr);
gf_msg_debug(this->name, 0,
- "Value of %s is not zero on hashed subvol "
- "so xattr needs to be heal on non hashed"
- " path is %s and vol name is %s "
- " gfid is %s",
- conf->mds_xattr_key, local->loc.path, prev->name,
+ "%s: %s is not zero on %s. Xattrs need to be healed."
+ "(gfid = %s)",
+ local->loc.path, conf->mds_xattr_key, prev->name,
gfid_local);
local->need_xattr_heal = 1;
- local->mds_subvol = prev;
}
}
+
unlock:
UNLOCK(&frame->lock);
this_call_cnt = dht_frame_return(frame);
if (is_last_call(this_call_cnt)) {
+ /* If the mds subvol is not set correctly*/
+ if (!__is_root_gfid(local->gfid) &&
+ (!dict_get(local->xattr, conf->mds_xattr_key))) {
+ local->need_selfheal = 1;
+ }
+
/* No need to call xattr heal code if volume count is 1
*/
- if (conf->subvolume_cnt == 1)
+ if (conf->subvolume_cnt == 1) {
local->need_xattr_heal = 0;
+ }
- /* Code to update all extended attributed from hashed subvol
- to local->xattr
- */
- if (local->need_xattr_heal && (local->mds_xattr)) {
- dht_dir_set_heal_xattr(this, local, local->xattr, local->mds_xattr,
- NULL, NULL);
- dict_unref(local->mds_xattr);
- local->mds_xattr = NULL;
+ if (local->need_selfheal || local->need_lookup_everywhere) {
+ /* Set the gfid-req so posix will set the GFID*/
+ if (!gf_uuid_is_null(local->gfid)) {
+ /* Ok, this should _never_ happen */
+ ret = dict_set_static_bin(local->xattr_req, "gfid-req",
+ local->gfid, 16);
+ } else {
+ if (!gf_uuid_is_null(local->gfid_req))
+ ret = dict_set_static_bin(local->xattr_req, "gfid-req",
+ local->gfid_req, 16);
+ }
}
- if (local->need_selfheal) {
- local->need_selfheal = 0;
+ if (local->need_lookup_everywhere) {
+ local->need_lookup_everywhere = 0;
dht_lookup_everywhere(frame, this, &local->loc);
return 0;
}
if (local->op_ret == 0) {
- ret = dht_layout_normalize(this, &local->loc, layout);
-
- if (ret != 0) {
- gf_msg_debug(this->name, 0, "fixing assignment on %s",
- local->loc.path);
+ if (dht_needs_selfheal(frame, this)) {
goto selfheal;
}
dht_layout_set(this, local->inode, layout);
- if (!dict_get(local->xattr, conf->mds_xattr_key) ||
- local->need_xattr_heal)
- goto selfheal;
- }
-
- if (local->inode) {
- dht_inode_ctx_time_update(local->inode, this, &local->stbuf, 1);
- }
-
- if (local->loc.parent) {
- dht_inode_ctx_time_update(local->loc.parent, this,
- &local->postparent, 1);
- }
-
- if (local->need_attrheal) {
- local->need_attrheal = 0;
- if (!__is_root_gfid(inode->gfid)) {
- local->stbuf.ia_gid = local->mds_stbuf.ia_gid;
- local->stbuf.ia_uid = local->mds_stbuf.ia_uid;
- local->stbuf.ia_prot = local->mds_stbuf.ia_prot;
+ if (local->inode) {
+ dht_inode_ctx_time_update(local->inode, this, &local->stbuf, 1);
}
- copy = create_frame(this, this->ctx->pool);
- if (copy) {
- copy_local = dht_local_init(copy, &local->loc, NULL, 0);
- if (!copy_local) {
- DHT_STACK_DESTROY(copy);
- goto skip_attr_heal;
- }
- copy_local->stbuf = local->stbuf;
- gf_uuid_copy(copy_local->loc.gfid, local->stbuf.ia_gfid);
- copy_local->mds_stbuf = local->mds_stbuf;
- copy_local->mds_subvol = local->mds_subvol;
- copy->local = copy_local;
- FRAME_SU_DO(copy, dht_local_t);
- ret = synctask_new(this->ctx->env, dht_dir_attr_heal,
- dht_dir_attr_heal_done, copy, copy);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM,
- DHT_MSG_DIR_ATTR_HEAL_FAILED,
- "Synctask creation failed to heal attr "
- "for path %s gfid %s ",
- local->loc.path, local->gfid);
- DHT_STACK_DESTROY(copy);
- }
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->postparent, 1);
}
}
- skip_attr_heal:
DHT_STRIP_PHASE1_FLAGS(&local->stbuf);
dht_set_fixed_dir_stat(&local->postparent);
/* Delete mds xattr at the time of STACK UNWIND */
if (local->xattr)
GF_REMOVE_INTERNAL_XATTR(conf->mds_xattr_key, local->xattr);
+
DHT_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno,
local->inode, &local->stbuf, local->xattr,
&local->postparent);
@@ -1483,24 +1580,57 @@ out:
return ret;
}
-int
-is_permission_different(ia_prot_t *prot1, ia_prot_t *prot2)
+static int
+dht_lookup_directory(call_frame_t *frame, xlator_t *this, loc_t *loc)
{
- if ((prot1->owner.read != prot2->owner.read) ||
- (prot1->owner.write != prot2->owner.write) ||
- (prot1->owner.exec != prot2->owner.exec) ||
- (prot1->group.read != prot2->group.read) ||
- (prot1->group.write != prot2->group.write) ||
- (prot1->group.exec != prot2->group.exec) ||
- (prot1->other.read != prot2->other.read) ||
- (prot1->other.write != prot2->other.write) ||
- (prot1->other.exec != prot2->other.exec) ||
- (prot1->suid != prot2->suid) || (prot1->sgid != prot2->sgid) ||
- (prot1->sticky != prot2->sticky)) {
- return 1;
- } else {
- return 0;
+ int call_cnt = 0;
+ int i = 0;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", this, unwind);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, unwind);
+ GF_VALIDATE_OR_GOTO("dht", this->private, unwind);
+ GF_VALIDATE_OR_GOTO("dht", loc, unwind);
+
+ conf = this->private;
+ local = frame->local;
+
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+
+ local->layout = dht_layout_new(this, conf->subvolume_cnt);
+ if (!local->layout) {
+ goto unwind;
}
+
+ if (local->xattr != NULL) {
+ dict_unref(local->xattr);
+ local->xattr = NULL;
+ }
+
+ if (!gf_uuid_is_null(local->gfid)) {
+ /* use this gfid in order to heal any missing ones */
+ ret = dict_set_gfuuid(local->xattr_req, "gfid-req", local->gfid, true);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "%s: Failed to set dictionary value:"
+ " key = gfid-req",
+ local->loc.path);
+ }
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(
+ frame, dht_lookup_dir_cbk, conf->subvolumes[i], conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup, &local->loc, local->xattr_req);
+ }
+ return 0;
+unwind:
+ DHT_STACK_UNWIND(lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL);
+out:
+ return 0;
}
int
@@ -1517,13 +1647,11 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int is_dir = 0;
int is_linkfile = 0;
int follow_link = 0;
- call_frame_t *copy = NULL;
- dht_local_t *copy_local = NULL;
char gfid[GF_UUID_BUF_SIZE] = {0};
uint32_t vol_commit_hash = 0;
xlator_t *subvol = NULL;
int32_t check_mds = 0;
- int errst = 0;
+ int errst = 0, i = 0;
int32_t mds_xattr_val[1] = {0};
GF_VALIDATE_OR_GOTO("dht", frame, err);
@@ -1537,6 +1665,8 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
conf = this->private;
if (!conf->vch_forced) {
+ /* Update the commithash value if available
+ */
ret = dict_get_uint32(xattr, conf->commithash_xattr_name,
&vol_commit_hash);
if (ret == 0) {
@@ -1546,17 +1676,16 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
gf_uuid_unparse(local->loc.gfid, gfid);
+ gf_msg_debug(this->name, op_errno,
+ "%s: revalidate lookup on %s returned op_ret %d",
+ local->loc.path, prev->name, op_ret);
+
LOCK(&frame->lock);
{
if (gf_uuid_is_null(local->gfid)) {
memcpy(local->gfid, local->loc.gfid, 16);
}
- gf_msg_debug(this->name, op_errno,
- "revalidate lookup of %s "
- "returned with op_ret %d",
- local->loc.path, op_ret);
-
if (op_ret == -1) {
local->op_errno = op_errno;
@@ -1588,8 +1717,27 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local->loc.path);
local->need_lookup_everywhere = 1;
+ } else if (IA_ISDIR(local->loc.inode->ia_type)) {
+ layout = local->layout;
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].xlator == prev) {
+ layout->list[i].err = op_errno;
+ break;
+ }
+ }
+
+ local->need_selfheal = 1;
}
}
+
+ /* The GFID is missing on this subvol. Lookup everywhere to force a
+ * gfid heal
+ */
+ if ((op_errno == ENODATA) &&
+ (IA_ISDIR(local->loc.inode->ia_type))) {
+ local->need_lookup_everywhere = 1;
+ }
+
goto unlock;
}
@@ -1639,15 +1787,16 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
(local->stbuf.ia_uid != stbuf->ia_uid) ||
is_permission_different(&local->stbuf.ia_prot,
&stbuf->ia_prot)) {
- local->need_selfheal = 1;
+ local->need_attrheal = 1;
}
}
if (!dict_get(xattr, conf->mds_xattr_key)) {
gf_msg_debug(this->name, 0,
- "internal xattr %s is not present"
- " on path %s gfid is %s ",
- conf->mds_xattr_key, local->loc.path, gfid);
+ "%s: internal xattr %s is not present"
+ " on subvol %s(gfid is %s)",
+ local->loc.path, conf->mds_xattr_key, prev->name,
+ gfid);
} else {
check_mds = dht_dict_get_array(xattr, conf->mds_xattr_key,
mds_xattr_val, 1, &errst);
@@ -1665,6 +1814,8 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local->loc.path, prev->name);
}
if ((check_mds < 0) && !errst) {
+ /* Check if xattrs need to be healed on the directory
+ */
local->mds_xattr = dict_ref(xattr);
gf_msg_debug(this->name, 0,
"Value of %s is not zero on "
@@ -1680,6 +1831,8 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
ret = dht_layout_dir_mismatch(this, layout, prev, &local->loc,
xattr);
if (ret != 0) {
+ /* In memory layout does not match on-disk layout.
+ */
gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_MISMATCH,
"Mismatching layouts for %s, gfid = %s", local->loc.path,
gfid);
@@ -1706,6 +1859,8 @@ unlock:
UNLOCK(&frame->lock);
if (follow_link) {
+ /* Found a linkto file. Follow it to see if the target file exists
+ */
gf_uuid_copy(local->gfid, stbuf->ia_gfid);
subvol = dht_linkfile_subvol(this, inode, stbuf, xattr);
@@ -1735,71 +1890,31 @@ unlock:
local->need_xattr_heal = 0;
if (IA_ISDIR(local->stbuf.ia_type)) {
- /* Code to update all extended attributed from hashed
- subvol to local->xattr and call heal code to heal
- custom xattr from hashed subvol to non-hashed subvol
- */
- if (local->need_xattr_heal && (local->mds_xattr)) {
- dht_dir_set_heal_xattr(this, local, local->xattr,
- local->mds_xattr, NULL, NULL);
- dict_unref(local->mds_xattr);
- local->mds_xattr = NULL;
- local->need_xattr_heal = 0;
- ret = dht_dir_xattr_heal(this, local);
- if (ret)
- gf_msg(this->name, GF_LOG_ERROR, ret,
- DHT_MSG_DIR_XATTR_HEAL_FAILED,
- "xattr heal failed for directory %s "
- " gfid %s ",
- local->loc.path, gfid);
- } else {
- /* Call function to save hashed subvol on inode
- ctx if internal mds xattr is not present and
- all subvols are up
- */
- if (inode && !__is_root_gfid(inode->gfid) && (!local->op_ret))
- (void)dht_common_mark_mdsxattr(frame, NULL, 1);
- }
- }
- if (local->need_selfheal) {
- local->need_selfheal = 0;
- if (!__is_root_gfid(inode->gfid)) {
- gf_uuid_copy(local->gfid, local->mds_stbuf.ia_gfid);
- local->stbuf.ia_gid = local->mds_stbuf.ia_gid;
- local->stbuf.ia_uid = local->mds_stbuf.ia_uid;
- local->stbuf.ia_prot = local->mds_stbuf.ia_prot;
- } else {
- gf_uuid_copy(local->gfid, local->stbuf.ia_gfid);
- local->stbuf.ia_gid = local->prebuf.ia_gid;
- local->stbuf.ia_uid = local->prebuf.ia_uid;
- local->stbuf.ia_prot = local->prebuf.ia_prot;
- }
-
- copy = create_frame(this, this->ctx->pool);
- if (copy) {
- copy_local = dht_local_init(copy, &local->loc, NULL, 0);
- if (!copy_local) {
- DHT_STACK_DESTROY(copy);
- goto cont;
- }
- copy_local->stbuf = local->stbuf;
- copy_local->mds_stbuf = local->mds_stbuf;
- copy_local->mds_subvol = local->mds_subvol;
- copy->local = copy_local;
- FRAME_SU_DO(copy, dht_local_t);
- ret = synctask_new(this->ctx->env, dht_dir_attr_heal,
- dht_dir_attr_heal_done, copy, copy);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM,
- DHT_MSG_DIR_ATTR_HEAL_FAILED,
- "Synctask creation failed to heal attr "
- "for path %s gfid %s ",
- local->loc.path, local->gfid);
- DHT_STACK_DESTROY(copy);
+ /* No mds xattr found. Trigger a heal to set it */
+ if (!__is_root_gfid(local->loc.inode->gfid) &&
+ (!dict_get(local->xattr, conf->mds_xattr_key)))
+ local->need_selfheal = 1;
+
+ if (dht_needs_selfheal(frame, this)) {
+ if (!__is_root_gfid(local->loc.inode->gfid)) {
+ if (local->mds_subvol) {
+ local->stbuf.ia_gid = local->mds_stbuf.ia_gid;
+ local->stbuf.ia_uid = local->mds_stbuf.ia_uid;
+ local->stbuf.ia_prot = local->mds_stbuf.ia_prot;
+ }
+ } else {
+ local->stbuf.ia_gid = local->prebuf.ia_gid;
+ local->stbuf.ia_uid = local->prebuf.ia_uid;
+ local->stbuf.ia_prot = local->prebuf.ia_prot;
}
+
+ layout = local->layout;
+ dht_selfheal_directory(frame, dht_lookup_selfheal_cbk,
+ &local->loc, layout);
+ return 0;
}
}
- cont:
+
if (local->layout_mismatch) {
/* Found layout mismatch in the directory, need to
fix this in the inode context */
@@ -1815,9 +1930,16 @@ unlock:
dht_layout_unref(this, local->layout);
local->layout = NULL;
- /* We know that current cached subvol is no more
+ /* We know that current cached subvol is no longer
valid, get the new one */
local->cached_subvol = NULL;
+ if (local->xattr_req) {
+ if (!gf_uuid_is_null(local->gfid)) {
+ ret = dict_set_static_bin(local->xattr_req, "gfid-req",
+ local->gfid, 16);
+ }
+ }
+
dht_lookup_everywhere(frame, this, &local->loc);
return 0;
}
@@ -1858,12 +1980,11 @@ err:
return ret;
}
-int
-dht_lookup_linkfile_create_cbk(call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
+static int
+dht_lookup_linkfile_create_cbk(call_frame_t *frame, void *cooie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
dht_local_t *local = NULL;
xlator_t *cached_subvol = NULL;
@@ -1925,7 +2046,7 @@ out:
return ret;
}
-int
+static int
dht_lookup_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
@@ -1951,7 +2072,7 @@ dht_lookup_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-int
+static int
dht_lookup_unlink_of_false_linkto_cbk(call_frame_t *frame, void *cookie,
xlator_t *this, int op_ret, int op_errno,
struct iatt *preparent,
@@ -2003,7 +2124,7 @@ dht_lookup_unlink_of_false_linkto_cbk(call_frame_t *frame, void *cookie,
return 0;
}
-int
+static int
dht_lookup_unlink_stale_linkto_cbk(call_frame_t *frame, void *cookie,
xlator_t *this, int op_ret, int op_errno,
struct iatt *preparent,
@@ -2020,52 +2141,41 @@ dht_lookup_unlink_stale_linkto_cbk(call_frame_t *frame, void *cookie,
local = frame->local;
- if (local && local->loc.path)
- path = local->loc.path;
+ if (local) {
+ FRAME_SU_UNDO(frame, dht_local_t);
+ if (local->loc.path)
+ path = local->loc.path;
+ }
gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_UNLINK_LOOKUP_INFO,
"Returned with op_ret %d and "
"op_errno %d for %s",
op_ret, op_errno, ((path == NULL) ? "null" : path));
- FRAME_SU_UNDO(frame, dht_local_t);
DHT_STACK_UNWIND(lookup, frame, -1, ENOENT, NULL, NULL, NULL, NULL);
return 0;
}
-int
+static int
dht_fill_dict_to_avoid_unlink_of_migrating_file(dict_t *dict)
{
int ret = 0;
- xlator_t *this = NULL;
- char *linktoskip_key = NULL;
- this = THIS;
- GF_VALIDATE_OR_GOTO("dht", this, err);
-
- if (dht_is_tier_xlator(this))
- linktoskip_key = TIER_SKIP_NON_LINKTO_UNLINK;
- else
- linktoskip_key = DHT_SKIP_NON_LINKTO_UNLINK;
-
- ret = dict_set_int32(dict, linktoskip_key, 1);
+ ret = dict_set_int32_sizen(dict, DHT_SKIP_NON_LINKTO_UNLINK, 1);
if (ret)
- goto err;
+ return -1;
- ret = dict_set_int32(dict, DHT_SKIP_OPEN_FD_UNLINK, 1);
+ ret = dict_set_int32_sizen(dict, DHT_SKIP_OPEN_FD_UNLINK, 1);
if (ret)
- goto err;
+ return -1;
return 0;
-
-err:
- return -1;
}
-int32_t
+static int32_t
dht_linkfile_create_lookup_cbk(call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *buf, dict_t *xdata,
@@ -2113,7 +2223,7 @@ dht_linkfile_create_lookup_cbk(call_frame_t *frame, void *cookie,
"Creating linkto file on %s(hash) to "
"%s on %s (gfid = %s)",
local->hashed_subvol->name, local->loc.path,
- local->cached_subvol->name, gfid);
+ local->cached_subvol->name, gfid_str);
ret = dht_linkfile_create(frame, dht_lookup_linkfile_create_cbk,
this, local->cached_subvol,
@@ -2139,7 +2249,7 @@ no_linkto:
return 0;
}
-int32_t
+static int32_t
dht_call_lookup_linkfile_create(call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
int32_t op_errno, dict_t *xdata)
@@ -2225,7 +2335,7 @@ err:
* dht_lookup_everywhere_done takes decision based on any of the above case
*/
-int
+static int
dht_lookup_everywhere_done(call_frame_t *frame, xlator_t *this)
{
int ret = 0;
@@ -2251,6 +2361,16 @@ dht_lookup_everywhere_done(call_frame_t *frame, xlator_t *this)
DHT_STACK_UNWIND(lookup, frame, -1, EIO, NULL, NULL, NULL, NULL);
return 0;
}
+ if (local->op_ret && local->gfid_missing) {
+ if (gf_uuid_is_null(local->gfid_req)) {
+ DHT_STACK_UNWIND(lookup, frame, -1, ENODATA, NULL, NULL, NULL,
+ NULL);
+ return 0;
+ }
+ /* A hack */
+ dht_lookup_directory(frame, this, &local->loc);
+ return 0;
+ }
if (local->dir_count) {
dht_lookup_directory(frame, this, &local->loc);
@@ -2529,7 +2649,7 @@ unwind_hashed_and_cached:
return 0;
}
-int
+static int
dht_lookup_everywhere_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, dict_t *xattr,
@@ -2570,6 +2690,8 @@ dht_lookup_everywhere_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret == -1) {
if (op_errno != ENOENT)
local->op_errno = op_errno;
+ if (op_errno == ENODATA)
+ local->gfid_missing = _gf_true;
goto unlock;
}
@@ -2863,113 +2985,54 @@ out:
return 0;
}
-int
-dht_lookup_directory(call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- int call_cnt = 0;
- int i = 0;
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- int ret = 0;
-
- GF_VALIDATE_OR_GOTO("dht", frame, out);
- GF_VALIDATE_OR_GOTO("dht", this, unwind);
- GF_VALIDATE_OR_GOTO("dht", frame->local, unwind);
- GF_VALIDATE_OR_GOTO("dht", this->private, unwind);
- GF_VALIDATE_OR_GOTO("dht", loc, unwind);
-
- conf = this->private;
- local = frame->local;
-
- call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
-
- local->layout = dht_layout_new(this, conf->subvolume_cnt);
- if (!local->layout) {
- goto unwind;
- }
-
- if (local->xattr != NULL) {
- dict_unref(local->xattr);
- local->xattr = NULL;
- }
-
- if (!gf_uuid_is_null(local->gfid)) {
- ret = dict_set_gfuuid(local->xattr_req, "gfid-req", local->gfid, true);
- if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value:"
- " key = gfid-req",
- local->loc.path);
- }
-
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND_COOKIE(
- frame, dht_lookup_dir_cbk, conf->subvolumes[i], conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup, &local->loc, local->xattr_req);
- }
- return 0;
-unwind:
- DHT_STACK_UNWIND(lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL);
-out:
- return 0;
-}
-
/* Code to get hashed subvol based on inode and loc
First it check if loc->parent and loc->path exist then it get
hashed subvol based on loc.
*/
-xlator_t *
-dht_inode_get_hashed_subvol(inode_t *inode, xlator_t *this, loc_t *loc)
+static gf_boolean_t
+dht_should_lookup_everywhere(xlator_t *this, dht_conf_t *conf, loc_t *loc)
{
- char *path = NULL;
- loc_t populate_loc = {
- 0,
- };
- char *name = NULL;
- xlator_t *hash_subvol = NULL;
-
- if (!inode)
- return hash_subvol;
+ dht_layout_t *parent_layout = NULL;
+ int ret = 0;
+ gf_boolean_t lookup_everywhere = _gf_true;
+
+ /* lookup-optimize supersedes lookup-unhashed settings.
+ * If it is set, do not process search_unhashed
+ * If lookup-optimize if enabled, lookup everywhere if:
+ * - this is the rebalance daemon.
+ * - loc->parent is unavailable.
+ * - parent_layout is unavailable
+ * - parent_layout->commit_hash != conf->vol_commit_hash
+ */
- if (loc && loc->parent && loc->path) {
- if (!loc->name) {
- name = strrchr(loc->path, '/');
- if (name) {
- loc->name = name + 1;
- } else {
- goto out;
+ if (conf->lookup_optimize) {
+ if (!conf->defrag && loc->parent) {
+ ret = dht_inode_ctx_layout_get(loc->parent, this, &parent_layout);
+ if (!ret && parent_layout &&
+ (parent_layout->commit_hash == conf->vol_commit_hash)) {
+ lookup_everywhere = _gf_false;
}
}
- hash_subvol = dht_subvol_get_hashed(this, loc);
goto out;
- }
-
- if (!gf_uuid_is_null(inode->gfid)) {
- populate_loc.inode = inode_ref(inode);
- populate_loc.parent = inode_parent(populate_loc.inode, NULL, NULL);
- inode_path(populate_loc.inode, NULL, &path);
-
- if (!path)
- goto out;
-
- populate_loc.path = path;
- if (!populate_loc.name && populate_loc.path) {
- name = strrchr(populate_loc.path, '/');
- if (name) {
- populate_loc.name = name + 1;
-
+ } else {
+ if (conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_AUTO) {
+ if (loc->parent) {
+ ret = dht_inode_ctx_layout_get(loc->parent, this,
+ &parent_layout);
+ if (ret || !parent_layout ||
+ (!parent_layout->search_unhashed)) {
+ lookup_everywhere = _gf_false;
+ }
} else {
- goto out;
+ lookup_everywhere = _gf_false;
}
+
+ goto out;
}
- hash_subvol = dht_subvol_get_hashed(this, &populate_loc);
}
out:
- if (populate_loc.inode)
- loc_wipe(&populate_loc);
- return hash_subvol;
+ return lookup_everywhere;
}
int
@@ -2985,7 +3048,6 @@ dht_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
loc_t *loc = NULL;
xlator_t *prev = NULL;
int ret = 0;
- dht_layout_t *parent_layout = NULL;
uint32_t vol_commit_hash = 0;
GF_VALIDATE_OR_GOTO("dht", frame, err);
@@ -3000,104 +3062,82 @@ dht_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
local = frame->local;
loc = &local->loc;
- /* This is required for handling stale linkfile deletion,
- * or any more call which happens from this 'loc'.
- */
- if (!op_ret && gf_uuid_is_null(local->gfid))
- memcpy(local->gfid, stbuf->ia_gfid, 16);
-
gf_msg_debug(this->name, op_errno,
- "fresh_lookup returned for %s with op_ret %d", loc->path,
- op_ret);
+ "%s: fresh_lookup on %s returned with op_ret %d", loc->path,
+ prev->name, op_ret);
- if (!conf->vch_forced) {
- ret = dict_get_uint32(xattr, conf->commithash_xattr_name,
- &vol_commit_hash);
- if (ret == 0) {
- conf->vol_commit_hash = vol_commit_hash;
- }
- }
-
- if (ENTRY_MISSING(op_ret, op_errno)) {
- if (1 == conf->subvolume_cnt) {
- /* No need to lookup again */
- goto out;
- }
+ if (op_ret == -1) {
+ if (ENTRY_MISSING(op_ret, op_errno)) {
+ if (1 == conf->subvolume_cnt) {
+ /* No need to lookup again */
+ goto out;
+ }
- gf_msg_debug(this->name, 0, "Entry %s missing on subvol %s", loc->path,
- prev->name);
+ gf_msg_debug(this->name, 0, "Entry %s missing on subvol %s",
+ loc->path, prev->name);
- /* lookup-optimize supersedes lookup-unhashed settings,
- * - so if it is set, do not process search_unhashed
- * - except, in the case of rebalance daemon, we want to
- * force the lookup_everywhere behavior */
- if (!conf->defrag && conf->lookup_optimize && loc->parent) {
- ret = dht_inode_ctx_layout_get(loc->parent, this, &parent_layout);
- if (ret || !parent_layout ||
- (parent_layout->commit_hash != conf->vol_commit_hash)) {
- gf_msg_debug(this->name, 0,
- "hashes don't match (ret - %d,"
- " parent_layout - %p, parent_hash - %x,"
- " vol_hash - %x), do global lookup",
- ret, parent_layout,
- (parent_layout ? parent_layout->commit_hash : -1),
- conf->vol_commit_hash);
+ if (dht_should_lookup_everywhere(this, conf, loc)) {
local->op_errno = ENOENT;
dht_lookup_everywhere(frame, this, loc);
return 0;
}
+
} else {
- if (conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_ON) {
- local->op_errno = ENOENT;
- dht_lookup_everywhere(frame, this, loc);
+ /* posix returns ENODATA if the gfid is not set but the client and
+ * server protocol layers do not send the stbuf. We need to
+ * heal this so check if this is a directory on the other subvols.
+ */
+ if ((op_errno == ENOTCONN) || (op_errno == ENODATA)) {
+ dht_lookup_directory(frame, this, &local->loc);
return 0;
}
-
- if ((conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_AUTO) &&
- (loc->parent)) {
- ret = dht_inode_ctx_layout_get(loc->parent, this,
- &parent_layout);
- if (ret || !parent_layout)
- goto out;
- if (parent_layout->search_unhashed) {
- local->op_errno = ENOENT;
- dht_lookup_everywhere(frame, this, loc);
- return 0;
- }
- }
}
+ gf_msg_debug(this->name, op_errno, "%s: Lookup on subvolume %s failed",
+ loc->path, prev->name);
+ goto out;
}
- if (op_ret == 0) {
- is_dir = check_is_dir(inode, stbuf, xattr);
- if (is_dir) {
- local->inode = inode_ref(inode);
- local->xattr = dict_ref(xattr);
+ /* Lookup succeeded - op_ret = 0 */
+
+ /* This is required for handling stale linkfile deletion,
+ * or any more call which happens from this 'loc'.
+ */
+ if (gf_uuid_is_null(local->gfid)) {
+ /*This is set from the first successful response*/
+ memcpy(local->gfid, stbuf->ia_gfid, 16);
+ }
+
+ if (!conf->vch_forced) {
+ /* Update the commit hash in conf if it is found */
+ ret = dict_get_uint32(xattr, conf->commithash_xattr_name,
+ &vol_commit_hash);
+ if (ret == 0) {
+ conf->vol_commit_hash = vol_commit_hash;
}
}
- if (is_dir || (op_ret == -1 && op_errno == ENOTCONN)) {
+ is_dir = check_is_dir(inode, stbuf, xattr);
+ if (is_dir) {
+ /* A directory is present on all subvols, send the lookup to
+ * all subvols now */
+ local->inode = inode_ref(inode);
+ local->xattr = dict_ref(xattr);
dht_lookup_directory(frame, this, &local->loc);
return 0;
}
- if (op_ret == -1) {
- gf_msg_debug(this->name, op_errno,
- "Lookup of %s for subvolume"
- " %s failed",
- loc->path, prev->name);
- goto out;
- }
-
is_linkfile = check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name);
if (!is_linkfile) {
- /* non-directory and not a linkfile */
+ /* non-directory and not a linkto file. This is a data file
+ * Update the layout to point to the cached subvol
+ */
ret = dht_layout_preset(this, prev, inode);
if (ret < 0) {
gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_PRESET_FAILED,
- "could not set pre-set layout for subvolume %s", prev->name);
+ "%s: could not set pre-set layout for subvolume %s",
+ loc->path, prev->name);
op_ret = -1;
op_errno = EINVAL;
goto out;
@@ -3105,22 +3145,19 @@ dht_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
goto out;
}
+ /* This is a linkto file. Get the value of the target subvol from the
+ * linkto xattr and lookup there to see if the file exists
+ */
subvol = dht_linkfile_subvol(this, inode, stbuf, xattr);
if (!subvol) {
gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
- "linkfile not having link "
- "subvol for %s",
- loc->path);
-
- gf_msg_debug(this->name, 0,
- "linkfile not having link subvolume. path=%s", loc->path);
+ "%s: No link subvol for linkto", loc->path);
dht_lookup_everywhere(frame, this, loc);
return 0;
}
- gf_msg_debug(this->name, 0,
- "Calling lookup on linkto target %s for path %s", subvol->name,
- loc->path);
+ gf_msg_debug(this->name, 0, "%s: Calling lookup on linkto target %s",
+ loc->path, subvol->name);
STACK_WIND_COOKIE(frame, dht_lookup_linkfile_cbk, subvol, subvol,
subvol->fops->lookup, &local->loc, local->xattr_req);
@@ -3147,11 +3184,11 @@ err:
return 0;
}
-/* For directories, check if acl xattrs have been requested (by the acl xlator),
- * if not, request for them. These xattrs are needed for dht dir self-heal to
- * perform proper self-healing of dirs
+/* For directories, check if acl xattrs have been requested (by the acl
+ * xlator), if not, request for them. These xattrs are needed for dht dir
+ * self-heal to perform proper self-healing of dirs
*/
-void
+static void
dht_check_and_set_acl_xattr_req(xlator_t *this, dict_t *xattr_req)
{
int ret = 0;
@@ -3182,7 +3219,7 @@ dht_check_and_set_acl_xattr_req(xlator_t *this, dict_t *xattr_req)
* the mds information : trusted.glusterfs.dht.mds
* the acl info: See above
*/
-int
+static int
dht_set_dir_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req)
{
int ret = -EINVAL;
@@ -3223,50 +3260,109 @@ err:
return ret;
}
-int
-dht_set_file_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req)
+/* If the hashed subvol is present, send the lookup to only that subvol first.
+ * If no hashed subvol, send a lookup to all subvols and proceed based on the
+ * responses.
+ */
+static int
+dht_do_fresh_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc)
{
- int ret = -EINVAL;
+ int ret = -1;
dht_conf_t *conf = NULL;
+ xlator_t *hashed_subvol = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = -1;
+ int call_cnt = 0;
+ int i = 0;
conf = this->private;
if (!conf) {
+ op_errno = EINVAL;
goto err;
}
- if (!xattr_req) {
+ local = frame->local;
+ if (!local) {
+ op_errno = EINVAL;
goto err;
}
- /* Used to check whether this is a linkto file.
- */
- ret = dict_set_uint32(xattr_req, conf->link_xattr_name, 256);
- if (ret < 0) {
- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value:key = %s for "
- "path %s",
- conf->link_xattr_name, loc->path);
+ /* Since we don't know whether this is a file or a directory,
+ * request all xattrs*/
+ ret = dht_set_file_xattr_req(this, loc, local->xattr_req);
+ if (ret) {
+ op_errno = -ret;
goto err;
}
- /* This is used to make sure we don't unlink linkto files
- * which are the target of an ongoing file migration.
- */
- ret = dict_set_uint32(xattr_req, GLUSTERFS_OPEN_FD_COUNT, 4);
+ ret = dht_set_dir_xattr_req(this, loc, local->xattr_req);
if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value:key = %s for "
- "path %s",
- GLUSTERFS_OPEN_FD_COUNT, loc->path);
+ op_errno = -ret;
goto err;
}
- ret = 0;
+ /* Fuse sets a random value in gfid-req. If the gfid is missing
+ * on one or more subvols, posix will set the gfid to this value,
+ * causing GFID mismatches for directories. Remove the value fuse
+ * has sent before sending the lookup.
+ */
+ ret = dict_get_gfuuid(local->xattr_req, "gfid-req", &local->gfid_req);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "%s: No gfid-req available", loc->path);
+ } else {
+ dict_del(local->xattr_req, "gfid-req");
+ }
+ /* This should have been set in dht_lookup */
+ hashed_subvol = local->hashed_subvol;
+
+ if (!hashed_subvol) {
+ gf_msg_debug(this->name, 0,
+ "%s: no subvolume in layout for path, "
+ "checking on all the subvols to see if "
+ "it is a directory",
+ loc->path);
+
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+
+ /* Allocate a layout. This will be populated and saved in
+ * the dht inode_ctx on successful lookup
+ */
+ local->layout = dht_layout_new(this, conf->subvolume_cnt);
+ if (!local->layout) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ gf_msg_debug(this->name, 0,
+ "%s: Found null hashed subvol. Calling lookup"
+ " on all nodes.",
+ loc->path);
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_lookup_dir_cbk, conf->subvolumes[i],
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup, &local->loc,
+ local->xattr_req);
+ }
+ return 0;
+ }
+
+ /* if the hashed_subvol is non-null, send the lookup there first so
+ * as to see whether we have a file or a directory */
+ gf_msg_debug(this->name, 0, "%s: Calling fresh lookup on %s", loc->path,
+ hashed_subvol->name);
+
+ STACK_WIND_COOKIE(frame, dht_lookup_cbk, hashed_subvol, hashed_subvol,
+ hashed_subvol->fops->lookup, loc, local->xattr_req);
+ return 0;
err:
- return ret;
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ return 0;
}
-int
+static int
dht_do_revalidate(call_frame_t *frame, xlator_t *this, loc_t *loc)
{
xlator_t *subvol = NULL;
@@ -3341,6 +3437,11 @@ dht_do_revalidate(call_frame_t *frame, xlator_t *this, loc_t *loc)
}
local->mds_subvol = mds_subvol;
local->call_cnt = conf->subvolume_cnt;
+
+ /* local->call_cnt will change as responses are processed. Always use a
+ * local copy to loop through the STACK_WIND calls
+ */
+
call_cnt = local->call_cnt;
for (i = 0; i < call_cnt; i++) {
@@ -3374,91 +3475,11 @@ err:
return 0;
}
-int
-dht_do_fresh_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- int ret = -1;
- dht_conf_t *conf = NULL;
- xlator_t *hashed_subvol = NULL;
- dht_local_t *local = NULL;
- int op_errno = -1;
- int call_cnt = 0;
- int i = 0;
-
- conf = this->private;
- if (!conf) {
- op_errno = EINVAL;
- goto err;
- }
-
- local = frame->local;
- if (!local) {
- op_errno = EINVAL;
- goto err;
- }
-
- /* Since we don't know whether this is a file or a directory,
- * request all xattrs*/
- ret = dht_set_file_xattr_req(this, loc, local->xattr_req);
- if (ret) {
- op_errno = -ret;
- goto err;
- }
-
- ret = dht_set_dir_xattr_req(this, loc, local->xattr_req);
- if (ret) {
- op_errno = -ret;
- goto err;
- }
-
- /* This should have been set in dht_lookup */
- hashed_subvol = local->hashed_subvol;
-
- if (!hashed_subvol) {
- gf_msg_debug(this->name, 0,
- "%s: no subvolume in layout for path, "
- "checking on all the subvols to see if "
- "it is a directory",
- loc->path);
-
- call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
-
- local->layout = dht_layout_new(this, conf->subvolume_cnt);
- if (!local->layout) {
- op_errno = ENOMEM;
- goto err;
- }
-
- gf_msg_debug(this->name, 0,
- "%s: Found null hashed subvol. Calling lookup"
- " on all nodes.",
- loc->path);
-
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND_COOKIE(frame, dht_lookup_dir_cbk, conf->subvolumes[i],
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup, &local->loc,
- local->xattr_req);
- }
- return 0;
- }
-
- /* if we have the hashed_subvol, send the lookup there first so
- * as to see whether we have a file or a directory */
- gf_msg_debug(this->name, 0,
- "Calling fresh lookup for %s on"
- " %s",
- loc->path, hashed_subvol->name);
-
- STACK_WIND_COOKIE(frame, dht_lookup_cbk, hashed_subvol, hashed_subvol,
- hashed_subvol->fops->lookup, loc, local->xattr_req);
- return 0;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
- return 0;
-}
+/* Depending on the input, decide if this is a:
+ * fresh-lookup: loc->name is provided but no dht inode ctx
+ * revalidation: loc->name is provided, dht inode ctx is present
+ * discover: gfid based nameless lookup.
+ */
int
dht_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
@@ -3512,6 +3533,10 @@ dht_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
/* Nameless lookup */
+ /* This is usually sent by NFS. Lookups are done based on the gfid and
+ * no name information is available. Without the name, dht cannot calculate
+ * the hash and has to send a lookup to all subvols.
+ */
if (gf_uuid_is_null(loc->pargfid) && !gf_uuid_is_null(loc->gfid) &&
!__is_root_gfid(loc->inode->gfid)) {
local->cached_subvol = NULL;
@@ -3520,6 +3545,9 @@ dht_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
}
if (loc_is_root(loc)) {
+ /* Request the DHT commit hash xattr (trusted.glusterfs.dht.commithash)
+ * set on the brick root.
+ */
ret = dict_set_uint32(local->xattr_req, conf->commithash_xattr_name,
sizeof(uint32_t));
}
@@ -3528,12 +3556,14 @@ dht_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
hashed_subvol = dht_subvol_get_hashed(this, loc);
local->hashed_subvol = hashed_subvol;
- /* The entry has been looked up before and has an inode_ctx set
- */
if (is_revalidate(loc)) {
+ /* The entry has been looked up before and has a dht inode_ctx
+ */
dht_do_revalidate(frame, this, loc);
return 0;
} else {
+ /* Entry has not been looked up before
+ */
dht_do_fresh_lookup(frame, this, loc);
return 0;
}
@@ -3545,7 +3575,7 @@ err:
return 0;
}
-int
+static int
dht_unlink_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
@@ -3561,18 +3591,16 @@ dht_unlink_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if ((op_ret == -1) &&
!((op_errno == ENOENT) || (op_errno == ENOTCONN))) {
local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
gf_msg_debug(this->name, op_errno,
- "Unlink link: subvolume %s"
- " returned -1",
- prev->name);
- goto unlock;
+ "Unlink link: subvolume %s returned -1", prev->name);
+ goto post_unlock;
}
local->op_ret = 0;
}
-unlock:
UNLOCK(&frame->lock);
-
+post_unlock:
dht_set_fixed_dir_stat(&local->preparent);
dht_set_fixed_dir_stat(&local->postparent);
DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
@@ -3581,7 +3609,7 @@ unlock:
return 0;
}
-int
+static int
dht_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, struct iatt *preparent, struct iatt *postparent,
dict_t *xdata)
@@ -3602,9 +3630,10 @@ dht_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
} else {
local->op_ret = 0;
}
+ UNLOCK(&frame->lock);
gf_msg_debug(this->name, op_errno,
"Unlink: subvolume %s returned -1", prev->name);
- goto unlock;
+ goto post_unlock;
}
local->op_ret = 0;
@@ -3619,9 +3648,8 @@ dht_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
&local->postparent, 1);
}
}
-unlock:
UNLOCK(&frame->lock);
-
+post_unlock:
if (!local->op_ret) {
hashed_subvol = dht_subvol_get_hashed(this, &local->loc);
if (hashed_subvol && hashed_subvol != local->cached_subvol) {
@@ -3672,7 +3700,7 @@ dht_fix_layout_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-int
+static int
dht_err_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, dict_t *xdata)
{
@@ -3687,22 +3715,24 @@ dht_err_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
{
if (op_ret == -1) {
local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
prev->name);
- goto unlock;
+ goto post_unlock;
}
local->op_ret = 0;
}
-unlock:
UNLOCK(&frame->lock);
-
+post_unlock:
this_call_cnt = dht_frame_return(frame);
if (is_last_call(this_call_cnt)) {
if ((local->fop == GF_FOP_SETXATTR) ||
(local->fop == GF_FOP_FSETXATTR)) {
DHT_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno,
NULL);
+ /* 'local' itself may not be valid after this */
+ goto out;
}
if ((local->fop == GF_FOP_REMOVEXATTR) ||
(local->fop == GF_FOP_FREMOVEXATTR)) {
@@ -3711,6 +3741,7 @@ unlock:
}
}
+out:
return 0;
}
@@ -3741,7 +3772,7 @@ dht_dict_set_array(dict_t *dict, char *key, int32_t value[], int32_t size)
return ret;
}
-int
+static int
dht_common_mds_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *dict,
dict_t *xdata)
@@ -3757,27 +3788,34 @@ dht_common_mds_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (local->fop == GF_FOP_SETXATTR) {
DHT_STACK_UNWIND(setxattr, frame, 0, op_errno, local->xdata);
+ /* 'local' itself may not be valid after this */
+ goto out;
}
if (local->fop == GF_FOP_FSETXATTR) {
DHT_STACK_UNWIND(fsetxattr, frame, 0, op_errno, local->xdata);
+ /* 'local' itself may not be valid after this */
+ goto out;
}
if (local->fop == GF_FOP_REMOVEXATTR) {
DHT_STACK_UNWIND(removexattr, frame, 0, op_errno, NULL);
+ /* 'local' itself may not be valid after this */
+ goto out;
}
if (local->fop == GF_FOP_FREMOVEXATTR) {
DHT_STACK_UNWIND(fremovexattr, frame, 0, op_errno, NULL);
}
+out:
return 0;
}
/* Code to wind a xattrop call to add 1 on current mds internal xattr
value
*/
-int
+static int
dht_setxattr_non_mds_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xdata)
{
@@ -3798,11 +3836,14 @@ dht_setxattr_non_mds_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret && !local->op_ret) {
local->op_ret = op_ret;
local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
prev->this->name);
+ goto post_unlock;
}
}
UNLOCK(&frame->lock);
+post_unlock:
this_call_cnt = dht_frame_return(frame);
if (is_last_call(this_call_cnt)) {
@@ -3834,45 +3875,60 @@ dht_setxattr_non_mds_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
} else {
if (local->fop == GF_FOP_SETXATTR) {
DHT_STACK_UNWIND(setxattr, frame, 0, 0, local->xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
}
if (local->fop == GF_FOP_FSETXATTR) {
DHT_STACK_UNWIND(fsetxattr, frame, 0, 0, local->xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
}
if (local->fop == GF_FOP_REMOVEXATTR) {
DHT_STACK_UNWIND(removexattr, frame, 0, 0, NULL);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
}
if (local->fop == GF_FOP_FREMOVEXATTR) {
DHT_STACK_UNWIND(fremovexattr, frame, 0, 0, NULL);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
}
}
}
out:
- if (xattrop)
- dict_unref(xattrop);
if (ret) {
if (local->fop == GF_FOP_SETXATTR) {
DHT_STACK_UNWIND(setxattr, frame, 0, 0, local->xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
}
if (local->fop == GF_FOP_FSETXATTR) {
DHT_STACK_UNWIND(fsetxattr, frame, 0, 0, local->xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
}
if (local->fop == GF_FOP_REMOVEXATTR) {
DHT_STACK_UNWIND(removexattr, frame, 0, 0, NULL);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
}
if (local->fop == GF_FOP_FREMOVEXATTR) {
DHT_STACK_UNWIND(fremovexattr, frame, 0, 0, NULL);
}
}
+just_return:
+ if (xattrop)
+ dict_unref(xattrop);
return 0;
}
-int
+static int
dht_setxattr_mds_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xdata)
{
@@ -3932,16 +3988,22 @@ out:
if (local->fop == GF_FOP_SETXATTR) {
DHT_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno,
xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
}
if (local->fop == GF_FOP_FSETXATTR) {
DHT_STACK_UNWIND(fsetxattr, frame, local->op_ret, local->op_errno,
xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
}
if (local->fop == GF_FOP_REMOVEXATTR) {
DHT_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno,
NULL);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
}
if (local->fop == GF_FOP_FREMOVEXATTR) {
@@ -3949,10 +4011,11 @@ out:
NULL);
}
+just_return:
return 0;
}
-int
+static int
dht_xattrop_mds_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
{
@@ -3999,16 +4062,22 @@ out:
if (local->fop == GF_FOP_SETXATTR) {
DHT_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno,
xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
}
if (local->fop == GF_FOP_FSETXATTR) {
DHT_STACK_UNWIND(fsetxattr, frame, local->op_ret, local->op_errno,
xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
}
if (local->fop == GF_FOP_REMOVEXATTR) {
DHT_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno,
NULL);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
}
if (local->fop == GF_FOP_FREMOVEXATTR) {
@@ -4016,6 +4085,7 @@ out:
NULL);
}
+just_return:
return 0;
}
@@ -4058,7 +4128,7 @@ dht_fill_pathinfo_xattr(xlator_t *this, dht_local_t *local, char *xattr_buf,
}
}
-int
+static int
dht_vgetxattr_alloc_and_fill(dht_local_t *local, dict_t *xattr, xlator_t *this,
int op_errno)
{
@@ -4107,7 +4177,7 @@ out:
return ret;
}
-int
+static int
dht_vgetxattr_fill_and_set(dht_local_t *local, dict_t **dict, xlator_t *this,
gf_boolean_t flag)
{
@@ -4163,7 +4233,7 @@ out:
return ret;
}
-int
+static int
dht_find_local_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xattr,
dict_t *xdata)
@@ -4202,11 +4272,15 @@ dht_find_local_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
{
this_call_cnt = --local->call_cnt;
if (op_ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_GET_XATTR_FAILED,
- "getxattr err for dir");
local->op_ret = -1;
local->op_errno = op_errno;
- goto unlock;
+ UNLOCK(&frame->lock);
+ if (op_errno == ENODATA)
+ gf_msg_debug(this->name, 0, "failed to get node-uuid");
+ else
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_GET_XATTR_FAILED, "failed to get node-uuid");
+ goto post_unlock;
}
ret = dict_get_str(xattr, local->xsel, &uuid_list);
@@ -4227,18 +4301,19 @@ dht_find_local_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
index = conf->local_subvols_cnt;
uuid_list_copy = gf_strdup(uuid_list);
+ if (!uuid_list_copy)
+ goto unlock;
for (uuid_str = strtok_r(uuid_list, " ", &saveptr); uuid_str;
uuid_str = next_uuid_str) {
next_uuid_str = strtok_r(NULL, " ", &saveptr);
if (gf_uuid_parse(uuid_str, node_uuid)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UUID_PARSE_ERROR,
- "Failed to parse uuid"
- " for %s",
- prev->name);
local->op_ret = -1;
local->op_errno = EINVAL;
- goto unlock;
+ UNLOCK(&frame->lock);
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UUID_PARSE_ERROR,
+ "Failed to parse uuid for %s", prev->name);
+ goto post_unlock;
}
count++;
@@ -4295,7 +4370,7 @@ dht_find_local_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local->op_ret = 0;
unlock:
UNLOCK(&frame->lock);
-
+post_unlock:
if (!is_last_call(this_call_cnt))
goto out;
@@ -4317,7 +4392,7 @@ out:
return 0;
}
-int
+static int
dht_vgetxattr_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
{
@@ -4336,23 +4411,28 @@ dht_vgetxattr_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
this_call_cnt = --local->call_cnt;
if (op_ret < 0) {
if (op_errno != ENOTCONN) {
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- DHT_MSG_GET_XATTR_FAILED, "getxattr err for dir");
local->op_ret = -1;
local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_GET_XATTR_FAILED, "getxattr err for dir");
+ goto post_unlock;
}
goto unlock;
}
ret = dht_vgetxattr_alloc_and_fill(local, xattr, this, op_errno);
- if (ret)
+ if (ret) {
+ UNLOCK(&frame->lock);
gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_DICT_SET_FAILED,
"alloc or fill failure");
+ goto post_unlock;
+ }
}
unlock:
UNLOCK(&frame->lock);
-
+post_unlock:
if (!is_last_call(this_call_cnt))
goto out;
@@ -4378,7 +4458,7 @@ out:
return 0;
}
-int
+static int
dht_vgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, dict_t *xattr, dict_t *xdata)
{
@@ -4424,7 +4504,7 @@ cleanup:
return 0;
}
-int
+static int
dht_linkinfo_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xattr,
dict_t *xdata)
@@ -4446,16 +4526,16 @@ dht_linkinfo_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-int
+static int
dht_mds_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
{
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
- VALIDATE_OR_GOTO(frame, out);
- VALIDATE_OR_GOTO(frame->local, out);
- VALIDATE_OR_GOTO(this->private, out);
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(frame->local, err);
+ VALIDATE_OR_GOTO(this->private, err);
conf = this->private;
local = frame->local;
@@ -4464,9 +4544,7 @@ dht_mds_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local->op_ret = op_ret;
goto out;
}
- if (dict_get(xattr, conf->xattr_name)) {
- dict_del(xattr, conf->xattr_name);
- }
+ dict_del(xattr, conf->xattr_name);
local->op_ret = 0;
if (!local->xattr) {
@@ -4477,6 +4555,9 @@ out:
DHT_STACK_UNWIND(getxattr, frame, local->op_ret, op_errno, local->xattr,
xdata);
return 0;
+err:
+ DHT_STACK_UNWIND(getxattr, frame, -1, EINVAL, NULL, NULL);
+ return 0;
}
int
@@ -4486,14 +4567,22 @@ dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int this_call_cnt = 0;
dht_local_t *local = NULL;
dht_conf_t *conf = NULL;
+ int ret = 0;
- VALIDATE_OR_GOTO(frame, out);
- VALIDATE_OR_GOTO(frame->local, out);
- VALIDATE_OR_GOTO(this->private, out);
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(frame->local, err);
+ VALIDATE_OR_GOTO(this->private, err);
conf = this->private;
local = frame->local;
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto err;
+ return 0;
+ }
+
LOCK(&frame->lock);
{
if (!xattr || (op_ret == -1)) {
@@ -4501,27 +4590,10 @@ dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
goto unlock;
}
- if (dict_get(xattr, conf->xattr_name)) {
- dict_del(xattr, conf->xattr_name);
- }
-
- if (dict_get(xattr, conf->mds_xattr_key)) {
- dict_del(xattr, conf->mds_xattr_key);
- }
-
- /* filter out following two xattrs that need not
- * be visible on the mount point for geo-rep -
- * trusted.tier.fix.layout.complete and
- * trusted.tier.tier-dht.commithash
- */
-
- if (dict_get(xattr, conf->commithash_xattr_name)) {
- dict_del(xattr, conf->commithash_xattr_name);
- }
+ dict_del(xattr, conf->xattr_name);
+ dict_del(xattr, conf->mds_xattr_key);
- if (frame->root->pid >= 0 && dht_is_tier_xlator(this)) {
- dict_del(xattr, GF_XATTR_TIER_LAYOUT_FIXED_KEY);
- }
+ dict_del(xattr, conf->commithash_xattr_name);
if (frame->root->pid >= 0) {
GF_REMOVE_INTERNAL_XATTR("trusted.glusterfs.quota*", xattr);
@@ -4547,7 +4619,6 @@ unlock:
UNLOCK(&frame->lock);
this_call_cnt = dht_frame_return(frame);
-out:
if (is_last_call(this_call_cnt)) {
/* If we have a valid xattr received from any one of the
* subvolume, let's return it */
@@ -4559,9 +4630,12 @@ out:
local->xdata);
}
return 0;
+err:
+ DHT_STACK_UNWIND(getxattr, frame, -1, EINVAL, NULL, NULL);
+ return 0;
}
-int32_t
+static int32_t
dht_getxattr_unwind(call_frame_t *frame, int op_ret, int op_errno, dict_t *dict,
dict_t *xdata)
{
@@ -4569,7 +4643,7 @@ dht_getxattr_unwind(call_frame_t *frame, int op_ret, int op_errno, dict_t *dict,
return 0;
}
-int
+static int
dht_getxattr_get_real_filename_cbk(call_frame_t *frame, void *cookie,
xlator_t *this, int op_ret, int op_errno,
dict_t *xattr, dict_t *xdata)
@@ -4581,7 +4655,7 @@ dht_getxattr_get_real_filename_cbk(call_frame_t *frame, void *cookie,
LOCK(&frame->lock);
{
- if (local->op_errno == ENODATA || local->op_errno == EOPNOTSUPP) {
+ if (local->op_errno == EOPNOTSUPP) {
/* Nothing to do here, we have already found
* a subvol which does not have the get_real_filename
* optimization. If condition is for simple logic.
@@ -4590,7 +4664,7 @@ dht_getxattr_get_real_filename_cbk(call_frame_t *frame, void *cookie,
}
if (op_ret == -1) {
- if (op_errno == ENODATA || op_errno == EOPNOTSUPP) {
+ if (op_errno == EOPNOTSUPP) {
/* This subvol does not have the optimization.
* Better let the user know we don't support it.
* Remove previous results if any.
@@ -4608,16 +4682,17 @@ dht_getxattr_get_real_filename_cbk(call_frame_t *frame, void *cookie,
local->op_ret = op_ret;
local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
gf_msg(this->name, GF_LOG_WARNING, op_errno,
DHT_MSG_UPGRADE_BRICKS,
"At least "
"one of the bricks does not support "
"this operation. Please upgrade all "
"bricks.");
- goto unlock;
+ goto post_unlock;
}
- if (op_errno == ENOENT) {
+ if (op_errno == ENOATTR) {
/* Do nothing, our defaults are set to this.
*/
goto unlock;
@@ -4629,9 +4704,10 @@ dht_getxattr_get_real_filename_cbk(call_frame_t *frame, void *cookie,
* down subvol and return a good result(if any)
* from other subvol.
*/
+ UNLOCK(&frame->lock);
gf_msg(this->name, GF_LOG_WARNING, op_errno,
DHT_MSG_GET_XATTR_FAILED, "Failed to get real filename.");
- goto unlock;
+ goto post_unlock;
}
/* This subvol has the required file.
@@ -4652,13 +4728,13 @@ dht_getxattr_get_real_filename_cbk(call_frame_t *frame, void *cookie,
local->op_ret = op_ret;
local->op_errno = 0;
- gf_msg_debug(this->name, 0,
- "Found a matching "
- "file.");
+ UNLOCK(&frame->lock);
+ gf_msg_debug(this->name, 0, "Found a matching file.");
+ goto post_unlock;
}
unlock:
UNLOCK(&frame->lock);
-
+post_unlock:
this_call_cnt = dht_frame_return(frame);
if (is_last_call(this_call_cnt)) {
DHT_STACK_UNWIND(getxattr, frame, local->op_ret, local->op_errno,
@@ -4668,7 +4744,7 @@ unlock:
return 0;
}
-int
+static int
dht_getxattr_get_real_filename(call_frame_t *frame, xlator_t *this, loc_t *loc,
const char *key, dict_t *xdata)
{
@@ -4684,7 +4760,7 @@ dht_getxattr_get_real_filename(call_frame_t *frame, xlator_t *this, loc_t *loc,
cnt = local->call_cnt = layout->cnt;
local->op_ret = -1;
- local->op_errno = ENOENT;
+ local->op_errno = ENOATTR;
for (i = 0; i < cnt; i++) {
subvol = layout->list[i].xlator;
@@ -4695,7 +4771,7 @@ dht_getxattr_get_real_filename(call_frame_t *frame, xlator_t *this, loc_t *loc,
return 0;
}
-int
+static int
dht_marker_populate_args(call_frame_t *frame, int type, int *gauge,
xlator_t **subvols)
{
@@ -4712,8 +4788,8 @@ dht_marker_populate_args(call_frame_t *frame, int type, int *gauge,
return layout->cnt;
}
-int
-dht_is_debug_xattr_key(char **array, char *key)
+static int
+dht_is_debug_xattr_key(const char **array, char *key)
{
int i = 0;
@@ -4727,7 +4803,7 @@ dht_is_debug_xattr_key(char **array, char *key)
/* Note we already have frame->local initialised here*/
-int
+static int
dht_handle_debug_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
const char *key)
{
@@ -4739,10 +4815,6 @@ dht_handle_debug_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
const char *name = NULL;
local = frame->local;
- if (!key) {
- op_errno = EINVAL;
- goto out;
- }
if (dht_is_debug_xattr_key(dht_dbg_vxattrs, (char *)key) == -1) {
goto out;
@@ -4796,6 +4868,60 @@ out:
return 0;
}
+/* Virtual Xattr which returns 1 if all subvols are up,
+ else returns 0. Geo-rep then uses this virtual xattr
+ after a fresh mount and starts the I/O.
+*/
+
+enum dht_vxattr_subvol {
+ DHT_VXATTR_SUBVOLS_UP = 1,
+ DHT_VXATTR_SUBVOLS_DOWN = 0,
+};
+
+int
+dht_vgetxattr_subvol_status(call_frame_t *frame, xlator_t *this,
+ const char *key)
+{
+ dht_local_t *local = NULL;
+ int ret = -1;
+ int op_errno = ENODATA;
+ int value = DHT_VXATTR_SUBVOLS_UP;
+ int i = 0;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+ local = frame->local;
+
+ if (!key) {
+ op_errno = EINVAL;
+ goto out;
+ }
+ local->xattr = dict_new();
+ if (!local->xattr) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->subvolume_status[i]) {
+ value = DHT_VXATTR_SUBVOLS_DOWN;
+ gf_msg_debug(this->name, 0, "subvol %s is down ",
+ conf->subvolumes[i]->name);
+ break;
+ }
+ }
+ ret = dict_set_int8(local->xattr, (char *)key, value);
+ if (ret < 0) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+
+out:
+ DHT_STACK_UNWIND(getxattr, frame, ret, op_errno, local->xattr, NULL);
+ return 0;
+}
+
int
dht_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key,
dict_t *xdata)
@@ -4853,6 +4979,11 @@ dht_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key,
goto err;
}
+ if (strncmp(key, DHT_SUBVOL_STATUS_KEY, SLEN(DHT_SUBVOL_STATUS_KEY)) == 0) {
+ dht_vgetxattr_subvol_status(frame, this, key);
+ return 0;
+ }
+
/* skip over code which is irrelevant if !DHT_IS_DIR(layout) */
if (!DHT_IS_DIR(layout))
goto no_dht_is_dir;
@@ -5085,8 +5216,7 @@ dht_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key,
}
}
- if (fd->inode)
- gf_uuid_unparse(fd->inode->gfid, gfid);
+ gf_uuid_unparse(fd->inode->gfid, gfid);
if ((fd->inode->ia_type == IA_IFDIR) && key &&
(strncmp(key, GF_XATTR_LOCKINFO_KEY, SLEN(GF_XATTR_LOCKINFO_KEY)) !=
@@ -5149,6 +5279,53 @@ err:
return 0;
}
+static int
+dht_setxattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
+{
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
+
+ if (!frame || !frame->local)
+ goto err;
+
+ local = frame->local;
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno,
+ local->rebalance.xdata);
+ return 0;
+ }
+
+ if (subvol == NULL)
+ goto err;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ if (local->fop == GF_FOP_SETXATTR) {
+ STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol,
+ subvol->fops->setxattr, &local->loc,
+ local->rebalance.xattr, local->rebalance.flags,
+ local->xattr_req);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol,
+ subvol->fops->fsetxattr, local->fd,
+ local->rebalance.xattr, local->rebalance.flags,
+ local->xattr_req);
+ }
+
+ return 0;
+
+err:
+ DHT_STACK_UNWIND(setxattr, frame, (local ? local->op_ret : -1), op_errno,
+ NULL);
+ return 0;
+}
+
int
dht_file_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xdata)
@@ -5165,8 +5342,8 @@ dht_file_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local->op_errno = op_errno;
- if ((local->fop == GF_FOP_FSETXATTR) && op_ret == -1 &&
- (op_errno == EBADF) && !(local->fd_checked)) {
+ if ((local->fop == GF_FOP_FSETXATTR) &&
+ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
@@ -5239,7 +5416,7 @@ dht_is_user_xattr(dict_t *this, char *key, data_t *value, void *data)
/* Common code to wind a (f)(set|remove)xattr call to set xattr on directory
*/
-int
+static int
dht_dir_common_set_remove_xattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
fd_t *fd, dict_t *xattr, int flags,
dict_t *xdata, int *op_errno)
@@ -5256,11 +5433,13 @@ dht_dir_common_set_remove_xattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
int call_cnt = 0;
dht_local_t *local = NULL;
char gfid_local[GF_UUID_BUF_SIZE] = {0};
+ char **xattrs_to_heal;
conf = this->private;
local = frame->local;
call_cnt = conf->subvolume_cnt;
local->flags = flags;
+ xattrs_to_heal = get_xattrs_to_heal();
if (!gf_uuid_is_null(local->gfid)) {
gf_uuid_unparse(local->gfid, gfid_local);
@@ -5343,9 +5522,8 @@ dht_dir_common_set_remove_xattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
} else {
gf_msg(this->name, GF_LOG_ERROR, 0,
DHT_MSG_HASHED_SUBVOL_GET_FAILED,
- "Failed to get mds subvol for path %s"
- "gfid is %s ",
- loc->path, gfid_local);
+ "%s: Failed to get mds subvol. (gfid is %s)", loc->path,
+ gfid_local);
}
(*op_errno) = ENOENT;
goto err;
@@ -5493,7 +5671,7 @@ err:
return 0;
}
-int
+static int
dht_checking_pathinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xattr,
dict_t *xdata)
@@ -5532,54 +5710,7 @@ out:
return 0;
}
-int
-dht_setxattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
-{
- dht_local_t *local = NULL;
- int op_errno = EINVAL;
-
- if (!frame || !frame->local)
- goto err;
-
- local = frame->local;
- op_errno = local->op_errno;
-
- if (we_are_not_migrating(ret)) {
- /* This dht xlator is not migrating the file. Unwind and
- * pass on the original mode bits so the higher DHT layer
- * can handle this.
- */
- DHT_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno,
- local->rebalance.xdata);
- return 0;
- }
-
- if (subvol == NULL)
- goto err;
-
- local->call_cnt = 2; /* This is the second attempt */
-
- if (local->fop == GF_FOP_SETXATTR) {
- STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol,
- subvol->fops->setxattr, &local->loc,
- local->rebalance.xattr, local->rebalance.flags,
- local->xattr_req);
- } else {
- STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol,
- subvol->fops->fsetxattr, local->fd,
- local->rebalance.xattr, local->rebalance.flags,
- local->xattr_req);
- }
-
- return 0;
-
-err:
- DHT_STACK_UNWIND(setxattr, frame, (local ? local->op_ret : -1), op_errno,
- NULL);
- return 0;
-}
-
-int
+static int
dht_nuke_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
@@ -5588,7 +5719,7 @@ dht_nuke_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-int
+static int
dht_nuke_dir(call_frame_t *frame, xlator_t *this, loc_t *loc, data_t *tmp)
{
if (!IA_ISDIR(loc->inode->ia_type)) {
@@ -5741,22 +5872,7 @@ dht_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr,
if (local->rebalance.target_node) {
local->flags = forced_rebalance;
- /* Flag to suggest its a tiering migration
- * The reason for this dic key-value is that
- * promotions and demotions are multithreaded
- * so the original frame from gf_defrag_start()
- * is not carried. A new frame will be created when
- * we do syncop_setxattr(). This does not have the
- * frame->root->pid of the original frame. So we pass
- * this dic key-value when we do syncop_setxattr() to do
- * data migration and set the frame->root->pid to
- * GF_CLIENT_PID_TIER_DEFRAG in dht_setxattr() just before
- * calling dht_start_rebalance_task() */
- tmp = dict_get(xattr, TIERING_MIGRATION_KEY);
- if (tmp)
- frame->root->pid = GF_CLIENT_PID_TIER_DEFRAG;
- else
- frame->root->pid = GF_CLIENT_PID_DEFRAG;
+ frame->root->pid = GF_CLIENT_PID_DEFRAG;
ret = dht_start_rebalance_task(this, frame);
if (!ret)
@@ -5875,6 +5991,50 @@ err:
return 0;
}
+static int
+dht_removexattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
+{
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
+
+ if (!frame || !frame->local)
+ goto err;
+
+ local = frame->local;
+ op_errno = local->op_errno;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno,
+ local->rebalance.xdata);
+ return 0;
+ }
+
+ if (subvol == NULL)
+ goto err;
+
+ if (local->fop == GF_FOP_REMOVEXATTR) {
+ STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol,
+ subvol->fops->removexattr, &local->loc, local->key,
+ local->xattr_req);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol,
+ subvol->fops->fremovexattr, local->fd, local->key,
+ local->xattr_req);
+ }
+
+ return 0;
+
+err:
+ DHT_STACK_UNWIND(removexattr, frame, -1, op_errno, NULL);
+ return 0;
+}
+
int
dht_file_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xdata)
@@ -5891,8 +6051,8 @@ dht_file_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local->op_errno = op_errno;
- if ((local->fop == GF_FOP_FREMOVEXATTR) && (op_ret == -1) &&
- (op_errno == EBADF) && !(local->fd_checked)) {
+ if ((local->fop == GF_FOP_FREMOVEXATTR) &&
+ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
@@ -5952,84 +6112,6 @@ out:
}
int
-dht_removexattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
-{
- dht_local_t *local = NULL;
- int op_errno = EINVAL;
-
- if (!frame || !frame->local)
- goto err;
-
- local = frame->local;
- op_errno = local->op_errno;
-
- local->call_cnt = 2; /* This is the second attempt */
-
- if (we_are_not_migrating(ret)) {
- /* This dht xlator is not migrating the file. Unwind and
- * pass on the original mode bits so the higher DHT layer
- * can handle this.
- */
- DHT_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno,
- local->rebalance.xdata);
- return 0;
- }
-
- if (subvol == NULL)
- goto err;
-
- if (local->fop == GF_FOP_REMOVEXATTR) {
- STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol,
- subvol->fops->removexattr, &local->loc, local->key,
- local->xattr_req);
- } else {
- STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol,
- subvol->fops->fremovexattr, local->fd, local->key,
- local->xattr_req);
- }
-
- return 0;
-
-err:
- DHT_STACK_UNWIND(removexattr, frame, -1, op_errno, NULL);
- return 0;
-}
-
-int
-dht_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- xlator_t *prev = NULL;
-
- local = frame->local;
- prev = cookie;
-
- LOCK(&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
- prev->name);
- goto unlock;
- }
-
- local->op_ret = 0;
- }
-unlock:
- UNLOCK(&frame->lock);
-
- this_call_cnt = dht_frame_return(frame);
- if (is_last_call(this_call_cnt)) {
- DHT_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno,
- NULL);
- }
-
- return 0;
-}
-
-int
dht_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
const char *key, dict_t *xdata)
{
@@ -6206,16 +6288,16 @@ dht_fd_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
{
if (op_ret == -1) {
local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
prev->name);
- goto unlock;
+ goto post_unlock;
}
local->op_ret = 0;
}
-unlock:
UNLOCK(&frame->lock);
-
+post_unlock:
this_call_cnt = dht_frame_return(frame);
if (is_last_call(this_call_cnt))
DHT_STACK_UNWIND(open, frame, local->op_ret, local->op_errno, local->fd,
@@ -6227,7 +6309,7 @@ unlock:
/*
* dht_normalize_stats -
*/
-void
+static void
dht_normalize_stats(struct statvfs *buf, unsigned long bsize,
unsigned long frsize)
{
@@ -6246,7 +6328,7 @@ dht_normalize_stats(struct statvfs *buf, unsigned long bsize,
}
}
-int
+static int
dht_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, struct statvfs *statvfs, dict_t *xdata)
{
@@ -6356,9 +6438,7 @@ dht_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
int i = -1;
inode_t *inode = NULL;
inode_table_t *itable = NULL;
- uuid_t root_gfid = {
- 0,
- };
+ static uuid_t root_gfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
loc_t newloc = {
0,
};
@@ -6384,7 +6464,6 @@ dht_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
}
loc = &local->loc2;
- root_gfid[15] = 1;
inode = inode_find(itable, root_gfid);
if (!inode) {
@@ -6493,15 +6572,16 @@ err:
}
/* dht_readdirp_cbk creates a new dentry and dentry->inode is not assigned.
- This functions assigns an inode if all of the following conditions are true:
+ This functions assigns an inode if all of the following conditions are
+ true:
- * DHT has only one child. In this case the entire layout is present on this
- single child and hence we can set complete layout in inode.
- * backend has complete layout and there are no anomalies in it and from this
- information layout can be constructed and set in inode.
+ * DHT has only one child. In this case the entire layout is present on
+ this single child and hence we can set complete layout in inode.
+ * backend has complete layout and there are no anomalies in it and from
+ this information layout can be constructed and set in inode.
*/
-void
+static void
dht_populate_inode_for_dentry(xlator_t *this, xlator_t *subvol,
gf_dirent_t *entry, gf_dirent_t *orig_entry)
{
@@ -6545,9 +6625,10 @@ out:
return;
}
-/* Posix returns op_errno = ENOENT to indicate that there are no more entries
+/* Posix returns op_errno = ENOENT to indicate that there are no more
+ * entries
*/
-int
+static int
dht_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
{
@@ -6603,10 +6684,9 @@ dht_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
layout = local->layout;
- /* We have seen crashes in while running "rm -rf" on tier volumes
- when the layout was NULL on the hot tier. This will skip the
- entries on the subvol without a layout, hence preventing the crash
- but rmdir might fail with "directory not empty" errors*/
+ /* This will skip the entries on the subvol without a layout,
+ * hence preventing the crash but rmdir might fail with
+ * "directory not empty" errors*/
if (layout == NULL)
goto done;
@@ -6624,13 +6704,12 @@ dht_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
orig_entry->d_name, orig_entry->d_type);
if (IA_ISINVAL(orig_entry->d_stat.ia_type)) {
- /*stat failed somewhere- ignore this entry*/
- gf_msg_debug(this->name, EINVAL,
- "Invalid stat, ignoring entry "
- "%s gfid %s",
+ /*stat failed somewhere- display this entry but the data may
+ * be inaccurate.
+ */
+ gf_msg_debug(this->name, EINVAL, "Invalid stat for %s (gfid %s)",
orig_entry->d_name,
uuid_utoa(orig_entry->d_stat.ia_gfid));
- continue;
}
if (check_is_linkfile(NULL, (&orig_entry->d_stat), orig_entry->dict,
@@ -6712,6 +6791,12 @@ dht_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
}
}
} else {
+ if (orig_entry->dict &&
+ dict_get(orig_entry->dict, conf->link_xattr_name)) {
+ /* Strip out the S and T flags set by rebalance*/
+ DHT_STRIP_PHASE1_FLAGS(&entry->d_stat);
+ }
+
if (orig_entry->inode) {
ret = dht_layout_preset(this, prev, orig_entry->inode);
if (ret)
@@ -6833,7 +6918,7 @@ unwind:
return 0;
}
-int
+static int
dht_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
{
@@ -6956,7 +7041,7 @@ unwind:
return 0;
}
-int
+static int
dht_do_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
off_t yoff, int whichop, dict_t *dict)
{
@@ -7080,7 +7165,7 @@ dht_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
return 0;
}
-int
+static int
dht_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, dict_t *xdata)
{
@@ -7093,12 +7178,10 @@ dht_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
{
if (op_ret == -1)
local->op_errno = op_errno;
-
- if (op_ret == 0)
+ else if (op_ret == 0)
local->op_ret = 0;
}
UNLOCK(&frame->lock);
-
this_call_cnt = dht_frame_return(frame);
if (is_last_call(this_call_cnt))
DHT_STACK_UNWIND(fsyncdir, frame, local->op_ret, local->op_errno,
@@ -7212,7 +7295,7 @@ out:
return 0;
}
-int
+static int
dht_mknod_linkfile_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *stbuf, struct iatt *preparent,
@@ -7263,7 +7346,7 @@ err:
return 0;
}
-int
+static int
dht_mknod_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,
xlator_t *subvol, loc_t *loc, dev_t rdev,
mode_t mode, mode_t umask, dict_t *params)
@@ -7309,7 +7392,7 @@ out:
return 0;
}
-int32_t
+static int32_t
dht_mknod_do(call_frame_t *frame)
{
dht_local_t *local = NULL;
@@ -7359,7 +7442,7 @@ err:
return 0;
}
-int32_t
+static int32_t
dht_mknod_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
@@ -7367,7 +7450,7 @@ dht_mknod_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-int32_t
+static int32_t
dht_mknod_finish(call_frame_t *frame, xlator_t *this, int op_ret,
int invoke_cbk)
{
@@ -7419,7 +7502,7 @@ done:
return 0;
}
-int32_t
+static int32_t
dht_mknod_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
@@ -7448,11 +7531,15 @@ dht_mknod_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
err:
- dht_mknod_finish(frame, this, -1, 0);
+ if (local)
+ dht_mknod_finish(frame, this, -1, 0);
+ else
+ DHT_STACK_UNWIND(mknod, frame, -1, EINVAL, NULL, NULL, NULL, NULL,
+ NULL);
return 0;
}
-int32_t
+static int32_t
dht_mknod_lock(call_frame_t *frame, xlator_t *subvol)
{
dht_local_t *local = NULL;
@@ -7497,7 +7584,7 @@ err:
return -1;
}
-int
+static int
dht_refresh_parent_layout_resume(call_frame_t *frame, xlator_t *this, int ret,
int invoke_cbk)
{
@@ -7527,7 +7614,7 @@ dht_refresh_parent_layout_resume(call_frame_t *frame, xlator_t *this, int ret,
return 0;
}
-int
+static int
dht_refresh_parent_layout_done(call_frame_t *frame)
{
dht_local_t *local = NULL;
@@ -7548,7 +7635,7 @@ resume:
return 0;
}
-int
+static int
dht_handle_parent_layout_change(xlator_t *this, call_stub_t *stub)
{
call_frame_t *refresh_frame = NULL, *frame = NULL;
@@ -7583,7 +7670,7 @@ dht_handle_parent_layout_change(xlator_t *this, call_stub_t *stub)
return 0;
}
-int32_t
+static int32_t
dht_call_mkdir_stub(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
@@ -7606,7 +7693,7 @@ dht_call_mkdir_stub(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-int32_t
+static int32_t
dht_guard_parent_layout_and_namespace(xlator_t *subvol, call_stub_t *stub)
{
dht_local_t *local = NULL;
@@ -7917,7 +8004,58 @@ err:
return 0;
}
-int
+static int
+dht_remove_stale_linkto_cbk(int ret, call_frame_t *sync_frame, void *data)
+{
+ DHT_STACK_DESTROY(sync_frame);
+ return 0;
+}
+
+static int
+dht_remove_stale_linkto(void *data)
+{
+ call_frame_t *frame = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ dict_t *xdata_in = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO("dht", data, out);
+
+ frame = data;
+ local = frame->local;
+ this = frame->this;
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", local, out);
+ GF_VALIDATE_OR_GOTO("dht", local->link_subvol, out);
+
+ xdata_in = dict_new();
+ if (!xdata_in)
+ goto out;
+
+ ret = dht_fill_dict_to_avoid_unlink_of_migrating_file(xdata_in);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, 0,
+ "Failed to set keys for stale linkto"
+ "deletion on path %s",
+ local->loc.path);
+ goto out;
+ }
+
+ ret = syncop_unlink(local->link_subvol, &local->loc, xdata_in, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, 0,
+ "Removal of linkto failed"
+ " on path %s at subvol %s",
+ local->loc.path, local->link_subvol->name);
+ }
+out:
+ if (xdata_in)
+ dict_unref(xdata_in);
+ return ret;
+}
+
+static int
dht_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, inode_t *inode, struct iatt *stbuf,
struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
@@ -8033,7 +8171,7 @@ out:
return 0;
}
-int
+static int
dht_link2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -8089,7 +8227,7 @@ err:
return 0;
}
-int
+static int
dht_link_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, struct iatt *preparent,
@@ -8192,6 +8330,11 @@ dht_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
xlator_t *prev = NULL;
int ret = -1;
dht_local_t *local = NULL;
+ gf_boolean_t parent_layout_changed = _gf_false;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ xlator_t *subvol = NULL;
+
+ local = frame->local;
local = frame->local;
if (!local) {
@@ -8200,8 +8343,69 @@ dht_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
goto out;
}
- if (op_ret == -1)
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ parent_layout_changed = (xdata &&
+ dict_get(xdata, GF_PREOP_CHECK_FAILED))
+ ? _gf_true
+ : _gf_false;
+
+ if (parent_layout_changed) {
+ if (local && local->lock[0].layout.parent_layout.locks) {
+ /* Returning failure as the layout could not be fixed even under
+ * the lock */
+ goto out;
+ }
+
+ gf_uuid_unparse(local->loc.parent->gfid, pgfid);
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "create (%s/%s) (path: %s): parent layout "
+ "changed. Attempting a layout refresh and then a "
+ "retry",
+ pgfid, local->loc.name, local->loc.path);
+
+ /*
+ dht_refresh_layout needs directory info in local->loc.Hence,
+ storing the parent_loc in local->loc and storing the create
+ context in local->loc2. We will restore this information in
+ dht_creation_do.
+ */
+
+ loc_wipe(&local->loc2);
+
+ ret = loc_copy(&local->loc2, &local->loc);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "loc_copy failed %s", local->loc.path);
+
+ goto out;
+ }
+
+ loc_wipe(&local->loc);
+
+ ret = dht_build_parent_loc(this, &local->loc, &local->loc2,
+ &op_errno);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_LOC_FAILED,
+ "parent loc build failed");
+ goto out;
+ }
+
+ subvol = dht_subvol_get_hashed(this, &local->loc2);
+
+ ret = dht_create_lock(frame, subvol);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR,
+ "locking parent failed");
+ goto out;
+ }
+
+ return 0;
+ }
+
goto out;
+ }
prev = cookie;
@@ -8256,7 +8460,7 @@ out:
return 0;
}
-int
+static int
dht_create_linkfile_create_cbk(call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
inode_t *inode, struct iatt *stbuf,
@@ -8307,7 +8511,7 @@ err:
return 0;
}
-int
+static int
dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,
xlator_t *subvol, loc_t *loc, int32_t flags,
mode_t mode, mode_t umask, fd_t *fd,
@@ -8322,6 +8526,8 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,
gf_msg_debug(this->name, 0, "creating %s on %s", loc->path,
subvol->name);
+ dht_set_parent_layout_in_dict(loc, this, local);
+
STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,
subvol->fops->create, loc, flags, mode, umask, fd,
params);
@@ -8330,10 +8536,6 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,
avail_subvol = dht_free_disk_available_subvol(this, subvol, local);
if (avail_subvol != subvol) {
- local->params = dict_ref(params);
- local->flags = flags;
- local->mode = mode;
- local->umask = umask;
local->cached_subvol = avail_subvol;
local->hashed_subvol = subvol;
@@ -8349,6 +8551,8 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,
gf_msg_debug(this->name, 0, "creating %s on %s", loc->path,
subvol->name);
+ dht_set_parent_layout_in_dict(loc, this, local);
+
STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,
subvol->fops->create, loc, flags, mode, umask, fd,
params);
@@ -8417,7 +8621,7 @@ out:
return ret;
}
-int32_t
+static int32_t
dht_create_do(call_frame_t *frame)
{
dht_local_t *local = NULL;
@@ -8467,7 +8671,7 @@ err:
return 0;
}
-int32_t
+static int32_t
dht_create_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
@@ -8475,7 +8679,7 @@ dht_create_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-int32_t
+static int32_t
dht_create_finish(call_frame_t *frame, xlator_t *this, int op_ret,
int invoke_cbk)
{
@@ -8527,7 +8731,7 @@ done:
return 0;
}
-int32_t
+static int32_t
dht_create_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
@@ -8556,7 +8760,11 @@ dht_create_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
err:
- dht_create_finish(frame, this, -1, 0);
+ if (local)
+ dht_create_finish(frame, this, -1, 0);
+ else
+ DHT_STACK_UNWIND(create, frame, -1, EINVAL, NULL, NULL, NULL, NULL,
+ NULL, NULL);
return 0;
}
@@ -8606,6 +8814,60 @@ err:
}
int
+dht_set_parent_layout_in_dict(loc_t *loc, xlator_t *this, dht_local_t *local)
+{
+ dht_conf_t *conf = this->private;
+ dht_layout_t *parent_layout = NULL;
+ int *parent_disk_layout = NULL;
+ xlator_t *hashed_subvol = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ int ret = 0;
+
+ gf_uuid_unparse(loc->parent->gfid, pgfid);
+
+ parent_layout = dht_layout_get(this, loc->parent);
+ hashed_subvol = dht_subvol_get_hashed(this, loc);
+
+ ret = dht_disk_layout_extract_for_subvol(this, parent_layout, hashed_subvol,
+ &parent_disk_layout);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "extracting in-memory layout of parent failed. ",
+ gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+ ret = dict_set_str_sizen(local->params, GF_PREOP_PARENT_KEY,
+ conf->xattr_name);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "setting %s key in params dictionary failed. ",
+ gf_fop_list[local->fop], pgfid, loc->name, loc->path,
+ GF_PREOP_PARENT_KEY);
+ goto err;
+ }
+
+ ret = dict_set_bin(local->params, conf->xattr_name, parent_disk_layout,
+ 4 * 4);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "setting parent-layout in params dictionary failed. ",
+ gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+err:
+ dht_layout_unref(this, parent_layout);
+ return ret;
+}
+
+int
dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
mode_t mode, mode_t umask, fd_t *fd, dict_t *params)
{
@@ -8631,6 +8893,11 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
goto err;
}
+ local->params = dict_ref(params);
+ local->flags = flags;
+ local->mode = mode;
+ local->umask = umask;
+
if (dht_filter_loc_subvol_key(this, loc, &local->loc, &subvol)) {
gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
"creating %s on %s (got create on %s)", local->loc.path,
@@ -8646,10 +8913,6 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
if (hashed_subvol && (hashed_subvol != subvol)) {
/* Create the linkto file and then the data file */
- local->params = dict_ref(params);
- local->flags = flags;
- local->mode = mode;
- local->umask = umask;
local->cached_subvol = subvol;
local->hashed_subvol = hashed_subvol;
@@ -8662,6 +8925,9 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
* file as we expect a lookup everywhere if there are problems
* with the parent layout
*/
+
+ dht_set_parent_layout_in_dict(loc, this, local);
+
STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,
subvol->fops->create, &local->loc, flags, mode, umask,
fd, params);
@@ -8713,11 +8979,6 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
goto err;
}
- local->params = dict_ref(params);
- local->flags = flags;
- local->mode = mode;
- local->umask = umask;
-
loc_wipe(&local->loc);
ret = dht_build_parent_loc(this, &local->loc, loc, &op_errno);
@@ -8755,7 +9016,7 @@ err:
return 0;
}
-int
+static int
dht_mkdir_selfheal_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
@@ -8789,7 +9050,7 @@ dht_mkdir_selfheal_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-int
+static int
dht_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, inode_t *inode, struct iatt *stbuf,
struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
@@ -8857,13 +9118,13 @@ unlock:
return 0;
}
-int
+static int
dht_mkdir_hashed_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata);
-int
+static int
dht_mkdir_helper(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
mode_t umask, dict_t *params)
{
@@ -8986,7 +9247,7 @@ err:
return 0;
}
-int
+static int
dht_mkdir_hashed_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, struct iatt *preparent,
@@ -9003,8 +9264,6 @@ dht_mkdir_hashed_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
gf_boolean_t parent_layout_changed = _gf_false;
call_stub_t *stub = NULL;
- VALIDATE_OR_GOTO(this->private, err);
-
local = frame->local;
prev = cookie;
layout = local->layout;
@@ -9119,7 +9378,7 @@ err:
return 0;
}
-int
+static int
dht_mkdir_guard_parent_layout_cbk(call_frame_t *frame, xlator_t *this,
loc_t *loc, mode_t mode, mode_t umask,
dict_t *params)
@@ -9270,7 +9529,7 @@ err:
return 0;
}
-int
+static int
dht_rmdir_selfheal_cbk(call_frame_t *heal_frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xdata)
{
@@ -9292,7 +9551,7 @@ dht_rmdir_selfheal_cbk(call_frame_t *heal_frame, void *cookie, xlator_t *this,
return 0;
}
-int
+static int
dht_rmdir_hashed_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
@@ -9395,7 +9654,63 @@ err:
return 0;
}
-int
+static int
+dht_rmdir_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ DHT_STACK_DESTROY(frame);
+ return 0;
+}
+
+static int
+dht_rmdir_unlock(call_frame_t *frame, xlator_t *this)
+{
+ dht_local_t *local = NULL, *lock_local = NULL;
+ call_frame_t *lock_frame = NULL;
+ int lock_count = 0;
+
+ local = frame->local;
+
+ /* Unlock entrylk */
+ dht_unlock_entrylk_wrapper(frame, &local->lock[0].ns.directory_ns);
+
+ /* Unlock inodelk */
+ lock_count = dht_lock_count(local->lock[0].ns.parent_layout.locks,
+ local->lock[0].ns.parent_layout.lk_count);
+
+ if (lock_count == 0)
+ goto done;
+
+ lock_frame = copy_frame(frame);
+ if (lock_frame == NULL)
+ goto done;
+
+ lock_local = dht_local_init(lock_frame, &local->loc, NULL,
+ lock_frame->root->op);
+ if (lock_local == NULL)
+ goto done;
+
+ lock_local->lock[0].ns.parent_layout.locks = local->lock[0]
+ .ns.parent_layout.locks;
+ lock_local->lock[0]
+ .ns.parent_layout.lk_count = local->lock[0].ns.parent_layout.lk_count;
+
+ local->lock[0].ns.parent_layout.locks = NULL;
+ local->lock[0].ns.parent_layout.lk_count = 0;
+ dht_unlock_inodelk(lock_frame, lock_local->lock[0].ns.parent_layout.locks,
+ lock_local->lock[0].ns.parent_layout.lk_count,
+ dht_rmdir_unlock_cbk);
+ lock_frame = NULL;
+
+done:
+ if (lock_frame != NULL) {
+ DHT_STACK_DESTROY(lock_frame);
+ }
+
+ return 0;
+}
+
+static int
dht_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, struct iatt *preparent, struct iatt *postparent,
dict_t *xdata)
@@ -9530,63 +9845,7 @@ err:
return 0;
}
-int
-dht_rmdir_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- DHT_STACK_DESTROY(frame);
- return 0;
-}
-
-int
-dht_rmdir_unlock(call_frame_t *frame, xlator_t *this)
-{
- dht_local_t *local = NULL, *lock_local = NULL;
- call_frame_t *lock_frame = NULL;
- int lock_count = 0;
-
- local = frame->local;
-
- /* Unlock entrylk */
- dht_unlock_entrylk_wrapper(frame, &local->lock[0].ns.directory_ns);
-
- /* Unlock inodelk */
- lock_count = dht_lock_count(local->lock[0].ns.parent_layout.locks,
- local->lock[0].ns.parent_layout.lk_count);
-
- if (lock_count == 0)
- goto done;
-
- lock_frame = copy_frame(frame);
- if (lock_frame == NULL)
- goto done;
-
- lock_local = dht_local_init(lock_frame, &local->loc, NULL,
- lock_frame->root->op);
- if (lock_local == NULL)
- goto done;
-
- lock_local->lock[0].ns.parent_layout.locks = local->lock[0]
- .ns.parent_layout.locks;
- lock_local->lock[0]
- .ns.parent_layout.lk_count = local->lock[0].ns.parent_layout.lk_count;
-
- local->lock[0].ns.parent_layout.locks = NULL;
- local->lock[0].ns.parent_layout.lk_count = 0;
- dht_unlock_inodelk(lock_frame, lock_local->lock[0].ns.parent_layout.locks,
- lock_local->lock[0].ns.parent_layout.lk_count,
- dht_rmdir_unlock_cbk);
- lock_frame = NULL;
-
-done:
- if (lock_frame != NULL) {
- DHT_STACK_DESTROY(lock_frame);
- }
-
- return 0;
-}
-
-int
+static int
dht_rmdir_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
@@ -9595,8 +9854,6 @@ dht_rmdir_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int i = 0;
xlator_t *hashed_subvol;
- VALIDATE_OR_GOTO(this->private, err);
-
conf = this->private;
local = frame->local;
@@ -9629,7 +9886,7 @@ err:
return 0;
}
-int
+static int
dht_rmdir_do(call_frame_t *frame, xlator_t *this)
{
dht_local_t *local = NULL;
@@ -9638,13 +9895,13 @@ dht_rmdir_do(call_frame_t *frame, xlator_t *this)
xlator_t *hashed_subvol = NULL;
char gfid[GF_UUID_BUF_SIZE] = {0};
- VALIDATE_OR_GOTO(this->private, err);
-
- conf = this->private;
+ VALIDATE_OR_GOTO(frame->local, err);
local = frame->local;
+ VALIDATE_OR_GOTO(this->private, out);
+ conf = this->private;
if (local->op_ret == -1)
- goto err;
+ goto out;
local->call_cnt = conf->subvolume_cnt;
@@ -9676,21 +9933,80 @@ dht_rmdir_do(call_frame_t *frame, xlator_t *this)
if (ret < 0) {
local->op_ret = -1;
local->op_errno = errno ? errno : EINVAL;
- goto err;
+ goto out;
}
return 0;
-err:
+out:
dht_set_fixed_dir_stat(&local->preparent);
dht_set_fixed_dir_stat(&local->postparent);
DHT_STACK_UNWIND(rmdir, frame, local->op_ret, local->op_errno,
&local->preparent, &local->postparent, NULL);
return 0;
+err:
+ DHT_STACK_UNWIND(rmdir, frame, -1, EINVAL, NULL, NULL, NULL);
+ return 0;
}
-int
+static void
+dht_rmdir_readdirp_done(call_frame_t *readdirp_frame, xlator_t *this)
+{
+ call_frame_t *main_frame = NULL;
+ dht_local_t *main_local = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+
+ local = readdirp_frame->local;
+ main_frame = local->main_frame;
+ main_local = main_frame->local;
+
+ /* At least one readdirp failed.
+ * This is a bit hit or miss - if readdirp failed on more than
+ * one subvol, we don't know which error is returned.
+ */
+ if (local->op_ret == -1) {
+ main_local->op_ret = local->op_ret;
+ main_local->op_errno = local->op_errno;
+ }
+
+ this_call_cnt = dht_frame_return(main_frame);
+
+ if (is_last_call(this_call_cnt))
+ dht_rmdir_do(main_frame, this);
+
+ DHT_STACK_DESTROY(readdirp_frame);
+}
+
+/* Keep sending readdirp on the subvol until it returns no more entries
+ * It is possible that not all entries will fit in a single readdirp in
+ * which case the rmdir will keep failing with ENOTEMPTY
+ */
+
+static int
+dht_rmdir_readdirp_do(call_frame_t *readdirp_frame, xlator_t *this)
+{
+ dht_local_t *local = NULL;
+
+ local = readdirp_frame->local;
+
+ if (local->op_ret == -1) {
+ /* there is no point doing another readdirp on this
+ * subvol . */
+ dht_rmdir_readdirp_done(readdirp_frame, this);
+ return 0;
+ }
+
+ STACK_WIND_COOKIE(readdirp_frame, dht_rmdir_readdirp_cbk,
+ local->hashed_subvol, local->hashed_subvol,
+ local->hashed_subvol->fops->readdirp, local->fd, 4096, 0,
+ local->xattr);
+
+ return 0;
+}
+
+static int
dht_rmdir_linkfile_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
@@ -9734,7 +10050,7 @@ dht_rmdir_linkfile_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-int
+static int
dht_rmdir_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, dict_t *xattr, struct iatt *parent)
@@ -9759,8 +10075,7 @@ dht_rmdir_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret != 0) {
gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_FILE_LOOKUP_FAILED,
- "lookup failed for %s on %s (type=0%o)", local->loc.path,
- src->name, stbuf->ia_type);
+ "lookup failed for %s on %s", local->loc.path, src->name);
goto err;
}
@@ -9790,7 +10105,7 @@ err:
return 0;
}
-int
+static int
dht_rmdir_cached_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, dict_t *xattr,
@@ -9870,7 +10185,7 @@ err:
return 0;
}
-int
+static int
dht_rmdir_is_subvol_empty(call_frame_t *frame, xlator_t *this,
gf_dirent_t *entries, xlator_t *src)
{
@@ -9966,7 +10281,10 @@ dht_rmdir_is_subvol_empty(call_frame_t *frame, xlator_t *this,
lookup_local->loc.path, src->name, gfid);
subvol = dht_linkfile_subvol(this, NULL, &trav->d_stat, trav->dict);
- if (!subvol) {
+ if (!subvol || (subvol == src)) {
+ /* we need to delete the linkto file if it does not have a
+ * valid subvol or it points to itself.
+ */
gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_INVALID_LINKFILE,
"Linkfile does not have link subvolume. "
"path = %s, gfid = %s",
@@ -10028,36 +10346,7 @@ err:
* No more entries on this subvol. Proceed to the actual rmdir operation.
*/
-void
-dht_rmdir_readdirp_done(call_frame_t *readdirp_frame, xlator_t *this)
-{
- call_frame_t *main_frame = NULL;
- dht_local_t *main_local = NULL;
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
-
- local = readdirp_frame->local;
- main_frame = local->main_frame;
- main_local = main_frame->local;
-
- /* At least one readdirp failed.
- * This is a bit hit or miss - if readdirp failed on more than
- * one subvol, we don't know which error is returned.
- */
- if (local->op_ret == -1) {
- main_local->op_ret = local->op_ret;
- main_local->op_errno = local->op_errno;
- }
-
- this_call_cnt = dht_frame_return(main_frame);
-
- if (is_last_call(this_call_cnt))
- dht_rmdir_do(main_frame, this);
-
- DHT_STACK_DESTROY(readdirp_frame);
-}
-
-int
+static int
dht_rmdir_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, gf_dirent_t *entries,
dict_t *xdata)
@@ -10107,34 +10396,7 @@ dht_rmdir_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-/* Keep sending readdirp on the subvol until it returns no more entries
- * It is possible that not all entries will fit in a single readdirp in which
- * case the rmdir will keep failing with ENOTEMPTY
- */
-
-int
-dht_rmdir_readdirp_do(call_frame_t *readdirp_frame, xlator_t *this)
-{
- dht_local_t *local = NULL;
-
- local = readdirp_frame->local;
-
- if (local->op_ret == -1) {
- /* there is no point doing another readdirp on this
- * subvol . */
- dht_rmdir_readdirp_done(readdirp_frame, this);
- return 0;
- }
-
- STACK_WIND_COOKIE(readdirp_frame, dht_rmdir_readdirp_cbk,
- local->hashed_subvol, local->hashed_subvol,
- local->hashed_subvol->fops->readdirp, local->fd, 4096, 0,
- local->xattr);
-
- return 0;
-}
-
-int
+static int
dht_rmdir_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
{
@@ -10254,6 +10516,8 @@ dht_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
dht_conf_t *conf = NULL;
int op_errno = -1;
int i = -1;
+ int ret = -1;
+ dict_t *xattr_req = NULL;
VALIDATE_OR_GOTO(frame, err);
VALIDATE_OR_GOTO(this, err);
@@ -10285,14 +10549,37 @@ dht_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
if (flags) {
return dht_rmdir_do(frame, this);
}
+ if (xdata) {
+ xattr_req = dict_ref(xdata);
+ } else {
+ xattr_req = dict_new();
+ }
+ if (xattr_req) {
+ ret = dict_set_uint32(xattr_req, conf->link_xattr_name, 256);
+ /* If parallel-readdir is enabled, this is required
+ * to handle stale linkto files in the directory
+ * being deleted. If this fails, log an error but
+ * do not prevent the operation.
+ */
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "%s: failed to set key %s",
+ loc->path, conf->link_xattr_name);
+ }
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "%s: failed to set key %s",
+ loc->path, conf->link_xattr_name);
+ }
for (i = 0; i < conf->subvolume_cnt; i++) {
STACK_WIND_COOKIE(frame, dht_rmdir_opendir_cbk, conf->subvolumes[i],
conf->subvolumes[i],
conf->subvolumes[i]->fops->opendir, loc, local->fd,
- NULL);
+ xattr_req);
}
+ if (xattr_req) {
+ dict_unref(xattr_req);
+ }
return 0;
err:
@@ -10302,7 +10589,7 @@ err:
return 0;
}
-int
+static int
dht_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
@@ -10362,7 +10649,7 @@ err:
return 0;
}
-int
+static int
dht_fentrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
@@ -10409,7 +10696,7 @@ err:
return 0;
}
-int32_t
+static int32_t
dht_ipc_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t op_errno, dict_t *xdata)
{
@@ -10526,23 +10813,17 @@ dht_notify(xlator_t *this, int event, void *data, ...)
int had_heard_from_all = 0;
int have_heard_from_all = 0;
- struct timeval time = {
- 0,
- };
gf_defrag_info_t *defrag = NULL;
dict_t *dict = NULL;
gf_defrag_type cmd = 0;
dict_t *output = NULL;
va_list ap;
- dht_methods_t *methods = NULL;
struct gf_upcall *up_data = NULL;
struct gf_upcall_cache_invalidation *up_ci = NULL;
conf = this->private;
GF_VALIDATE_OR_GOTO(this->name, conf, out);
- methods = &(conf->methods);
-
/* had all subvolumes reported status once till now? */
had_heard_from_all = 1;
for (i = 0; i < conf->subvolume_cnt; i++) {
@@ -10572,12 +10853,11 @@ dht_notify(xlator_t *this, int event, void *data, ...)
break;
}
- gettimeofday(&time, NULL);
LOCK(&conf->subvolume_lock);
{
conf->subvolume_status[cnt] = 1;
conf->last_event[cnt] = event;
- conf->subvol_up_time[cnt] = time.tv_sec;
+ conf->subvol_up_time[cnt] = gf_time();
}
UNLOCK(&conf->subvolume_lock);
@@ -10685,21 +10965,13 @@ dht_notify(xlator_t *this, int event, void *data, ...)
if (defrag->is_exiting)
goto unlock;
if ((cmd == GF_DEFRAG_CMD_STATUS) ||
- (cmd == GF_DEFRAG_CMD_STATUS_TIER) ||
(cmd == GF_DEFRAG_CMD_DETACH_STATUS))
gf_defrag_status_get(conf, output);
- else if (cmd == GF_DEFRAG_CMD_START_DETACH_TIER)
- gf_defrag_start_detach_tier(defrag);
else if (cmd == GF_DEFRAG_CMD_DETACH_START)
defrag->cmd = GF_DEFRAG_CMD_DETACH_START;
else if (cmd == GF_DEFRAG_CMD_STOP ||
- cmd == GF_DEFRAG_CMD_STOP_DETACH_TIER ||
cmd == GF_DEFRAG_CMD_DETACH_STOP)
gf_defrag_stop(conf, GF_DEFRAG_STATUS_STOPPED, output);
- else if (cmd == GF_DEFRAG_CMD_PAUSE_TIER)
- ret = gf_defrag_pause_tier(this, defrag);
- else if (cmd == GF_DEFRAG_CMD_RESUME_TIER)
- ret = gf_defrag_resume_tier(this, defrag);
}
unlock:
UNLOCK(&defrag->lock);
@@ -10747,6 +11019,7 @@ dht_notify(xlator_t *this, int event, void *data, ...)
}
if (!had_heard_from_all && have_heard_from_all) {
+ static int run_defrag = 0;
/* This is the first event which completes aggregation
of events from all subvolumes. If at least one subvol
had come up, propagate CHILD_UP, but only this time
@@ -10773,15 +11046,13 @@ dht_notify(xlator_t *this, int event, void *data, ...)
* thread has already started.
*/
if (conf->defrag && !run_defrag) {
- if (methods->migration_needed(this)) {
- run_defrag = 1;
- ret = gf_thread_create(&conf->defrag->th, NULL, gf_defrag_start,
- this, "dhtdg");
- if (ret) {
- GF_FREE(conf->defrag);
- conf->defrag = NULL;
- kill(getpid(), SIGTERM);
- }
+ run_defrag = 1;
+ ret = gf_thread_create(&conf->defrag->th, NULL, gf_defrag_start,
+ this, "dhtdg");
+ if (ret) {
+ GF_FREE(conf->defrag);
+ conf->defrag = NULL;
+ kill(getpid(), SIGTERM);
}
}
}
@@ -10850,8 +11121,8 @@ dht_log_new_layout_for_dir_selfheal(xlator_t *this, loc_t *loc,
len += ret;
/* Calculation of total length of the string required to calloc
- * output_string. Log includes subvolume-name, start-range, end-range and
- * err value.
+ * output_string. Log includes subvolume-name, start-range, end-range
+ * and err value.
*
* This log will help to debug cases where:
* a) Different processes set different layout of a directory.
@@ -10862,8 +11133,7 @@ dht_log_new_layout_for_dir_selfheal(xlator_t *this, loc_t *loc,
for (i = 0; i < layout->cnt; i++) {
ret = snprintf(string, sizeof(string),
"[Subvol_name: %s, Err: %d , Start: "
- "%" PRIu32 " , Stop: %" PRIu32 " , Hash: %" PRIu32
- " ], ",
+ "0x%x, Stop: 0x%x, Hash: 0x%x], ",
layout->list[i].xlator->name, layout->list[i].err,
layout->list[i].start, layout->list[i].stop,
layout->list[i].commit_hash);
@@ -10892,8 +11162,7 @@ dht_log_new_layout_for_dir_selfheal(xlator_t *this, loc_t *loc,
for (i = 0; i < layout->cnt; i++) {
ret = snprintf(output_string + off, len - off,
"[Subvol_name: %s, Err: %d , Start: "
- "%" PRIu32 " , Stop: %" PRIu32 " , Hash: %" PRIu32
- " ], ",
+ "0x%x, Stop: 0x%x, Hash: 0x%x], ",
layout->list[i].xlator->name, layout->list[i].err,
layout->list[i].start, layout->list[i].stop,
layout->list[i].commit_hash);
@@ -10928,28 +11197,6 @@ out:
return ret;
}
-int32_t
-dht_migration_needed(xlator_t *this)
-{
- gf_defrag_info_t *defrag = NULL;
- dht_conf_t *conf = NULL;
- int ret = 0;
-
- conf = this->private;
-
- GF_VALIDATE_OR_GOTO("dht", conf, out);
- GF_VALIDATE_OR_GOTO("dht", conf->defrag, out);
-
- defrag = conf->defrag;
-
- if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) &&
- (defrag->cmd != GF_DEFRAG_CMD_START_DETACH_TIER))
- ret = 1;
-
-out:
- return ret;
-}
-
/*
This function should not be called more then once during a FOP
handling path. It is valid only for for ops on files
@@ -10984,72 +11231,13 @@ dht_set_local_rebalance(xlator_t *this, dht_local_t *local, struct iatt *stbuf,
return 0;
}
-gf_boolean_t
-dht_is_tier_xlator(xlator_t *this)
-{
- if (strcmp(this->type, "cluster/tier") == 0)
- return _gf_true;
- return _gf_false;
-}
-
int32_t
dht_release(xlator_t *this, fd_t *fd)
{
return dht_fd_ctx_destroy(this, fd);
}
-int
-dht_remove_stale_linkto(void *data)
-{
- call_frame_t *frame = NULL;
- dht_local_t *local = NULL;
- xlator_t *this = NULL;
- dict_t *xdata_in = NULL;
- int ret = 0;
-
- GF_VALIDATE_OR_GOTO("dht", data, out);
-
- frame = data;
- local = frame->local;
- this = frame->this;
- GF_VALIDATE_OR_GOTO("dht", this, out);
- GF_VALIDATE_OR_GOTO("dht", local, out);
- GF_VALIDATE_OR_GOTO("dht", local->link_subvol, out);
-
- xdata_in = dict_new();
- if (!xdata_in)
- goto out;
-
- ret = dht_fill_dict_to_avoid_unlink_of_migrating_file(xdata_in);
- if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, -ret, 0,
- "Failed to set keys for stale linkto"
- "deletion on path %s",
- local->loc.path);
- goto out;
- }
-
- ret = syncop_unlink(local->link_subvol, &local->loc, xdata_in, NULL);
- if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, -ret, 0,
- "Removal of linkto failed"
- " on path %s at subvol %s",
- local->loc.path, local->link_subvol->name);
- }
-out:
- if (xdata_in)
- dict_unref(xdata_in);
- return ret;
-}
-
-int
-dht_remove_stale_linkto_cbk(int ret, call_frame_t *sync_frame, void *data)
-{
- DHT_STACK_DESTROY(sync_frame);
- return 0;
-}
-
-int
+static int
dht_pt_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, inode_t *inode, struct iatt *stbuf,
struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
@@ -11135,6 +11323,8 @@ dht_pt_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
conf = this->private;
dict_del(xattr, conf->xattr_name);
+ dict_del(xattr, conf->mds_xattr_key);
+ dict_del(xattr, conf->commithash_xattr_name);
if (frame->root->pid >= 0) {
GF_REMOVE_INTERNAL_XATTR("trusted.glusterfs.quota*", xattr);
@@ -11180,3 +11370,22 @@ dht_pt_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key,
FIRST_CHILD(this)->fops->fgetxattr, fd, key, xdata);
return 0;
}
+
+/* The job of this function is to check if all the xlators have updated
+ * error in the layout. */
+int
+dht_dir_layout_error_check(xlator_t *this, inode_t *inode)
+{
+ dht_layout_t *layout = NULL;
+ int i = 0;
+
+ layout = dht_layout_get(this, inode);
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err == 0) {
+ return 0;
+ }
+ }
+
+ /* Returning the first xlator error as all xlators have errors */
+ return layout->list[0].err;
+}
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 580f57e6e25..fe0dc3db34a 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -9,24 +9,21 @@
*/
#include <regex.h>
-#include <signal.h>
-#include <fnmatch.h>
#include "dht-mem-types.h"
#include "dht-messages.h"
-#include "call-stub.h"
+#include <glusterfs/call-stub.h>
#include "libxlator.h"
-#include "syncop.h"
-#include "refcount.h"
-#include "timer.h"
+#include <glusterfs/syncop.h>
+#include <glusterfs/refcount.h>
+#include <glusterfs/timer.h>
#include "protocol-common.h"
-#include "glusterfs-acl.h"
+#include <glusterfs/glusterfs-acl.h>
#ifndef _DHT_H
#define _DHT_H
#define GF_XATTR_FIX_LAYOUT_KEY "distribute.fix.layout"
-#define GF_XATTR_TIER_LAYOUT_FIXED_KEY "trusted.tier.fix.layout.complete"
#define GF_XATTR_FILE_MIGRATE_KEY "trusted.distribute.migrate-data"
#define DHT_MDS_STR "mds"
#define GF_DHT_LOOKUP_UNHASHED_OFF 0
@@ -38,22 +35,21 @@
#define DHT_LAYOUT_HEAL_DOMAIN "dht.layout.heal"
/* Namespace synchronization */
#define DHT_ENTRY_SYNC_DOMAIN "dht.entry.sync"
-#define TIERING_MIGRATION_KEY "tiering.migration"
#define DHT_LAYOUT_HASH_INVALID 1
#define MAX_REBAL_THREADS sysconf(_SC_NPROCESSORS_ONLN)
#define DHT_DIR_STAT_BLOCKS 8
#define DHT_DIR_STAT_SIZE 4096
+/* Virtual xattr for subvols status */
+
+#define DHT_SUBVOL_STATUS_KEY "dht.subvol.status"
+
/* Virtual xattrs for debugging */
#define DHT_DBG_HASHED_SUBVOL_PATTERN "dht.file.hashed-subvol.*"
#define DHT_DBG_HASHED_SUBVOL_KEY "dht.file.hashed-subvol."
-/* Array to hold custom xattr keys
- */
-extern char *xattrs_to_heal[];
-
/* Rebalance nodeuuid flags */
#define REBAL_NODEUUID_MINE 0x01
@@ -152,8 +148,8 @@ struct dht_rebalance_ {
dict_t *xdata;
dict_t *xattr;
dict_t *dict;
- int32_t set;
struct gf_flock flock;
+ int32_t set;
int lock_cmd;
};
@@ -176,24 +172,24 @@ typedef enum {
} dht_reaction_type_t;
struct dht_skip_linkto_unlink {
- gf_boolean_t handle_valid_link;
- int opend_fd_count;
xlator_t *hash_links_to;
uuid_t cached_gfid;
uuid_t hashed_gfid;
+ int opend_fd_count;
+ gf_boolean_t handle_valid_link;
};
typedef struct {
xlator_t *xl;
loc_t loc; /* contains/points to inode to lock on. */
- short type; /* read/write lock. */
char *domain; /* Only locks within a single domain
* contend with each other
*/
char *basename; /* Required for entrylk */
- gf_lkowner_t lk_owner;
gf_boolean_t locked;
dht_reaction_type_t do_on_failure;
+ short type; /* read/write lock. */
+ gf_lkowner_t lk_owner;
} dht_lock_t;
/* The lock structure represents inodelk. */
@@ -244,23 +240,10 @@ typedef gf_boolean_t (*dht_need_heal_t)(call_frame_t *frame,
dht_layout_t **inmem,
dht_layout_t **ondisk);
-typedef struct {
- uint64_t blocks_used;
- uint64_t pblocks_used;
- uint64_t files_used;
- uint64_t pfiles_used;
- uint64_t unhashed_blocks_used;
- uint64_t unhashed_pblocks_used;
- uint64_t unhashed_files_used;
- uint64_t unhashed_pfiles_used;
- uint64_t unhashed_fsid;
- uint64_t hashed_fsid;
-} tier_statvfs_t;
-
struct dht_local {
- int call_cnt;
loc_t loc;
loc_t loc2;
+ int call_cnt;
int op_ret;
int op_errno;
int layout_mismatch;
@@ -274,7 +257,6 @@ struct dht_local {
struct iatt preparent;
struct iatt postparent;
struct statvfs statvfs;
- tier_statvfs_t tier_statvfs;
fd_t *fd;
inode_t *inode;
dict_t *params;
@@ -290,9 +272,6 @@ struct dht_local {
xlator_t *cached_subvol;
xlator_t *hashed_subvol;
xlator_t *mds_subvol; /* This is use for dir only */
- char need_selfheal;
- char need_xattr_heal;
- char need_attrheal;
int file_count;
int dir_count;
call_frame_t *main_frame;
@@ -310,12 +289,12 @@ struct dht_local {
uint32_t overlaps_cnt;
uint32_t down;
uint32_t misc;
- uint32_t missing_cnt;
dht_selfheal_dir_cbk_t dir_cbk;
dht_selfheal_layout_t healer;
dht_need_heal_t should_heal;
- gf_boolean_t force_mkdir;
dht_layout_t *layout, *refreshed_layout;
+ uint32_t missing_cnt;
+ gf_boolean_t force_mkdir;
} selfheal;
dht_refresh_layout_unlock refresh_layout_unlock;
@@ -323,7 +302,13 @@ struct dht_local {
uint32_t uid;
uint32_t gid;
- pid_t pid;
+ pid_t pid;
+
+ glusterfs_fop_t fop;
+
+ /* need for file-info */
+ char *xattr_val;
+ char *key;
/* needed by nufa */
int32_t flags;
@@ -331,44 +316,25 @@ struct dht_local {
dev_t rdev;
mode_t umask;
- /* need for file-info */
- char *xattr_val;
- char *key;
-
/* which xattr request? */
char xsel[256];
int32_t alloc_len;
/* gfid related */
uuid_t gfid;
+ uuid_t gfid_req;
- /* flag used to make sure we need to return estale in
- {lookup,revalidate}_cbk */
- char return_estale;
- char need_lookup_everywhere;
-
- glusterfs_fop_t fop;
-
- gf_boolean_t linked;
xlator_t *link_subvol;
struct dht_rebalance_ rebalance;
xlator_t *first_up_subvol;
- gf_boolean_t quota_deem_statfs;
-
- gf_boolean_t added_link;
- gf_boolean_t is_linkfile;
-
struct dht_skip_linkto_unlink skip_unlink;
dht_dir_transaction_t lock[2], *current;
/* inodelks during filerename for backward compatibility */
dht_lock_t **rename_inodelk_backward_compatible;
- int rename_inodelk_bc_count;
-
- short lock_type;
call_stub_t *stub;
int32_t parent_disk_layout[4];
@@ -376,15 +342,30 @@ struct dht_local {
/* rename rollback */
int *ret_cache;
- /* fd open check */
- gf_boolean_t fd_checked;
+ loc_t loc2_copy;
+
+ int rename_inodelk_bc_count;
/* This is use only for directory operation */
int32_t valid;
- gf_boolean_t heal_layout;
int32_t mds_heal_fresh_lookup;
- loc_t loc2_copy;
+ short lock_type;
+ char need_selfheal;
+ char need_xattr_heal;
+ char need_attrheal;
+ /* flag used to make sure we need to return estale in
+ {lookup,revalidate}_cbk */
+ char return_estale;
+ char need_lookup_everywhere;
+ /* fd open check */
+ gf_boolean_t fd_checked;
+ gf_boolean_t linked;
+ gf_boolean_t added_link;
+ gf_boolean_t is_linkfile;
+ gf_boolean_t quota_deem_statfs;
+ gf_boolean_t heal_layout;
gf_boolean_t locked;
gf_boolean_t dont_create_linkto;
+ gf_boolean_t gfid_missing;
};
typedef struct dht_local dht_local_t;
@@ -408,14 +389,7 @@ enum gf_defrag_type {
GF_DEFRAG_CMD_STATUS = 1 + 2,
GF_DEFRAG_CMD_START_LAYOUT_FIX = 1 + 3,
GF_DEFRAG_CMD_START_FORCE = 1 + 4,
- GF_DEFRAG_CMD_START_TIER = 1 + 5,
- GF_DEFRAG_CMD_STATUS_TIER = 1 + 6,
- GF_DEFRAG_CMD_START_DETACH_TIER = 1 + 7,
- GF_DEFRAG_CMD_STOP_DETACH_TIER = 1 + 8,
- GF_DEFRAG_CMD_PAUSE_TIER = 1 + 9,
- GF_DEFRAG_CMD_RESUME_TIER = 1 + 10,
GF_DEFRAG_CMD_DETACH_STATUS = 1 + 11,
- GF_DEFRAG_CMD_STOP_TIER = 1 + 12,
GF_DEFRAG_CMD_DETACH_START = 1 + 13,
GF_DEFRAG_CMD_DETACH_COMMIT = 1 + 14,
GF_DEFRAG_CMD_DETACH_COMMIT_FORCE = 1 + 15,
@@ -466,75 +440,6 @@ struct dht_container {
int local_subvol_index;
};
-typedef enum tier_mode_ {
- TIER_MODE_NONE = 0,
- TIER_MODE_TEST,
- TIER_MODE_WM
-} tier_mode_t;
-
-typedef enum tier_pause_state_ {
- TIER_RUNNING = 0,
- TIER_REQUEST_PAUSE,
- TIER_PAUSED
-} tier_pause_state_t;
-
-/* This Structure is only used in tiering fixlayout */
-typedef struct gf_tier_fix_layout_arg {
- xlator_t *this;
- dict_t *fix_layout;
- pthread_t thread_id;
-} gf_tier_fix_layout_arg_t;
-
-typedef struct gf_tier_conf {
- int is_tier;
- int watermark_hi;
- int watermark_low;
- int watermark_last;
- unsigned long block_size;
- fsblkcnt_t blocks_total;
- fsblkcnt_t blocks_used;
- int percent_full;
- uint64_t max_migrate_bytes;
- int max_migrate_files;
- int query_limit;
- tier_mode_t mode;
- /* These flags are only used for tier-compact */
- gf_boolean_t compact_active;
- /* These 3 flags are set to true when the client changes the */
- /* compaction mode on the command line. */
- /* When they are set, the daemon will trigger compaction as */
- /* soon as possible to activate or deactivate compaction. */
- /* If in the middle of a compaction, then the switches take */
- /* effect on the next compaction, not the current one. */
- /* If the user switches it off, we want to avoid needless */
- /* compactions. */
- /* If the user switches it on, they want to compact as soon */
- /* as possible. */
- gf_boolean_t compact_mode_switched;
- gf_boolean_t compact_mode_switched_hot;
- gf_boolean_t compact_mode_switched_cold;
- int tier_max_promote_size;
- int tier_promote_frequency;
- int tier_demote_frequency;
- int tier_compact_hot_frequency;
- int tier_compact_cold_frequency;
- uint64_t st_last_promoted_size;
- uint64_t st_last_demoted_size;
- tier_pause_state_t pause_state;
- struct synctask *pause_synctask;
- gf_timer_t *pause_timer;
- pthread_mutex_t pause_mutex;
- int promote_in_progress;
- int demote_in_progress;
- /* This Structure is only used in tiering fixlayout */
- gf_tier_fix_layout_arg_t tier_fix_layout_arg;
- /* Indicates the index of the first queryfile picked
- * in the last cycle of promote or demote */
- int32_t last_promote_qfile_index;
- int32_t last_demote_qfile_index;
- char volname[GD_VOLUME_NAME_MAX + 1];
-} gf_tier_conf_t;
-
typedef struct nodeuuid_info {
char info; /* Set to 1 is this is my node's uuid*/
uuid_t uuid; /* Store the nodeuuid as well for debugging*/
@@ -554,26 +459,18 @@ struct gf_defrag_info_ {
uint64_t num_dirs_processed;
uint64_t size_processed;
gf_lock_t lock;
- int cmd;
pthread_t th;
- gf_defrag_status_t defrag_status;
struct rpc_clnt *rpc;
uint32_t connected;
uint32_t is_exiting;
pid_t pid;
+ int cmd;
inode_t *root_inode;
uuid_t node_uuid;
- struct timeval start_time;
- gf_boolean_t stats;
+ time_t start_time;
uint32_t new_commit_hash;
+ gf_defrag_status_t defrag_status;
gf_defrag_pattern_list_t *defrag_pattern;
- gf_tier_conf_t tier_conf;
-
- /*Data Tiering params for scanner*/
- uint64_t total_files_promoted;
- uint64_t total_files_demoted;
- int write_freq_threshold;
- int read_freq_threshold;
pthread_cond_t parallel_migration_cond;
pthread_mutex_t dfq_mutex;
@@ -588,18 +485,20 @@ struct gf_defrag_info_ {
/*Throttle params*/
/*stands for reconfigured thread count*/
int32_t recon_thread_count;
- /*stands for current running thread count*/
- int32_t current_thread_count;
pthread_cond_t df_wakeup_thread;
- /* lock migration flag */
- gf_boolean_t lock_migration_enabled;
-
/* backpointer to make it easier to write functions for rebalance */
xlator_t *this;
pthread_cond_t fc_wakeup_cond;
pthread_mutex_t fc_mutex;
+
+ /*stands for current running thread count*/
+ int32_t current_thread_count;
+
+ gf_boolean_t stats;
+ /* lock migration flag */
+ gf_boolean_t lock_migration_enabled;
};
typedef struct gf_defrag_info_ gf_defrag_info_t;
@@ -607,7 +506,6 @@ typedef struct gf_defrag_info_ gf_defrag_info_t;
struct dht_methods_s {
int32_t (*migration_get_dst_subvol)(xlator_t *this, dht_local_t *local);
int32_t (*migration_other)(xlator_t *this, gf_defrag_info_t *defrag);
- int32_t (*migration_needed)(xlator_t *this);
xlator_t *(*layout_search)(xlator_t *this, dht_layout_t *layout,
const char *name);
};
@@ -615,36 +513,26 @@ struct dht_methods_s {
typedef struct dht_methods_s dht_methods_t;
struct dht_conf {
- gf_lock_t subvolume_lock;
- int subvolume_cnt;
xlator_t **subvolumes;
char *subvolume_status;
int *last_event;
dht_layout_t **file_layouts;
dht_layout_t **dir_layouts;
unsigned int search_unhashed;
- gf_boolean_t lookup_optimize;
int gen;
dht_du_t *du_stats;
double min_free_disk;
double min_free_inodes;
- char disk_unit;
+ int subvolume_cnt;
int32_t refresh_interval;
- gf_boolean_t unhashed_sticky_bit;
- struct timeval last_stat_fetch;
+ gf_lock_t subvolume_lock;
+ time_t last_stat_fetch;
gf_lock_t layout_lock;
dict_t *leaf_to_subvol;
void *private; /* Can be used by wrapper xlators over
dht */
- gf_boolean_t use_readdirp;
- char vol_uuid[UUID_SIZE + 1];
- gf_boolean_t assert_no_child_down;
time_t *subvol_up_time;
- /* This is the count used as the distribute layout for a directory */
- /* Will be a global flag to control the layout spread count */
- uint32_t dir_spread_cnt;
-
/* to keep track of nodes which are decommissioned */
xlator_t **decommissioned_bricks;
int decommission_in_progress;
@@ -653,15 +541,9 @@ struct dht_conf {
/* defrag related */
gf_defrag_info_t *defrag;
- /* Request to filter directory entries in readdir request */
-
- gf_boolean_t readdir_optimize;
-
/* Support regex-based name reinterpretation. */
regex_t rsync_regex;
- gf_boolean_t rsync_regex_valid;
regex_t extra_regex;
- gf_boolean_t extra_regex_valid;
/* Support variable xattr names. */
char *xattr_name;
@@ -670,11 +552,6 @@ struct dht_conf {
char *commithash_xattr_name;
char *wild_xattr_name;
- /* Support size-weighted rebalancing (heterogeneous bricks). */
- gf_boolean_t do_weighting;
- gf_boolean_t randomize_by_gfid;
- int dthrottle;
-
dht_methods_t methods;
struct mem_pool *lock_pool;
@@ -684,24 +561,55 @@ struct dht_conf {
subvol_nodeuuids_info_t *local_nodeuuids;
int32_t local_subvols_cnt;
+ int dthrottle;
+
+ /* Hard link handle requirement for migration triggered from client*/
+ synclock_t link_lock;
+
+ /* lock migration */
+ gf_lock_t lock;
+
+ /* This is the count used as the distribute layout for a directory */
+ /* Will be a global flag to control the layout spread count */
+ uint32_t dir_spread_cnt;
+
/*
* "Commit hash" for this volume topology. Changed whenever bricks
* are added or removed.
*/
uint32_t vol_commit_hash;
- gf_boolean_t vch_forced;
- /* lock migration */
+ char vol_uuid[UUID_SIZE + 1];
+
+ char disk_unit;
gf_boolean_t lock_migration_enabled;
- gf_lock_t lock;
- /* Hard link handle requirement for migration triggered from client*/
- synclock_t link_lock;
+ gf_boolean_t vch_forced;
gf_boolean_t use_fallocate;
gf_boolean_t force_migration;
+
+ gf_boolean_t lookup_optimize;
+
+ gf_boolean_t unhashed_sticky_bit;
+
+ gf_boolean_t assert_no_child_down;
+
+ gf_boolean_t use_readdirp;
+
+ /* Request to filter directory entries in readdir request */
+ gf_boolean_t readdir_optimize;
+
+ gf_boolean_t rsync_regex_valid;
+
+ gf_boolean_t extra_regex_valid;
+
+ /* Support size-weighted rebalancing (heterogeneous bricks). */
+ gf_boolean_t do_weighting;
+
+ gf_boolean_t randomize_by_gfid;
};
typedef struct dht_conf dht_conf_t;
@@ -740,6 +648,8 @@ struct dir_dfmeta {
struct list_head **head;
struct list_head **iterator;
int *fetch_entries;
+ /* fds corresponding to local subvols only */
+ fd_t **lfd;
};
typedef struct dht_migrate_info {
@@ -815,22 +725,18 @@ typedef struct dht_fd_ctx {
dht_local_wipe(__xl, __local); \
} while (0)
-#define DHT_UPDATE_TIME(ctx_sec, ctx_nsec, new_sec, new_nsec, inode, post) \
+#define DHT_UPDATE_TIME(ctx_sec, ctx_nsec, new_sec, new_nsec, post) \
do { \
- LOCK(&inode->lock); \
- { \
- if (ctx_sec == new_sec) \
- new_nsec = max(new_nsec, ctx_nsec); \
- else if (ctx_sec > new_sec) { \
- new_sec = ctx_sec; \
- new_nsec = ctx_nsec; \
- } \
- if (post) { \
- ctx_sec = new_sec; \
- ctx_nsec = new_nsec; \
- } \
+ if (ctx_sec == new_sec) \
+ new_nsec = max(new_nsec, ctx_nsec); \
+ else if (ctx_sec > new_sec) { \
+ new_sec = ctx_sec; \
+ new_nsec = ctx_nsec; \
+ } \
+ if (post) { \
+ ctx_sec = new_sec; \
+ ctx_nsec = new_nsec; \
} \
- UNLOCK(&inode->lock); \
} while (0)
#define is_greater_time(a, an, b, bn) \
@@ -875,7 +781,6 @@ dht_layout_anomalies(xlator_t *this, loc_t *loc, dht_layout_t *layout,
int
dht_layout_dir_mismatch(xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
loc_t *loc, dict_t *xattr);
-
xlator_t *
dht_linkfile_subvol(xlator_t *this, inode_t *inode, struct iatt *buf,
dict_t *xattr);
@@ -893,9 +798,6 @@ int
dht_disk_layout_extract(xlator_t *this, dht_layout_t *layout, int pos,
int32_t **disk_layout_p);
int
-dht_disk_layout_merge(xlator_t *this, dht_layout_t *layout, int pos,
- void *disk_layout_raw, int disk_layout_len);
-int
dht_disk_layout_extract_for_subvol(xlator_t *this, dht_layout_t *layout,
xlator_t *subvol, int32_t **disk_layout_p);
@@ -931,25 +833,17 @@ dht_linkfile_create(call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
xlator_t *this, xlator_t *tovol, xlator_t *fromvol,
loc_t *loc);
int
-dht_lookup_directory(call_frame_t *frame, xlator_t *this, loc_t *loc);
-int
dht_lookup_everywhere(call_frame_t *frame, xlator_t *this, loc_t *loc);
int
dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
loc_t *loc, dht_layout_t *layout);
-
-int
-dht_selfheal_directory_for_nameless_lookup(call_frame_t *frame,
- dht_selfheal_dir_cbk_t cbk,
- loc_t *loc, dht_layout_t *layout);
-
int
dht_selfheal_new_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
dht_layout_t *layout);
int
dht_selfheal_restore(call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
loc_t *loc, dht_layout_t *layout);
-int
+void
dht_layout_sort_volname(dht_layout_t *layout);
int
@@ -966,14 +860,14 @@ dht_get_du_info_for_subvol(xlator_t *this, int subvol_idx);
int
dht_layout_preset(xlator_t *this, xlator_t *subvol, inode_t *inode);
int
-dht_layout_index_for_subvol(dht_layout_t *layout, xlator_t *subvol);
-int
dht_layout_set(xlator_t *this, inode_t *inode, dht_layout_t *layout);
;
void
dht_layout_unref(xlator_t *this, dht_layout_t *layout);
dht_layout_t *
dht_layout_ref(xlator_t *this, dht_layout_t *layout);
+int
+dht_layout_index_for_subvol(dht_layout_t *layout, xlator_t *subvol);
xlator_t *
dht_first_up_subvol(xlator_t *this);
xlator_t *
@@ -1228,25 +1122,19 @@ dht_newfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
struct iatt *preparent, struct iatt *postparent, dict_t *xdata);
int
-gf_defrag_status_get(dht_conf_t *conf, dict_t *dict);
-
-void
-gf_defrag_set_pause_state(gf_tier_conf_t *tier_conf, tier_pause_state_t state);
-
-tier_pause_state_t
-gf_defrag_get_pause_state(gf_tier_conf_t *tier_conf);
+dht_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
int
-gf_defrag_pause_tier(xlator_t *this, gf_defrag_info_t *defrag);
-
-tier_pause_state_t
-gf_defrag_check_pause_tier(gf_tier_conf_t *defrag);
+dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xattr, dict_t *xdata);
int
-gf_defrag_resume_tier(xlator_t *this, gf_defrag_info_t *defrag);
-
+dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
int
-gf_defrag_start_detach_tier(gf_defrag_info_t *defrag);
+gf_defrag_status_get(dht_conf_t *conf, dict_t *dict);
int
gf_defrag_stop(dht_conf_t *conf, gf_defrag_status_t status, dict_t *output);
@@ -1279,10 +1167,6 @@ int
dht_dir_attr_heal(void *data);
int
dht_dir_attr_heal_done(int ret, call_frame_t *sync_frame, void *data);
-int
-dht_dir_has_layout(dict_t *xattr, char *name);
-gf_boolean_t
-dht_is_subvol_in_layout(dht_layout_t *layout, xlator_t *xlator);
xlator_t *
dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol,
xlator_t *ignore, dht_layout_t *layout,
@@ -1291,15 +1175,18 @@ xlator_t *
dht_subvol_maxspace_nonzeroinode(xlator_t *this, xlator_t *subvol,
dht_layout_t *layout);
int
+dht_dir_has_layout(dict_t *xattr, char *name);
+int
dht_linkfile_attr_heal(call_frame_t *frame, xlator_t *this);
-void
-dht_layout_dump(dht_layout_t *layout, const char *prefix);
int32_t
dht_priv_dump(xlator_t *this);
int32_t
dht_inodectx_dump(xlator_t *this, inode_t *inode);
+gf_boolean_t
+dht_is_subvol_in_layout(dht_layout_t *layout, xlator_t *xlator);
+
int
dht_inode_ctx_get_mig_info(xlator_t *this, inode_t *inode,
xlator_t **src_subvol, xlator_t **dst_subvol);
@@ -1313,11 +1200,6 @@ dht_subvol_status(dht_conf_t *conf, xlator_t *subvol);
void
dht_log_new_layout_for_dir_selfheal(xlator_t *this, loc_t *loc,
dht_layout_t *layout);
-int
-dht_lookup_everywhere_done(call_frame_t *frame, xlator_t *this);
-
-int
-dht_fill_dict_to_avoid_unlink_of_migrating_file(dict_t *dict);
int
dht_layout_sort(dht_layout_t *layout);
@@ -1334,9 +1216,6 @@ dht_layout_missing_dirs(dht_layout_t *layout);
int
dht_refresh_layout(call_frame_t *frame);
-gf_boolean_t
-dht_is_tier_xlator(xlator_t *this);
-
int
dht_build_parent_loc(xlator_t *this, loc_t *parent, loc_t *child,
int32_t *op_errno);
@@ -1367,22 +1246,6 @@ dht_get_lock_subvolume(xlator_t *this, struct gf_flock *lock,
int
dht_lk_inode_unref(call_frame_t *frame, int32_t op_ret);
-void
-dht_normalize_stats(struct statvfs *buf, unsigned long bsize,
- unsigned long frsize);
-
-int
-add_opt(char **optsp, const char *opt);
-
-int
-dht_aggregate_split_brain_xattr(dict_t *dst, char *key, data_t *value);
-
-int
-dht_remove_stale_linkto(void *data);
-
-int
-dht_remove_stale_linkto_cbk(int ret, call_frame_t *sync_frame, void *data);
-
int
dht_fd_ctx_set(xlator_t *this, fd_t *fd, xlator_t *subvol);
@@ -1454,9 +1317,6 @@ dht_dir_heal_xattrs(void *data);
int
dht_dir_heal_xattrs_done(int ret, call_frame_t *sync_frame, void *data);
-void
-dht_aggregate_xattr(dict_t *dst, dict_t *src);
-
int32_t
dht_dict_set_array(dict_t *dict, char *key, int32_t value[], int32_t size);
@@ -1468,25 +1328,12 @@ dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst,
dict_t *src, int *uret, int *uflag);
int
-dht_dir_xattr_heal(xlator_t *this, dht_local_t *local);
-
-int32_t
-dht_dict_get_array(dict_t *dict, char *key, int32_t value[], int32_t size,
- int *errst);
-
-xlator_t *
-dht_inode_get_hashed_subvol(inode_t *inode, xlator_t *this, loc_t *loc);
+dht_dir_xattr_heal(xlator_t *this, dht_local_t *local, int *op_errno);
int
dht_common_mark_mdsxattr(call_frame_t *frame, int *errst, int flag);
int
-dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata);
-
-int
-dht_inode_ctx_mdsvol_set(inode_t *inode, xlator_t *this, xlator_t *mds_subvol);
-int
dht_inode_ctx_mdsvol_get(inode_t *inode, xlator_t *this, xlator_t **mdsvol);
int
@@ -1495,14 +1342,6 @@ dht_selfheal_dir_setattr(call_frame_t *frame, loc_t *loc, struct iatt *stbuf,
/* Abstract out the DHT-IATT-IN-DICT */
-int
-dht_request_iatt_in_xdata(xlator_t *this, dict_t *xattr_req);
-
-int
-dht_read_iatt_from_xdata(xlator_t *this, dict_t *xdata, struct iatt *stbuf);
-
-int
-is_permission_different(ia_prot_t *prot1, ia_prot_t *prot2);
void
dht_selfheal_layout_new_directory(call_frame_t *frame, loc_t *loc,
dht_layout_t *new_layout);
@@ -1523,4 +1362,23 @@ int
dht_pt_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
dict_t *xdata);
+int32_t
+dht_check_remote_fd_failed_error(dht_local_t *local, int op_ret, int op_errno);
+
+int
+dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
+
+int32_t
+dht_create_lock(call_frame_t *frame, xlator_t *subvol);
+
+int
+dht_set_parent_layout_in_dict(loc_t *loc, xlator_t *this, dht_local_t *local);
+
+int
+dht_dir_layout_error_check(xlator_t *this, inode_t *inode);
+
+int
+dht_inode_ctx_mdsvol_set(inode_t *inode, xlator_t *this, xlator_t *mds_subvol);
#endif /* _DHT_H */
diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c
index 148b4f01133..c0588828fdb 100644
--- a/xlators/cluster/dht/src/dht-diskusage.c
+++ b/xlators/cluster/dht/src/dht-diskusage.c
@@ -10,14 +10,10 @@
/* TODO: add NS locking */
-#include "glusterfs.h"
-#include "xlator.h"
#include "dht-common.h"
-#include "dht-messages.h"
-#include "defaults.h"
#include <sys/time.h>
-#include "events.h"
+#include <glusterfs/events.h>
int
dht_du_info_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
@@ -155,22 +151,18 @@ dht_get_du_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
dht_conf_t *conf = NULL;
call_frame_t *statfs_frame = NULL;
dht_local_t *statfs_local = NULL;
- struct timeval tv = {
- 0,
- };
loc_t tmp_loc = {
0,
};
+ time_t now;
conf = this->private;
-
- gettimeofday(&tv, NULL);
-
+ now = gf_time();
/* make it root gfid, should be enough to get the proper
info back */
tmp_loc.gfid[15] = 1;
- if (tv.tv_sec > (conf->refresh_interval + conf->last_stat_fetch.tv_sec)) {
+ if (now > (conf->refresh_interval + conf->last_stat_fetch)) {
statfs_frame = copy_frame(frame);
if (!statfs_frame) {
goto err;
@@ -202,7 +194,7 @@ dht_get_du_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
statfs_local->params);
}
- conf->last_stat_fetch.tv_sec = tv.tv_sec;
+ conf->last_stat_fetch = now;
}
return 0;
err:
diff --git a/xlators/cluster/dht/src/dht-hashfn.c b/xlators/cluster/dht/src/dht-hashfn.c
index 2f15c0370cc..acda67c312a 100644
--- a/xlators/cluster/dht/src/dht-hashfn.c
+++ b/xlators/cluster/dht/src/dht-hashfn.c
@@ -8,13 +8,12 @@
cases as published by the Free Software Foundation.
*/
-#include "glusterfs.h"
-#include "xlator.h"
#include "dht-common.h"
-#include "hashfn.h"
+#include <glusterfs/hashfn.h>
-int
-dht_hash_compute_internal(int type, const char *name, uint32_t *hash_p)
+static int
+dht_hash_compute_internal(int type, const char *name, const int len,
+ uint32_t *hash_p)
{
int ret = 0;
uint32_t hash = 0;
@@ -22,7 +21,7 @@ dht_hash_compute_internal(int type, const char *name, uint32_t *hash_p)
switch (type) {
case DHT_HASH_TYPE_DM:
case DHT_HASH_TYPE_DM_USER:
- hash = gf_dm_hashfn(name, strlen(name));
+ hash = gf_dm_hashfn(name, len);
break;
default:
ret = -1;
@@ -36,7 +35,12 @@ dht_hash_compute_internal(int type, const char *name, uint32_t *hash_p)
return ret;
}
-static gf_boolean_t
+/* The function returns:
+ * 0 : in case no munge took place
+ * >0 : the length (inc. terminating NULL!) of the newly modified string,
+ * if it was munged.
+ */
+static int
dht_munge_name(const char *original, char *modified, size_t len, regex_t *re)
{
regmatch_t matches[2] = {
@@ -54,14 +58,14 @@ dht_munge_name(const char *original, char *modified, size_t len, regex_t *re)
if (new_len < len) {
memcpy(modified, original + matches[1].rm_so, new_len);
modified[new_len] = '\0';
- return _gf_true;
+ return new_len + 1; /* +1 for the terminating NULL */
}
}
}
/* This is guaranteed safe because of how the dest was allocated. */
strcpy(modified, original);
- return _gf_false;
+ return 0;
}
int
@@ -70,36 +74,37 @@ dht_hash_compute(xlator_t *this, int type, const char *name, uint32_t *hash_p)
char *rsync_friendly_name = NULL;
dht_conf_t *priv = NULL;
size_t len = 0;
- gf_boolean_t munged = _gf_false;
+ int munged = 0;
priv = this->private;
+ if (name == NULL)
+ return -1;
+
+ len = strlen(name) + 1;
+ rsync_friendly_name = alloca(len);
+
LOCK(&priv->lock);
{
if (priv->extra_regex_valid) {
- len = strlen(name) + 1;
- rsync_friendly_name = alloca(len);
munged = dht_munge_name(name, rsync_friendly_name, len,
&priv->extra_regex);
}
if (!munged && priv->rsync_regex_valid) {
- len = strlen(name) + 1;
- rsync_friendly_name = alloca(len);
gf_msg_trace(this->name, 0, "trying regex for %s", name);
munged = dht_munge_name(name, rsync_friendly_name, len,
&priv->rsync_regex);
- if (munged) {
- gf_msg_debug(this->name, 0, "munged down to %s",
- rsync_friendly_name);
- }
}
}
UNLOCK(&priv->lock);
-
- if (!munged) {
+ if (munged) {
+ gf_msg_debug(this->name, 0, "munged down to %s", rsync_friendly_name);
+ len = munged;
+ } else {
rsync_friendly_name = (char *)name;
}
- return dht_hash_compute_internal(type, rsync_friendly_name, hash_p);
+ return dht_hash_compute_internal(type, rsync_friendly_name, len - 1,
+ hash_p);
}
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
index 22ce8bc4d4c..3f2fe43d5f3 100644
--- a/xlators/cluster/dht/src/dht-helper.c
+++ b/xlators/cluster/dht/src/dht-helper.c
@@ -8,10 +8,9 @@
cases as published by the Free Software Foundation.
*/
-#include "glusterfs.h"
-#include "xlator.h"
#include "dht-common.h"
#include "dht-lock.h"
+#include "glusterfs/compat-errno.h" // for ENODATA on BSD
static void
dht_free_fd_ctx(dht_fd_ctx_t *fd_ctx)
@@ -65,8 +64,8 @@ __dht_fd_ctx_set(xlator_t *this, fd_t *fd, xlator_t *dst)
ret = __fd_ctx_set(fd, this, value);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_FD_CTX_SET_FAILED,
- "Failed to set fd ctx in fd=0x%p", fd);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_FD_CTX_SET_FAILED,
+ "fd=0x%p", fd, NULL);
GF_REF_PUT(fd_ctx);
}
out:
@@ -94,12 +93,13 @@ dht_fd_ctx_set(xlator_t *this, fd_t *fd, xlator_t *dst)
goto unlock;
} else {
/* This would be a big problem*/
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_INVALID_VALUE,
- "Different dst found in the fd ctx");
-
/* Overwrite and hope for the best*/
fd_ctx->opened_on_dst = (uint64_t)(uintptr_t)dst;
- goto unlock;
+ UNLOCK(&fd->lock);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_INVALID_VALUE,
+ NULL);
+
+ goto out;
}
}
ret = __dht_fd_ctx_set(this, fd, dst);
@@ -124,13 +124,13 @@ dht_fd_ctx_get(xlator_t *this, fd_t *fd)
{
ret = __fd_ctx_get(fd, this, &tmp_val);
if ((ret < 0) || (tmp_val == 0)) {
- UNLOCK(&fd->lock);
- goto out;
+ goto unlock;
}
fd_ctx = (dht_fd_ctx_t *)(uintptr_t)tmp_val;
GF_REF_GET(fd_ctx);
}
+unlock:
UNLOCK(&fd->lock);
out:
@@ -365,10 +365,27 @@ dht_check_and_open_fd_on_subvol_complete(int ret, call_frame_t *frame,
break;
+ case GF_FOP_FXATTROP:
+ STACK_WIND(frame, dht_common_xattrop_cbk, subvol,
+ subvol->fops->fxattrop, local->fd,
+ local->rebalance.flags, local->rebalance.xattr,
+ local->xattr_req);
+ break;
+
+ case GF_FOP_FGETXATTR:
+ STACK_WIND(frame, dht_getxattr_cbk, subvol, subvol->fops->fgetxattr,
+ local->fd, local->key, NULL);
+ break;
+
+ case GF_FOP_FINODELK:
+ STACK_WIND(frame, dht_finodelk_cbk, subvol, subvol->fops->finodelk,
+ local->key, local->fd, local->rebalance.lock_cmd,
+ &local->rebalance.flock, local->xattr_req);
+ break;
default:
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP,
- "Unknown FOP on fd (%p) on file %s @ %s", fd,
- uuid_utoa(fd->inode->gfid), subvol->name);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP, "fd=%p",
+ fd, "gfid=%s", uuid_utoa(fd->inode->gfid), "name=%s",
+ subvol->name, NULL);
break;
}
@@ -428,10 +445,22 @@ handle_err:
DHT_STACK_UNWIND(fremovexattr, frame, -1, op_errno, NULL);
break;
+ case GF_FOP_FXATTROP:
+ DHT_STACK_UNWIND(fxattrop, frame, -1, op_errno, NULL, NULL);
+ break;
+
+ case GF_FOP_FGETXATTR:
+ DHT_STACK_UNWIND(fgetxattr, frame, -1, op_errno, NULL, NULL);
+ break;
+
+ case GF_FOP_FINODELK:
+ DHT_STACK_UNWIND(finodelk, frame, -1, op_errno, NULL);
+ break;
+
default:
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP,
- "Unknown FOP on fd (%p) on file %s @ %s", fd,
- uuid_utoa(fd->inode->gfid), subvol->name);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP, "fd=%p",
+ fd, "gfid=%s", uuid_utoa(fd->inode->gfid), "name=%s",
+ subvol->name, NULL);
break;
}
@@ -484,10 +513,9 @@ dht_check_and_open_fd_on_subvol_task(void *data)
fd, NULL, NULL);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_OPEN_FD_ON_DST_FAILED,
- "Failed to open the fd"
- " (%p, flags=0%o) on file %s @ %s",
- fd, fd->flags, uuid_utoa(fd->inode->gfid), subvol->name);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_OPEN_FD_ON_DST_FAILED,
+ "fd=%p", fd, "flags=0%o", fd->flags, "gfid=%s",
+ uuid_utoa(fd->inode->gfid), "name=%s", subvol->name, NULL);
/* This can happen if the cached subvol was updated in the
* inode_ctx and the fd was opened on the new cached suvol
* after this fop was wound on the old cached subvol.
@@ -533,10 +561,8 @@ dht_check_and_open_fd_on_subvol(xlator_t *this, call_frame_t *frame)
dht_check_and_open_fd_on_subvol_complete, frame, frame);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, 0,
- "Failed to create synctask"
- " to check and open fd=%p",
- local->fd);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SYNCTASK_CREATE_FAILED,
+ "to-check-and-open fd=%p", local->fd, NULL);
}
return ret;
@@ -645,9 +671,7 @@ dht_get_subvol_from_id(xlator_t *this, int client_id)
ret = gf_asprintf(&sid, "%d", client_id);
if (ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_ASPRINTF_FAILED,
- "asprintf failed while "
- "fetching subvol from the id");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_ASPRINTF_FAILED, NULL);
goto out;
}
@@ -1260,6 +1284,7 @@ dht_migration_complete_check_task(void *data)
fd_t *tmp = NULL;
uint64_t tmp_miginfo = 0;
dht_migrate_info_t *miginfo = NULL;
+ gf_boolean_t skip_open = _gf_false;
int open_failed = 0;
this = THIS;
@@ -1306,9 +1331,9 @@ dht_migration_complete_check_task(void *data)
* migrated by two different layers. Raise
* a warning here.
*/
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_HAS_MIGINFO,
- "%s: Found miginfo in the inode ctx",
- tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid));
+ gf_smsg(
+ this->name, GF_LOG_WARNING, 0, DHT_MSG_HAS_MIGINFO, "tmp=%s",
+ tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid), NULL);
miginfo = (void *)(uintptr_t)tmp_miginfo;
GF_REF_PUT(miginfo);
@@ -1329,10 +1354,9 @@ dht_migration_complete_check_task(void *data)
ret = syncop_lookup(this, &tmp_loc, &stbuf, 0, 0, 0);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_FILE_LOOKUP_FAILED,
- "%s: failed to lookup the file on %s",
- tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
- this->name);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_FILE_LOOKUP_FAILED,
+ "tmp=%s", tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
+ "name=%s", this->name, NULL);
local->op_errno = -ret;
ret = -1;
goto out;
@@ -1340,18 +1364,15 @@ dht_migration_complete_check_task(void *data)
dst_node = dht_subvol_get_cached(this, tmp_loc.inode);
if (linkto_target && dst_node != linkto_target) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_INVALID_LINKFILE,
- "linkto target (%s) is "
- "different from cached-subvol (%s). Treating %s as "
- "destination subvol",
- linkto_target->name, dst_node->name, dst_node->name);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_INVALID_LINKFILE,
+ "linkto_target_name=%s", linkto_target->name, "dst_name=%s",
+ dst_node->name, NULL);
}
if (gf_uuid_compare(stbuf.ia_gfid, tmp_loc.inode->gfid)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH,
- "%s: gfid different on the target file on %s",
- tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
- dst_node->name);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH, "tmp=%s",
+ tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
+ "dst_name=%s", dst_node->name, NULL);
ret = -1;
local->op_errno = EIO;
goto out;
@@ -1398,24 +1419,34 @@ dht_migration_complete_check_task(void *data)
* the loop will cause the destruction of the fd. So we need to
* iterate the list safely because iter_fd cannot be trusted.
*/
- list_for_each_entry_safe(iter_fd, tmp, &inode->fd_list, inode_list)
- {
- if (fd_is_anonymous(iter_fd))
- continue;
-
- if (dht_fd_open_on_dst(this, iter_fd, dst_node))
- continue;
-
+ iter_fd = list_entry((&inode->fd_list)->next, typeof(*iter_fd), inode_list);
+ while (&iter_fd->inode_list != (&inode->fd_list)) {
+ if (fd_is_anonymous(iter_fd) ||
+ (dht_fd_open_on_dst(this, iter_fd, dst_node))) {
+ if (!tmp) {
+ iter_fd = list_entry(iter_fd->inode_list.next, typeof(*iter_fd),
+ inode_list);
+ continue;
+ }
+ skip_open = _gf_true;
+ }
/* We need to release the inode->lock before calling
* syncop_open() to avoid possible deadlocks. However this
* can cause the iter_fd to be released by other threads.
* To avoid this, we take a reference before releasing the
* lock.
*/
- __fd_ref(iter_fd);
+ fd_ref(iter_fd);
UNLOCK(&inode->lock);
+ if (tmp) {
+ fd_unref(tmp);
+ tmp = NULL;
+ }
+ if (skip_open)
+ goto next;
+
/* flags for open are stripped down to allow following the
* new location of the file, otherwise we can get EEXIST or
* truncate the file again as rebalance is moving the data */
@@ -1423,12 +1454,10 @@ dht_migration_complete_check_task(void *data)
(iter_fd->flags & ~(O_CREAT | O_EXCL | O_TRUNC)),
iter_fd, NULL, NULL);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, -ret,
- DHT_MSG_OPEN_FD_ON_DST_FAILED,
- "failed"
- " to open the fd"
- " (%p, flags=0%o) on file %s @ %s",
- iter_fd, iter_fd->flags, path, dst_node->name);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_OPEN_FD_ON_DST_FAILED, "id=%p", iter_fd,
+ "flags=0%o", iter_fd->flags, "path=%s", path, "name=%s",
+ dst_node->name, NULL);
open_failed = 1;
local->op_errno = -ret;
@@ -1437,9 +1466,11 @@ dht_migration_complete_check_task(void *data)
dht_fd_ctx_set(this, iter_fd, dst_node);
}
- fd_unref(iter_fd);
-
+ next:
LOCK(&inode->lock);
+ skip_open = _gf_false;
+ tmp = iter_fd;
+ iter_fd = list_entry(tmp->inode_list.next, typeof(*tmp), inode_list);
}
SYNCTASK_SETID(frame->root->uid, frame->root->gid);
@@ -1452,6 +1483,10 @@ dht_migration_complete_check_task(void *data)
unlock:
UNLOCK(&inode->lock);
+ if (tmp) {
+ fd_unref(tmp);
+ tmp = NULL;
+ }
out:
if (dict) {
@@ -1533,6 +1568,7 @@ dht_rebalance_inprogress_task(void *data)
int open_failed = 0;
uint64_t tmp_miginfo = 0;
dht_migrate_info_t *miginfo = NULL;
+ gf_boolean_t skip_open = _gf_false;
this = THIS;
frame = data;
@@ -1575,9 +1611,9 @@ dht_rebalance_inprogress_task(void *data)
* migrated by two different layers. Raise
* a warning here.
*/
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_HAS_MIGINFO,
- "%s: Found miginfo in the inode ctx",
- tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid));
+ gf_smsg(
+ this->name, GF_LOG_WARNING, 0, DHT_MSG_HAS_MIGINFO, "tmp=%s",
+ tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid), NULL);
miginfo = (void *)(uintptr_t)tmp_miginfo;
GF_REF_PUT(miginfo);
}
@@ -1586,17 +1622,16 @@ dht_rebalance_inprogress_task(void *data)
}
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_GET_XATTR_FAILED,
- "%s: failed to get the 'linkto' xattr", local->loc.path);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_GET_XATTR_FAILED,
+ "path=%s", local->loc.path, NULL);
ret = -1;
goto out;
}
dst_node = dht_linkfile_subvol(this, NULL, NULL, dict);
if (!dst_node) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SUBVOL_NOT_FOUND,
- "%s: failed to get the 'linkto' xattr from dict",
- local->loc.path);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GET_XATTR_FAILED,
+ "path=%s", local->loc.path, NULL);
ret = -1;
goto out;
}
@@ -1613,20 +1648,17 @@ dht_rebalance_inprogress_task(void *data)
/* lookup on dst */
ret = syncop_lookup(dst_node, &tmp_loc, &stbuf, NULL, NULL, NULL);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret,
- DHT_MSG_FILE_LOOKUP_ON_DST_FAILED,
- "%s: failed to lookup the file on %s",
- tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
- dst_node->name);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_FILE_LOOKUP_FAILED,
+ "tmp=%s", tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
+ "name=%s", dst_node->name, NULL);
ret = -1;
goto out;
}
if (gf_uuid_compare(stbuf.ia_gfid, tmp_loc.inode->gfid)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH,
- "%s: gfid different on the target file on %s",
- tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
- dst_node->name);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH, "tmp=%s",
+ tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
+ "name=%s", dst_node->name, NULL);
ret = -1;
goto out;
}
@@ -1653,24 +1685,40 @@ dht_rebalance_inprogress_task(void *data)
* the loop will cause the destruction of the fd. So we need to
* iterate the list safely because iter_fd cannot be trusted.
*/
- list_for_each_entry_safe(iter_fd, tmp, &inode->fd_list, inode_list)
- {
- if (fd_is_anonymous(iter_fd))
- continue;
-
- if (dht_fd_open_on_dst(this, iter_fd, dst_node))
- continue;
-
+ iter_fd = list_entry((&inode->fd_list)->next, typeof(*iter_fd), inode_list);
+ while (&iter_fd->inode_list != (&inode->fd_list)) {
/* We need to release the inode->lock before calling
* syncop_open() to avoid possible deadlocks. However this
* can cause the iter_fd to be released by other threads.
* To avoid this, we take a reference before releasing the
* lock.
*/
- __fd_ref(iter_fd);
+ if (fd_is_anonymous(iter_fd) ||
+ (dht_fd_open_on_dst(this, iter_fd, dst_node))) {
+ if (!tmp) {
+ iter_fd = list_entry(iter_fd->inode_list.next, typeof(*iter_fd),
+ inode_list);
+ continue;
+ }
+ skip_open = _gf_true;
+ }
+
+ /* Yes, this is ugly but there isn't a cleaner way to do this
+ * the fd_ref is an atomic increment so not too bad. We want to
+ * reduce the number of inode locks and unlocks.
+ */
+
+ fd_ref(iter_fd);
UNLOCK(&inode->lock);
+ if (tmp) {
+ fd_unref(tmp);
+ tmp = NULL;
+ }
+ if (skip_open)
+ goto next;
+
/* flags for open are stripped down to allow following the
* new location of the file, otherwise we can get EEXIST or
* truncate the file again as rebalance is moving the data */
@@ -1678,11 +1726,10 @@ dht_rebalance_inprogress_task(void *data)
(iter_fd->flags & ~(O_CREAT | O_EXCL | O_TRUNC)),
iter_fd, NULL, NULL);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, -ret,
- DHT_MSG_OPEN_FD_ON_DST_FAILED,
- "failed to send open "
- "the fd (%p, flags=0%o) on file %s @ %s",
- iter_fd, iter_fd->flags, path, dst_node->name);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_OPEN_FD_ON_DST_FAILED, "fd=%p", iter_fd,
+ "flags=0%o", iter_fd->flags, "path=%s", path, "name=%s",
+ dst_node->name, NULL);
ret = -1;
open_failed = 1;
} else {
@@ -1691,9 +1738,11 @@ dht_rebalance_inprogress_task(void *data)
dht_fd_ctx_set(this, iter_fd, dst_node);
}
- fd_unref(iter_fd);
-
+ next:
LOCK(&inode->lock);
+ skip_open = _gf_false;
+ tmp = iter_fd;
+ iter_fd = list_entry(tmp->inode_list.next, typeof(*tmp), inode_list);
}
SYNCTASK_SETID(frame->root->uid, frame->root->gid);
@@ -1701,6 +1750,10 @@ dht_rebalance_inprogress_task(void *data)
unlock:
UNLOCK(&inode->lock);
+ if (tmp) {
+ fd_unref(tmp);
+ tmp = NULL;
+ }
if (open_failed) {
ret = -1;
goto out;
@@ -1708,9 +1761,8 @@ unlock:
ret = dht_inode_ctx_set_mig_info(this, inode, src_node, dst_node);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
- "%s: failed to set inode-ctx target file at %s", local->loc.path,
- dst_node->name);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "path=%s", local->loc.path, "name=%s", dst_node->name, NULL);
goto out;
}
@@ -1803,12 +1855,16 @@ dht_inode_ctx_time_update(inode_t *inode, xlator_t *this, struct iatt *stat,
time = &ctx->time;
- DHT_UPDATE_TIME(time->mtime, time->mtime_nsec, stat->ia_mtime,
- stat->ia_mtime_nsec, inode, post);
- DHT_UPDATE_TIME(time->ctime, time->ctime_nsec, stat->ia_ctime,
- stat->ia_ctime_nsec, inode, post);
- DHT_UPDATE_TIME(time->atime, time->atime_nsec, stat->ia_atime,
- stat->ia_atime_nsec, inode, post);
+ LOCK(&inode->lock);
+ {
+ DHT_UPDATE_TIME(time->mtime, time->mtime_nsec, stat->ia_mtime,
+ stat->ia_mtime_nsec, post);
+ DHT_UPDATE_TIME(time->ctime, time->ctime_nsec, stat->ia_ctime,
+ stat->ia_ctime_nsec, post);
+ DHT_UPDATE_TIME(time->atime, time->atime_nsec, stat->ia_atime,
+ stat->ia_atime_nsec, post);
+ }
+ UNLOCK(&inode->lock);
ret = dht_inode_ctx_set(inode, this, ctx);
out:
@@ -1877,9 +1933,7 @@ dht_heal_path(xlator_t *this, char *path, inode_table_t *itable)
};
char *bname = NULL;
char *save_ptr = NULL;
- uuid_t gfid = {
- 0,
- };
+ static uuid_t gfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
char *tmp_path = NULL;
tmp_path = gf_strdup(path);
@@ -1887,9 +1941,6 @@ dht_heal_path(xlator_t *this, char *path, inode_table_t *itable)
goto out;
}
- memset(gfid, 0, 16);
- gfid[15] = 1;
-
gf_uuid_copy(loc.pargfid, gfid);
loc.parent = inode_ref(itable->root);
@@ -1918,10 +1969,10 @@ dht_heal_path(xlator_t *this, char *path, inode_table_t *itable)
*/
linked_inode = loc.inode;
bname = strtok_r(NULL, "/", &save_ptr);
- inode_unref(loc.parent);
if (!bname) {
goto out;
}
+ inode_unref(loc.parent);
loc.parent = loc.inode;
gf_uuid_copy(loc.pargfid, loc.inode->gfid);
loc.inode = NULL;
@@ -1933,10 +1984,9 @@ dht_heal_path(xlator_t *this, char *path, inode_table_t *itable)
ret = syncop_lookup(this, &loc, &iatt, NULL, NULL, NULL);
if (ret) {
- gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_SELFHEAL_FAILED,
- "Healing of path %s failed on subvolume %s for "
- "directory %s",
- path, this->name, bname);
+ gf_smsg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_SELFHEAL_FAILED,
+ "path=%s", path, "subvolume=%s", this->name, "bname=%s",
+ bname, NULL);
goto out;
}
@@ -1994,10 +2044,8 @@ dht_heal_full_path(void *data)
ret = syncop_getxattr(source, &loc, &dict, GET_ANCESTRY_PATH_KEY, NULL,
NULL);
if (ret) {
- gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_SELFHEAL_FAILED,
- "Failed to get path from subvol %s. Aborting "
- "directory healing.",
- source->name);
+ gf_smsg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_HEAL_ABORT,
+ "subvol=%s", source->name, NULL);
goto out;
}
@@ -2035,6 +2083,7 @@ dht_heal_full_path_done(int op_ret, call_frame_t *heal_frame, void *data)
dht_local_t *local = NULL;
xlator_t *this = NULL;
int ret = -1;
+ int op_errno = 0;
local = heal_frame->local;
main_frame = local->main_frame;
@@ -2044,10 +2093,12 @@ dht_heal_full_path_done(int op_ret, call_frame_t *heal_frame, void *data)
dht_set_fixed_dir_stat(&local->postparent);
if (local->need_xattr_heal) {
local->need_xattr_heal = 0;
- ret = dht_dir_xattr_heal(this, local);
- if (ret)
- gf_msg(this->name, GF_LOG_ERROR, ret, DHT_MSG_DIR_XATTR_HEAL_FAILED,
- "xattr heal failed for directory %s ", local->loc.path);
+ ret = dht_dir_xattr_heal(this, local, &op_errno);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED, "path=%s", local->loc.path,
+ NULL);
+ }
}
DHT_STACK_UNWIND(lookup, main_frame, 0, 0, local->inode, &local->stbuf,
@@ -2134,16 +2185,15 @@ dht_get_lock_subvolume(xlator_t *this, struct gf_flock *lock,
ret = __dht_lock_subvol_set(inode, this, cached_subvol);
if (ret) {
gf_uuid_unparse(inode->gfid, gfid);
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SET_INODE_CTX_FAILED,
- "Failed to set lock_subvol in "
- "inode ctx for gfid %s",
- gfid);
- goto unlock;
+ UNLOCK(&inode->lock);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "lock_subvol gfid=%s", gfid, NULL);
+ goto post_unlock;
}
subvol = cached_subvol;
}
-unlock:
UNLOCK(&inode->lock);
+post_unlock:
if (!subvol && inode && lock->l_type != F_UNLCK) {
inode_unref(inode);
}
@@ -2167,8 +2217,8 @@ dht_lk_inode_unref(call_frame_t *frame, int32_t op_ret)
inode = local->loc.inode ? local->loc.inode : local->fd->inode;
}
if (!inode) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LOCK_INODE_UNREF_FAILED,
- "Found a NULL inode. Failed to unref the inode");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LOCK_INODE_UNREF_FAILED,
+ NULL);
goto out;
}
@@ -2194,11 +2244,8 @@ dht_lk_inode_unref(call_frame_t *frame, int32_t op_ret)
inode_unref(inode);
} else {
gf_uuid_unparse(inode->gfid, gfid);
- gf_msg(this->name, GF_LOG_WARNING, 0,
- DHT_MSG_LOCK_INODE_UNREF_FAILED,
- "Unlock request failed for gfid %s."
- "Failed to unref the inode",
- gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LOCK_INODE_UNREF_FAILED, "gfid=%s", gfid, NULL);
goto out;
}
default:
@@ -2220,12 +2267,11 @@ dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst,
int luret = -1;
int luflag = -1;
int i = 0;
+ char **xattrs_to_heal;
if (!src || !dst) {
- gf_msg(this->name, GF_LOG_WARNING, EINVAL, DHT_MSG_DICT_SET_FAILED,
- "src or dst is NULL. Failed to set "
- " dictionary value for path %s",
- local->loc.path);
+ gf_smsg(this->name, GF_LOG_WARNING, EINVAL, DHT_MSG_DST_NULL_SET_FAILED,
+ "path=%s", local->loc.path, NULL);
return;
}
/* Check if any user xattr present in src dict and set
@@ -2236,17 +2282,18 @@ dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst,
and set it to dst dict, here index start from 1 because
user xattr already checked in previous statement
*/
+
+ xattrs_to_heal = get_xattrs_to_heal();
+
for (i = 1; xattrs_to_heal[i]; i++) {
keyval = dict_get(src, xattrs_to_heal[i]);
if (keyval) {
luflag = 1;
ret = dict_set(dst, xattrs_to_heal[i], keyval);
if (ret)
- gf_msg(this->name, GF_LOG_WARNING, ENOMEM,
- DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value:key = %s for "
- "path %s",
- xattrs_to_heal[i], local->loc.path);
+ gf_smsg(this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_DICT_SET_FAILED, "key=%s", xattrs_to_heal[i],
+ "path=%s", local->loc.path, NULL);
keyval = NULL;
}
}
diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c
index f46370a9208..dbb8070b0da 100644
--- a/xlators/cluster/dht/src/dht-inode-read.c
+++ b/xlators/cluster/dht/src/dht-inode-read.c
@@ -10,25 +10,25 @@
#include "dht-common.h"
-int
+static int
dht_access2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
-int
+static int
dht_readv2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
-int
+static int
dht_attr2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
-int
+static int
dht_open2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
-int
+static int
dht_flush2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
-int
+static int
dht_lk2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
-int
+static int
dht_fsync2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
-int
+static int
dht_common_xattrop2(xlator_t *this, xlator_t *subvol, call_frame_t *frame,
int ret);
-int
+static int
dht_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, fd_t *fd, dict_t *xdata)
{
@@ -67,7 +67,7 @@ out:
return 0;
}
-int
+static int
dht_open2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -162,8 +162,8 @@ dht_file_attr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
local = frame->local;
prev = cookie;
- if ((local->fop == GF_FOP_FSTAT) && (op_ret == -1) && (op_errno == EBADF) &&
- !(local->fd_checked)) {
+ if ((local->fop == GF_FOP_FSTAT) &&
+ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
@@ -216,7 +216,7 @@ err:
return 0;
}
-int
+static int
dht_attr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -258,7 +258,7 @@ out:
return 0;
}
-int
+static int
dht_attr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, struct iatt *stbuf, dict_t *xdata)
{
@@ -272,19 +272,19 @@ dht_attr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
{
if (op_ret == -1) {
local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
prev->name);
- goto unlock;
+ goto post_unlock;
}
dht_iatt_merge(this, &local->stbuf, stbuf);
local->op_ret = 0;
}
-unlock:
UNLOCK(&frame->lock);
-
+post_unlock:
this_call_cnt = dht_frame_return(frame);
if (is_last_call(this_call_cnt)) {
DHT_STACK_UNWIND(stat, frame, local->op_ret, local->op_errno,
@@ -431,7 +431,7 @@ dht_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
if (local->call_cnt != 1)
goto out;
- if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) {
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
@@ -473,7 +473,7 @@ out:
return 0;
}
-int
+static int
dht_readv2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -558,7 +558,7 @@ err:
return 0;
}
-int
+static int
dht_access_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, dict_t *xdata)
{
@@ -606,7 +606,7 @@ out:
return 0;
}
-int
+static int
dht_access2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -703,7 +703,7 @@ dht_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
if (local->call_cnt != 1)
goto out;
- if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) {
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
@@ -735,7 +735,7 @@ out:
return 0;
}
-int
+static int
dht_flush2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -820,7 +820,7 @@ dht_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
local->op_errno = op_errno;
- if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) {
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
@@ -881,7 +881,7 @@ out:
return 0;
}
-int
+static int
dht_fsync2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -959,7 +959,7 @@ err:
/* TODO: for 'lk()' call, we need some other special error, may be ESTALE to
indicate that lock migration happened on the fd, so we can consider it as
phase 2 of migration */
-int
+static int
dht_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, struct gf_flock *flock, dict_t *xdata)
{
@@ -1006,7 +1006,7 @@ out:
return 0;
}
-int
+static int
dht_lk2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -1087,7 +1087,7 @@ err:
return 0;
}
-int
+static int
dht_lease_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, struct gf_lease *lease, dict_t *xdata)
{
@@ -1129,7 +1129,7 @@ err:
}
/* Symlinks are currently not migrated, so no need for any check here */
-int
+static int
dht_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, const char *path, struct iatt *stbuf,
dict_t *xdata)
@@ -1192,6 +1192,29 @@ err:
return 0;
}
+/* Get both DHT_IATT_IN_XDATA_KEY and DHT_MODE_IN_XDATA_KEY
+ * Use DHT_MODE_IN_XDATA_KEY if available, else fall back to
+ * DHT_IATT_IN_XDATA_KEY
+ * This will return a dummy iatt with only the mode and type set
+ */
+static int
+dht_read_iatt_from_xdata(dict_t *xdata, struct iatt *stbuf)
+{
+ int ret = -1;
+ int32_t mode = 0;
+
+ ret = dict_get_int32(xdata, DHT_MODE_IN_XDATA_KEY, &mode);
+
+ if (ret) {
+ ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&stbuf);
+ } else {
+ stbuf->ia_prot = ia_prot_from_st_mode(mode);
+ stbuf->ia_type = ia_type_from_st_mode(mode);
+ }
+
+ return ret;
+}
+
int
dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *dict,
@@ -1223,7 +1246,14 @@ dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (local->call_cnt != 1)
goto out;
- ret = dht_read_iatt_from_xdata(this, xdata, &stbuf);
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
+
+ ret = dht_read_iatt_from_xdata(xdata, &stbuf);
if ((!op_ret) && (ret)) {
/* This is a potential problem and can cause corruption
@@ -1275,7 +1305,7 @@ out:
return 0;
}
-int
+static int
dht_common_xattrop2(xlator_t *this, xlator_t *subvol, call_frame_t *frame,
int ret)
{
@@ -1334,7 +1364,7 @@ out:
return 0;
}
-int
+static int
dht_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
@@ -1342,6 +1372,22 @@ dht_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
+/* Set both DHT_IATT_IN_XDATA_KEY and DHT_MODE_IN_XDATA_KEY
+ * Use DHT_MODE_IN_XDATA_KEY if available. Else fall back to
+ * DHT_IATT_IN_XDATA_KEY
+ */
+static int
+dht_request_iatt_in_xdata(dict_t *xattr_req)
+{
+ int ret = -1;
+
+ ret = dict_set_int8(xattr_req, DHT_MODE_IN_XDATA_KEY, 1);
+ ret = dict_set_int8(xattr_req, DHT_IATT_IN_XDATA_KEY, 1);
+
+ /* At least one call succeeded */
+ return ret;
+}
+
int
dht_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
@@ -1384,7 +1430,7 @@ dht_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
local->rebalance.xattr = dict_ref(dict);
local->rebalance.flags = flags;
- ret = dht_request_iatt_in_xdata(this, local->xattr_req);
+ ret = dht_request_iatt_in_xdata(local->xattr_req);
if (ret) {
gf_msg_debug(this->name, 0,
@@ -1406,7 +1452,7 @@ err:
return 0;
}
-int
+static int
dht_fxattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
@@ -1454,7 +1500,7 @@ dht_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
local->rebalance.xattr = dict_ref(dict);
local->rebalance.flags = flags;
- ret = dht_request_iatt_in_xdata(this, local->xattr_req);
+ ret = dht_request_iatt_in_xdata(local->xattr_req);
if (ret) {
gf_msg_debug(this->name, 0, "Failed to set dictionary key %s fd=%p",
@@ -1479,7 +1525,7 @@ err:
* below fops, hence not implementing 'migration' related checks
*/
-int
+static int
dht_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
@@ -1535,8 +1581,26 @@ dht_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ dht_local_t *local = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+
+ local = frame->local;
+
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
+
+out:
dht_lk_inode_unref(frame, op_ret);
DHT_STACK_UNWIND(finodelk, frame, op_ret, op_errno, xdata);
+
return 0;
}
@@ -1574,6 +1638,13 @@ dht_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
if (ret)
goto err;
*/
+ local->rebalance.flock = *lock;
+ local->rebalance.lock_cmd = cmd;
+ local->key = gf_strdup(volume);
+
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
STACK_WIND(frame, dht_finodelk_cbk, lock_subvol,
lock_subvol->fops->finodelk, volume, fd, cmd, lock, xdata);
diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c
index d0d12fd7658..2f23ce90fbd 100644
--- a/xlators/cluster/dht/src/dht-inode-write.c
+++ b/xlators/cluster/dht/src/dht-inode-write.c
@@ -10,17 +10,17 @@
#include "dht-common.h"
-int
+static int
dht_writev2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
-int
+static int
dht_truncate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
-int
+static int
dht_setattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
-int
+static int
dht_fallocate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
-int
+static int
dht_discard2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
-int
+static int
dht_zerofill2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
int
@@ -49,7 +49,7 @@ dht_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
* We only check once as this could be a valid bad fd error.
*/
- if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) {
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
@@ -93,30 +93,28 @@ dht_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
/* Check if the rebalance phase1 is true */
if (IS_DHT_MIGRATION_PHASE1(postbuf)) {
- if (!dht_is_tier_xlator(this)) {
+ if (!local->xattr_req) {
+ local->xattr_req = dict_new();
if (!local->xattr_req) {
- local->xattr_req = dict_new();
- if (!local->xattr_req) {
- gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM,
- "insufficient memory");
- local->op_errno = ENOMEM;
- local->op_ret = -1;
- goto out;
- }
- }
-
- ret = dict_set_uint32(local->xattr_req,
- GF_PROTECT_FROM_EXTERNAL_WRITES, 1);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_DICT_SET_FAILED, 0,
- "Failed to set key %s in dictionary",
- GF_PROTECT_FROM_EXTERNAL_WRITES);
+ gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM,
+ "insufficient memory");
local->op_errno = ENOMEM;
local->op_ret = -1;
goto out;
}
}
+ ret = dict_set_uint32(local->xattr_req, GF_PROTECT_FROM_EXTERNAL_WRITES,
+ 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_DICT_SET_FAILED, 0,
+ "Failed to set key %s in dictionary",
+ GF_PROTECT_FROM_EXTERNAL_WRITES);
+ local->op_errno = ENOMEM;
+ local->op_ret = -1;
+ goto out;
+ }
+
dht_iatt_merge(this, &local->stbuf, postbuf);
dht_iatt_merge(this, &local->prebuf, prebuf);
@@ -142,7 +140,7 @@ out:
return 0;
}
-int
+static int
dht_writev2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -262,8 +260,8 @@ dht_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
* We only check once as this could actually be a valid error.
*/
- if ((local->fop == GF_FOP_FTRUNCATE) && (op_ret == -1) &&
- ((op_errno == EBADF) || (op_errno == EINVAL)) && !(local->fd_checked)) {
+ if ((local->fop == GF_FOP_FTRUNCATE) &&
+ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
@@ -336,7 +334,7 @@ err:
return 0;
}
-int
+static int
dht_truncate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -489,7 +487,7 @@ dht_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
* We only check once as this could actually be a valid error.
*/
- if ((op_ret == -1) && (op_errno == EBADF) && !(local->fd_checked)) {
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
@@ -555,7 +553,7 @@ err:
return 0;
}
-int
+static int
dht_fallocate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -666,7 +664,7 @@ dht_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
* and a lookup updated the cached subvol in the inode ctx.
* We only check once as this could actually be a valid error.
*/
- if ((op_ret == -1) && (op_errno == EBADF) && !(local->fd_checked)) {
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
@@ -731,7 +729,7 @@ err:
return 0;
}
-int
+static int
dht_discard2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -838,7 +836,7 @@ dht_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
* and a lookup updated the cached subvol in the inode ctx.
* We only check once as this could actually be a valid error.
*/
- if ((op_ret == -1) && (op_errno == EBADF) && !(local->fd_checked)) {
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
@@ -902,7 +900,7 @@ err:
return 0;
}
-int
+static int
dht_zerofill2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -1005,8 +1003,8 @@ dht_file_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local->op_errno = op_errno;
- if ((local->fop == GF_FOP_FSETATTR) && (op_ret == -1) &&
- (op_errno == EBADF) && !(local->fd_checked)) {
+ if ((local->fop == GF_FOP_FSETATTR) &&
+ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
goto out;
@@ -1049,7 +1047,7 @@ out:
return 0;
}
-int
+static int
dht_setattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
dht_local_t *local = NULL;
@@ -1113,9 +1111,10 @@ dht_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
{
if (op_ret == -1) {
local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
prev->name);
- goto unlock;
+ goto post_unlock;
}
dht_iatt_merge(this, &local->prebuf, statpre);
@@ -1124,9 +1123,8 @@ dht_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
local->op_ret = 0;
local->op_errno = 0;
}
-unlock:
UNLOCK(&frame->lock);
-
+post_unlock:
this_call_cnt = dht_frame_return(frame);
if (is_last_call(this_call_cnt)) {
if (local->op_ret == 0)
@@ -1151,24 +1149,22 @@ dht_non_mds_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
prev = cookie;
+ if (op_ret == -1) {
+ gf_msg(this->name, op_errno, 0, 0, "subvolume %s returned -1",
+ prev->name);
+ goto post_unlock;
+ }
+
LOCK(&frame->lock);
{
- if (op_ret == -1) {
- gf_msg(this->name, op_errno, 0, 0, "subvolume %s returned -1",
- prev->name);
-
- goto unlock;
- }
-
dht_iatt_merge(this, &local->prebuf, statpre);
dht_iatt_merge(this, &local->stbuf, statpost);
local->op_ret = 0;
local->op_errno = 0;
}
-unlock:
UNLOCK(&frame->lock);
-
+post_unlock:
this_call_cnt = dht_frame_return(frame);
if (is_last_call(this_call_cnt)) {
dht_inode_ctx_time_set(local->loc.inode, this, &local->stbuf);
diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c
index 544b9638104..fda904c92c9 100644
--- a/xlators/cluster/dht/src/dht-layout.c
+++ b/xlators/cluster/dht/src/dht-layout.c
@@ -8,11 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "glusterfs.h"
-#include "xlator.h"
#include "dht-common.h"
-#include "byte-order.h"
-#include "dht-messages.h"
+#include <glusterfs/byte-order.h>
#include "unittest/unittest.h"
#define layout_base_size (sizeof(dht_layout_t))
@@ -134,9 +131,8 @@ dht_layout_search(xlator_t *this, dht_layout_t *layout, const char *name)
ret = dht_hash_compute(this, layout->type, name, &hash);
if (ret != 0) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_COMPUTE_HASH_FAILED,
- "hash computation failed for type=%d name=%s", layout->type,
- name);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_COMPUTE_HASH_FAILED,
+ "type=%d", layout->type, "name=%s", name, NULL);
goto out;
}
@@ -148,8 +144,8 @@ dht_layout_search(xlator_t *this, dht_layout_t *layout, const char *name)
}
if (!subvol) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED,
- "no subvolume for hash (value) = %u", hash);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "hash-value=0x%x", hash, NULL);
}
out:
@@ -258,7 +254,7 @@ dht_disk_layout_extract_for_subvol(xlator_t *this, dht_layout_t *layout,
return dht_disk_layout_extract(this, layout, i, disk_layout_p);
}
-int
+static int
dht_disk_layout_merge(xlator_t *this, dht_layout_t *layout, int pos,
void *disk_layout_raw, int disk_layout_len)
{
@@ -269,8 +265,8 @@ dht_disk_layout_merge(xlator_t *this, dht_layout_t *layout, int pos,
int disk_layout[4];
if (!disk_layout_raw) {
- gf_msg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_LAYOUT_MERGE_FAILED,
- "error no layout on disk for merge");
+ gf_smsg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_LAYOUT_MERGE_FAILED,
+ NULL);
return -1;
}
@@ -287,10 +283,8 @@ dht_disk_layout_merge(xlator_t *this, dht_layout_t *layout, int pos,
case DHT_HASH_TYPE_DM:
break;
default:
- gf_msg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_INVALID_DISK_LAYOUT,
- "Invalid disk layout: "
- "Catastrophic error layout with unknown type found %d",
- disk_layout[1]);
+ gf_smsg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_INVALID_DISK_LAYOUT,
+ "layout=%d", disk_layout[1], NULL);
return -1;
}
@@ -302,9 +296,10 @@ dht_disk_layout_merge(xlator_t *this, dht_layout_t *layout, int pos,
layout->list[pos].start = start_off;
layout->list[pos].stop = stop_off;
- gf_msg_trace(
- this->name, 0, "merged to layout: %u - %u (type %d, hash %d) from %s",
- start_off, stop_off, commit_hash, type, layout->list[pos].xlator->name);
+ gf_msg_trace(this->name, 0,
+ "merged to layout: 0x%x - 0x%x (hash 0x%x, type %d) from %s",
+ start_off, stop_off, commit_hash, type,
+ layout->list[pos].xlator->name);
return 0;
}
@@ -357,8 +352,8 @@ dht_layout_merge(xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
ret = dht_disk_layout_merge(this, layout, i, disk_layout_raw,
disk_layout_len);
if (ret != 0) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_MERGE_FAILED,
- "layout merge from subvolume %s failed", subvol->name);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_MERGE_FAILED,
+ "subvolume=%s", subvol->name, NULL);
goto out;
}
@@ -417,8 +412,7 @@ dht_layout_range_swap(dht_layout_t *layout, int i, int j)
layout->list[j].start = start_swap;
layout->list[j].stop = stop_swap;
}
-
-int64_t
+static int64_t
dht_layout_entry_cmp_volname(dht_layout_t *layout, int i, int j)
{
return (strcmp(layout->list[i].xlator->name, layout->list[j].xlator->name));
@@ -441,7 +435,7 @@ dht_is_subvol_in_layout(dht_layout_t *layout, xlator_t *xlator)
return _gf_false;
}
-int64_t
+static int64_t
dht_layout_entry_cmp(dht_layout_t *layout, int i, int j)
{
int64_t diff = 0;
@@ -477,7 +471,7 @@ dht_layout_sort(dht_layout_t *layout)
return 0;
}
-int
+void
dht_layout_sort_volname(dht_layout_t *layout)
{
int i = 0;
@@ -493,8 +487,6 @@ dht_layout_sort_volname(dht_layout_t *layout)
dht_layout_entry_swap(layout, i, j);
}
}
-
- return 0;
}
void
@@ -627,8 +619,8 @@ dht_layout_normalize(xlator_t *this, loc_t *loc, dht_layout_t *layout)
ret = dht_layout_sort(layout);
if (ret == -1) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_SORT_FAILED,
- "sort failed?! how the ....");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_SORT_FAILED,
+ NULL);
goto out;
}
@@ -644,10 +636,9 @@ dht_layout_normalize(xlator_t *this, loc_t *loc, dht_layout_t *layout)
" gfid = %s",
loc->path, gfid);
} else {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_ANOMALIES_INFO,
- "Found anomalies in %s (gfid = %s). "
- "Holes=%d overlaps=%d",
- loc->path, gfid, holes, overlaps);
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_ANOMALIES_INFO,
+ "path=%s", loc->path, "gfid=%s", gfid, "holes=%d", holes,
+ "overlaps=%d", overlaps, NULL);
}
ret = -1;
}
@@ -714,12 +705,11 @@ dht_layout_dir_mismatch(xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
if (!xattr) {
if (err == 0) {
if (loc) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_DICT_GET_FAILED,
- "%s: xattr dictionary is NULL", loc->path);
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_XATTR_DICT_NULL,
+ "path=%s", loc->path, NULL);
} else {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_DICT_GET_FAILED,
- "path not found: "
- "xattr dictionary is NULL");
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_XATTR_DICT_NULL,
+ "path not found", NULL);
}
ret = -1;
}
@@ -731,13 +721,13 @@ dht_layout_dir_mismatch(xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
if (dict_ret < 0) {
if (err == 0 && layout->list[pos].stop) {
if (loc) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_DISK_LAYOUT_MISSING,
- "%s: Disk layout missing, gfid = %s", loc->path, gfid);
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_DISK_LAYOUT_MISSING,
+ "path=%s", loc->path, "gfid=%s", gfid, NULL);
} else {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_DISK_LAYOUT_MISSING,
- "path not found: "
- "Disk layout missing, gfid = %s",
- gfid);
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_DISK_LAYOUT_MISSING,
+ "path not found"
+ "gfid=%s",
+ gfid, NULL);
}
ret = -1;
}
@@ -753,13 +743,13 @@ dht_layout_dir_mismatch(xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
if ((layout->list[pos].start != start_off) ||
(layout->list[pos].stop != stop_off) ||
(layout->list[pos].commit_hash != commit_hash)) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_INFO,
- "subvol: %s; inode layout - %" PRIu32 " - %" PRIu32 " - %" PRIu32
- "; "
- "disk layout - %" PRIu32 " - %" PRIu32 " - %" PRIu32,
- layout->list[pos].xlator->name, layout->list[pos].start,
- layout->list[pos].stop, layout->list[pos].commit_hash, start_off,
- stop_off, commit_hash);
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_INFO, "subvol=%s",
+ layout->list[pos].xlator->name, "inode-layout:start=0x%x",
+ layout->list[pos].start, "inode-layout:stop=0x%x",
+ layout->list[pos].stop, "layout-commit-hash=0x%x; ",
+ layout->list[pos].commit_hash, "disk-layout:start-off=0x%x",
+ start_off, "disk-layout:top-off=0x%x", stop_off,
+ "commit-hash=0x%x", commit_hash, NULL);
ret = 1;
} else {
ret = 0;
@@ -781,9 +771,8 @@ dht_layout_preset(xlator_t *this, xlator_t *subvol, inode_t *inode)
layout = dht_layout_for_subvol(this, subvol);
if (!layout) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_NO_LAYOUT_INFO,
- "no pre-set layout for subvolume %s",
- subvol ? subvol->name : "<nil>");
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_NO_LAYOUT_INFO,
+ "subvolume=%s", subvol ? subvol->name : "<nil>", NULL);
ret = -1;
goto out;
}
diff --git a/xlators/cluster/dht/src/dht-linkfile.c b/xlators/cluster/dht/src/dht-linkfile.c
index eb1695f7e05..89ec6cca56e 100644
--- a/xlators/cluster/dht/src/dht-linkfile.c
+++ b/xlators/cluster/dht/src/dht-linkfile.c
@@ -8,13 +8,10 @@
cases as published by the Free Software Foundation.
*/
-#include "glusterfs.h"
-#include "xlator.h"
-#include "compat.h"
+#include <glusterfs/compat.h>
#include "dht-common.h"
-#include "dht-messages.h"
-int
+static int
dht_linkfile_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, dict_t *xattr,
@@ -37,17 +34,16 @@ dht_linkfile_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
is_linkfile = check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name);
if (!is_linkfile)
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NOT_LINK_FILE_ERROR,
- "got non-linkfile %s:%s, gfid = %s", prev->name, local->loc.path,
- gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NOT_LINK_FILE_ERROR,
+ "name=%s", prev->name, "path=%s", local->loc.path, "gfid=%s",
+ gfid, NULL);
out:
local->linkfile.linkfile_cbk(frame, cookie, this, op_ret, op_errno, inode,
stbuf, postparent, postparent, xattr);
return 0;
}
-#define is_equal(a, b) ((a) == (b))
-int
+static int
dht_linkfile_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, struct iatt *preparent,
@@ -76,9 +72,8 @@ dht_linkfile_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
ret = dict_set_uint32(xattrs, conf->link_xattr_name, 256);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value. key : %s",
- conf->link_xattr_name);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "mame=%s", conf->link_xattr_name, NULL);
goto out;
}
@@ -128,27 +123,23 @@ dht_linkfile_create(call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
ret = dict_set_gfuuid(dict, "gfid-req", local->gfid, true);
if (ret)
- gf_msg("dht-linkfile", GF_LOG_INFO, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value: "
- "key = gfid-req, gfid = %s ",
- loc->path, gfid);
+ gf_smsg("dht-linkfile", GF_LOG_INFO, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "gfid=%s", gfid, NULL);
} else {
gf_uuid_unparse(loc->gfid, gfid);
}
ret = dict_set_str(dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
if (ret)
- gf_msg("dht-linkfile", GF_LOG_INFO, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value: key = %s,"
- " gfid = %s",
- loc->path, GLUSTERFS_INTERNAL_FOP_KEY, gfid);
+ gf_smsg("dht-linkfile", GF_LOG_INFO, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=%s", GLUSTERFS_INTERNAL_FOP_KEY,
+ "gfid=%s", gfid, NULL);
ret = dict_set_str(dict, conf->link_xattr_name, tovol->name);
if (ret < 0) {
- gf_msg(frame->this->name, GF_LOG_INFO, 0, DHT_MSG_CREATE_LINK_FAILED,
- "%s: failed to initialize linkfile data, gfid = %s", loc->path,
- gfid);
+ gf_smsg(frame->this->name, GF_LOG_INFO, 0, DHT_MSG_CREATE_LINK_FAILED,
+ "path=%s", loc->path, "gfid=%s", gfid, NULL);
goto out;
}
@@ -189,10 +180,9 @@ dht_linkfile_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret == -1) {
gf_uuid_unparse(local->loc.gfid, gfid);
- gf_msg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_UNLINK_FAILED,
- "Unlinking linkfile %s (gfid = %s)on "
- "subvolume %s failed ",
- local->loc.path, gfid, subvol->name);
+ gf_smsg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_UNLINK_FAILED,
+ "path=%s", local->loc.path, "gfid=%s", gfid, "subvolume=%s",
+ subvol->name, NULL);
}
DHT_STACK_DESTROY(frame);
@@ -260,7 +250,7 @@ out:
return subvol;
}
-int
+static int
dht_linkfile_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *statpre,
struct iatt *statpost, dict_t *xdata)
@@ -272,10 +262,9 @@ dht_linkfile_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
loc = &local->loc;
if (op_ret)
- gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_SETATTR_FAILED,
- "Failed to set attr uid/gid on %s"
- " :<gfid:%s> ",
- (loc->path ? loc->path : "NULL"), uuid_utoa(local->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_SETATTR_FAILED,
+ "path=%s", (loc->path ? loc->path : "NULL"), "gfid=%s",
+ uuid_utoa(local->gfid), NULL);
DHT_STACK_DESTROY(frame);
diff --git a/xlators/cluster/dht/src/dht-lock.c b/xlators/cluster/dht/src/dht-lock.c
index f9bac4f97c8..638821ccee5 100644
--- a/xlators/cluster/dht/src/dht-lock.c
+++ b/xlators/cluster/dht/src/dht-lock.c
@@ -44,7 +44,8 @@ dht_log_lk_array(char *name, gf_loglevel_t log_level, dht_lock_t **lk_array,
if (!lk_buf)
goto out;
- gf_msg(name, log_level, 0, DHT_MSG_LK_ARRAY_INFO, "%d. %s", i, lk_buf);
+ gf_smsg(name, log_level, 0, DHT_MSG_LK_ARRAY_INFO, "index=%d", i,
+ "lk_buf=%s", lk_buf, NULL);
GF_FREE(lk_buf);
}
@@ -313,11 +314,9 @@ dht_unlock_entrylk_done(call_frame_t *frame, void *cookie, xlator_t *this,
gfid);
if (op_ret < 0) {
- gf_msg(this->name, GF_LOG_WARNING, op_errno,
- DHT_MSG_PARENT_LAYOUT_CHANGED,
- "unlock failed on gfid: %s, stale lock might be left "
- "in DHT_LAYOUT_HEAL_DOMAIN",
- gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_UNLOCK_GFID_FAILED, "gfid=%s", gfid,
+ "DHT_LAYOUT_HEAL_DOMAIN", NULL);
}
DHT_STACK_DESTROY(frame);
@@ -339,9 +338,10 @@ dht_unlock_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
uuid_utoa_r(local->lock[0].ns.directory_ns.locks[lk_index]->loc.gfid, gfid);
if (op_ret < 0) {
- gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_UNLOCKING_FAILED,
- "unlocking failed on %s:%s",
- local->lock[0].ns.directory_ns.locks[lk_index]->xl->name, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_UNLOCKING_FAILED,
+ "name=%s",
+ local->lock[0].ns.directory_ns.locks[lk_index]->xl->name,
+ "gfid=%s", gfid, NULL);
} else {
local->lock[0].ns.directory_ns.locks[lk_index]->locked = 0;
}
@@ -375,9 +375,9 @@ dht_unlock_entrylk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
lock_frame = dht_lock_frame(frame);
if (lock_frame == NULL) {
- gf_msg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED,
- "cannot allocate a frame, not unlocking following "
- "entrylks:");
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_ALLOC_FRAME_FAILED_NOT_UNLOCKING_FOLLOWING_ENTRYLKS,
+ NULL);
dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count);
goto done;
@@ -385,9 +385,9 @@ dht_unlock_entrylk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
ret = dht_local_entrylk_init(lock_frame, lk_array, lk_count, entrylk_cbk);
if (ret < 0) {
- gf_msg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED,
- "storing locks in local failed, not unlocking "
- "following entrylks:");
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LOCAL_LOCKS_STORE_FAILED_UNLOCKING_FOLLOWING_ENTRYLK,
+ NULL);
dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count);
@@ -446,21 +446,17 @@ dht_unlock_entrylk_wrapper(call_frame_t *frame, dht_elock_wrap_t *entrylk)
lock_frame = copy_frame(frame);
if (lock_frame == NULL) {
- gf_msg(frame->this->name, GF_LOG_WARNING, ENOMEM,
- DHT_MSG_PARENT_LAYOUT_CHANGED,
- "mkdir (%s/%s) (path: %s): "
- "copy frame failed",
- pgfid, local->loc.name, local->loc.path);
+ gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_COPY_FRAME_FAILED, "pgfid=%s", pgfid, "name=%s",
+ local->loc.name, "path=%s", local->loc.path, NULL);
goto done;
}
lock_local = dht_local_init(lock_frame, NULL, NULL, 0);
if (lock_local == NULL) {
- gf_msg(frame->this->name, GF_LOG_WARNING, ENOMEM,
- DHT_MSG_PARENT_LAYOUT_CHANGED,
- "mkdir (%s/%s) (path: %s): "
- "local creation failed",
- pgfid, local->loc.name, local->loc.path);
+ gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_CREATE_FAILED, "local", "pgfid=%s", pgfid, "name=%s",
+ local->loc.name, "path=%s", local->loc.path, NULL);
goto done;
}
@@ -700,9 +696,10 @@ dht_unlock_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
uuid_utoa_r(local->lock[0].layout.my_layout.locks[lk_index]->loc.gfid,
gfid);
- gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_UNLOCKING_FAILED,
- "unlocking failed on %s:%s",
- local->lock[0].layout.my_layout.locks[lk_index]->xl->name, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_UNLOCKING_FAILED,
+ "name=%s",
+ local->lock[0].layout.my_layout.locks[lk_index]->xl->name,
+ "gfid=%s", gfid, NULL);
} else {
local->lock[0].layout.my_layout.locks[lk_index]->locked = 0;
}
@@ -727,11 +724,9 @@ dht_unlock_inodelk_done(call_frame_t *frame, void *cookie, xlator_t *this,
gfid);
if (op_ret < 0) {
- gf_msg(this->name, GF_LOG_WARNING, op_errno,
- DHT_MSG_PARENT_LAYOUT_CHANGED,
- "unlock failed on gfid: %s, stale lock might be left "
- "in DHT_LAYOUT_HEAL_DOMAIN",
- gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_UNLOCK_GFID_FAILED, "DHT_LAYOUT_HEAL_DOMAIN gfid=%s",
+ gfid, NULL);
}
DHT_STACK_DESTROY(frame);
@@ -762,9 +757,9 @@ dht_unlock_inodelk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
lock_frame = dht_lock_frame(frame);
if (lock_frame == NULL) {
- gf_msg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED,
- "cannot allocate a frame, not unlocking following "
- "locks:");
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_ALLOC_FRAME_FAILED_NOT_UNLOCKING_FOLLOWING_ENTRYLKS,
+ NULL);
dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count);
goto done;
@@ -772,9 +767,9 @@ dht_unlock_inodelk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
ret = dht_local_inodelk_init(lock_frame, lk_array, lk_count, inodelk_cbk);
if (ret < 0) {
- gf_msg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED,
- "storing locks in local failed, not unlocking "
- "following locks:");
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LOCAL_LOCKS_STORE_FAILED_UNLOCKING_FOLLOWING_ENTRYLK,
+ NULL);
dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count);
@@ -834,21 +829,17 @@ dht_unlock_inodelk_wrapper(call_frame_t *frame, dht_ilock_wrap_t *inodelk)
lock_frame = copy_frame(frame);
if (lock_frame == NULL) {
- gf_msg(frame->this->name, GF_LOG_WARNING, ENOMEM,
- DHT_MSG_PARENT_LAYOUT_CHANGED,
- "mkdir (%s/%s) (path: %s): "
- "copy frame failed",
- pgfid, local->loc.name, local->loc.path);
+ gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_COPY_FRAME_FAILED, "pgfid=%s", pgfid, "name=%s",
+ local->loc.name, "path=%s", local->loc.path, NULL);
goto done;
}
lock_local = dht_local_init(lock_frame, NULL, NULL, 0);
if (lock_local == NULL) {
- gf_msg(frame->this->name, GF_LOG_WARNING, ENOMEM,
- DHT_MSG_PARENT_LAYOUT_CHANGED,
- "mkdir (%s/%s) (path: %s): "
- "local creation failed",
- pgfid, local->loc.name, local->loc.path);
+ gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_CREATE_FAILED, "local", "gfid=%s", pgfid, "name=%s",
+ local->loc.name, "path=%s", local->loc.path, NULL);
goto done;
}
@@ -1039,13 +1030,12 @@ dht_blocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
gfid);
local->lock[0].layout.my_layout.op_ret = -1;
local->lock[0].layout.my_layout.op_errno = op_errno;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- DHT_MSG_INODELK_FAILED,
- "inodelk failed on subvol %s. gfid:%s",
- local->lock[0]
- .layout.my_layout.locks[lk_index]
- ->xl->name,
- gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_INODELK_FAILED, "subvol=%s",
+ local->lock[0]
+ .layout.my_layout.locks[lk_index]
+ ->xl->name,
+ "gfid=%s", gfid, NULL);
goto cleanup;
}
break;
@@ -1060,13 +1050,12 @@ dht_blocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
gfid);
local->lock[0].layout.my_layout.op_ret = -1;
local->lock[0].layout.my_layout.op_errno = op_errno;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- DHT_MSG_INODELK_FAILED,
- "inodelk failed on subvol %s. gfid:%s",
- local->lock[0]
- .layout.my_layout.locks[lk_index]
- ->xl->name,
- gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_INODELK_FAILED, "subvol=%s",
+ local->lock[0]
+ .layout.my_layout.locks[lk_index]
+ ->xl->name,
+ "gfid=%s", gfid, NULL);
goto cleanup;
}
break;
@@ -1077,11 +1066,11 @@ dht_blocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
gfid);
local->lock[0].layout.my_layout.op_ret = -1;
local->lock[0].layout.my_layout.op_errno = op_errno;
- gf_msg(
+ gf_smsg(
this->name, GF_LOG_ERROR, op_errno, DHT_MSG_INODELK_FAILED,
- "inodelk failed on subvol %s, gfid:%s",
+ "subvol=%s",
local->lock[0].layout.my_layout.locks[lk_index]->xl->name,
- gfid);
+ "gfid=%s", gfid, NULL);
goto cleanup;
}
}
@@ -1153,19 +1142,16 @@ dht_blocking_inodelk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
lock_frame = dht_lock_frame(frame);
if (lock_frame == NULL) {
gf_uuid_unparse(tmp_local->loc.gfid, gfid);
- gf_msg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_LOCK_FRAME_FAILED,
- "memory allocation failed for lock_frame. gfid:%s"
- " path:%s",
- gfid, tmp_local->loc.path);
+ gf_smsg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_LOCK_FRAME_FAILED,
+ "gfid=%s", gfid, "path=%s", tmp_local->loc.path, NULL);
goto out;
}
ret = dht_local_inodelk_init(lock_frame, lk_array, lk_count, inodelk_cbk);
if (ret < 0) {
gf_uuid_unparse(tmp_local->loc.gfid, gfid);
- gf_msg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_LOCAL_LOCK_INIT_FAILED,
- "dht_local_lock_init failed, gfid: %s path:%s", gfid,
- tmp_local->loc.path);
+ gf_smsg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_LOCAL_LOCK_INIT_FAILED,
+ "gfid=%s", gfid, "path=%s", tmp_local->loc.path, NULL);
goto out;
}
@@ -1246,11 +1232,10 @@ dht_blocking_entrylk_after_inodelk(call_frame_t *frame, void *cookie,
if (ret < 0) {
local->op_ret = -1;
local->op_errno = EIO;
- gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
- DHT_MSG_ENTRYLK_ERROR,
- "%s (%s/%s): "
- "dht_blocking_entrylk failed after taking inodelk",
- gf_fop_list[local->fop], pgfid, entrylk->locks[0]->basename);
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_ENTRYLK_FAILED_AFT_INODELK, "fop=%s",
+ gf_fop_list[local->fop], "pgfid=%s", pgfid, "basename=%s",
+ entrylk->locks[0]->basename, NULL);
goto err;
}
@@ -1310,10 +1295,9 @@ dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol,
ret = dht_build_parent_loc(this, &parent, loc, &op_errno);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_LOC_FAILED,
- "gfid:%s (name:%s) (path: %s): "
- "parent loc build failed",
- loc->gfid, loc->name, loc->path);
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_LOC_FAILED,
+ "gfid=%s", loc->gfid, "name=%s", loc->name, "path=%s",
+ loc->path, NULL);
goto out;
}
gf_uuid_unparse(parent.gfid, pgfid);
@@ -1322,10 +1306,10 @@ dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol,
inodelk->locks = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_pointer);
if (inodelk->locks == NULL) {
local->op_errno = ENOMEM;
- gf_msg(this->name, GF_LOG_WARNING, local->op_errno, DHT_MSG_NO_MEMORY,
- "%s (%s/%s) (path: %s): "
- "calloc failure",
- gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_CALLOC_FAILED, "fop=%s", gf_fop_list[local->fop],
+ "pgfid=%s", pgfid, "name=%s", loc->name, "path=%s", loc->path,
+ NULL);
goto out;
}
@@ -1334,10 +1318,10 @@ dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol,
FAIL_ON_ANY_ERROR);
if (inodelk->locks[0] == NULL) {
local->op_errno = ENOMEM;
- gf_msg(this->name, GF_LOG_WARNING, local->op_errno, DHT_MSG_NO_MEMORY,
- "%s (%s/%s) (path: %s): "
- "inodelk: lock allocation failed",
- gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_LOCK_ALLOC_FAILED, "inodelk-fop=%s",
+ gf_fop_list[local->fop], "pgfid=%s", pgfid, "name=%s",
+ loc->name, "path=%s", loc->path, NULL);
goto err;
}
inodelk->lk_count = count;
@@ -1346,10 +1330,10 @@ dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol,
entrylk->locks = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_pointer);
if (entrylk->locks == NULL) {
local->op_errno = ENOMEM;
- gf_msg(this->name, GF_LOG_WARNING, local->op_errno, DHT_MSG_NO_MEMORY,
- "%s (%s/%s) (path: %s): "
- "entrylk: calloc failure",
- gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_CALLOC_FAILED, "entrylk-fop=%s",
+ gf_fop_list[local->fop], "pgfid=%s", pgfid, "name=%s",
+ loc->name, "path=%s", loc->path, NULL);
goto err;
}
@@ -1359,10 +1343,10 @@ dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol,
FAIL_ON_ANY_ERROR);
if (entrylk->locks[0] == NULL) {
local->op_errno = ENOMEM;
- gf_msg(this->name, GF_LOG_WARNING, local->op_errno, DHT_MSG_NO_MEMORY,
- "%s (%s/%s) (path: %s): "
- "entrylk: lock allocation failed",
- gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_LOCK_ALLOC_FAILED, "entrylk-fop=%s",
+ gf_fop_list[local->fop], "pgfid=%s", pgfid, "name=%s",
+ loc->name, "path=%s", loc->path, NULL);
goto err;
}
@@ -1376,11 +1360,11 @@ dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol,
dht_blocking_entrylk_after_inodelk);
if (ret < 0) {
local->op_errno = EIO;
- gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
- DHT_MSG_INODELK_ERROR,
- "%s (%s/%s) (path: %s): "
- "dht_blocking_inodelk failed",
- gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_BLOCK_INODELK_FAILED, "fop=%s", gf_fop_list[local->fop],
+ "pgfid=%s", pgfid, "name=%s", loc->name, "path=%s", loc->path,
+ NULL);
+
goto err;
}
diff --git a/xlators/cluster/dht/src/dht-lock.h b/xlators/cluster/dht/src/dht-lock.h
index 802970adb3b..6485c03fb6e 100644
--- a/xlators/cluster/dht/src/dht-lock.h
+++ b/xlators/cluster/dht/src/dht-lock.h
@@ -11,7 +11,6 @@
#ifndef _DHT_LOCK_H
#define _DHT_LOCK_H
-#include "xlator.h"
#include "dht-common.h"
void
diff --git a/xlators/cluster/dht/src/dht-mem-types.h b/xlators/cluster/dht/src/dht-mem-types.h
index c4430741ff7..e3c4471334a 100644
--- a/xlators/cluster/dht/src/dht-mem-types.h
+++ b/xlators/cluster/dht/src/dht-mem-types.h
@@ -11,7 +11,7 @@
#ifndef __DHT_MEM_TYPES_H__
#define __DHT_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_dht_mem_types_ {
gf_dht_mt_dht_du_t = gf_common_mt_end + 1,
@@ -30,10 +30,7 @@ enum gf_dht_mem_types_ {
gf_dht_mt_container_t,
gf_dht_mt_octx_t,
gf_dht_mt_miginfo_t,
- gf_tier_mt_bricklist_t,
- gf_tier_mt_ipc_ctr_params_t,
gf_dht_mt_fd_ctx_t,
- gf_tier_mt_qfile_array_t,
gf_dht_ret_cache_t,
gf_dht_nodeuuids_t,
gf_dht_mt_end
diff --git a/xlators/cluster/dht/src/dht-messages.h b/xlators/cluster/dht/src/dht-messages.h
index 005ab57b505..601f8dad78b 100644
--- a/xlators/cluster/dht/src/dht-messages.h
+++ b/xlators/cluster/dht/src/dht-messages.h
@@ -10,7 +10,7 @@
#ifndef _DHT_MESSAGES_H_
#define _DHT_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
@@ -38,12 +38,11 @@ GLFS_MSGID(
DHT_MSG_REBALANCE_STATUS, DHT_MSG_REBALANCE_STOPPED, DHT_MSG_RENAME_FAILED,
DHT_MSG_SETATTR_FAILED, DHT_MSG_SUBVOL_INSUFF_INODES,
DHT_MSG_SUBVOL_INSUFF_SPACE, DHT_MSG_UNLINK_FAILED,
- DHT_MSG_LAYOUT_SET_FAILED, DHT_MSG_LOG_FIXED_LAYOUT, DHT_MSG_LOG_TIER_ERROR,
- DHT_MSG_LOG_TIER_STATUS, DHT_MSG_GET_XATTR_FAILED,
- DHT_MSG_FILE_LOOKUP_FAILED, DHT_MSG_OPEN_FD_FAILED,
- DHT_MSG_SET_INODE_CTX_FAILED, DHT_MSG_UNLOCKING_FAILED,
- DHT_MSG_DISK_LAYOUT_NULL, DHT_MSG_SUBVOL_INFO, DHT_MSG_CHUNK_SIZE_INFO,
- DHT_MSG_LAYOUT_FORM_FAILED, DHT_MSG_SUBVOL_ERROR,
+ DHT_MSG_LAYOUT_SET_FAILED, DHT_MSG_LOG_FIXED_LAYOUT,
+ DHT_MSG_GET_XATTR_FAILED, DHT_MSG_FILE_LOOKUP_FAILED,
+ DHT_MSG_OPEN_FD_FAILED, DHT_MSG_SET_INODE_CTX_FAILED,
+ DHT_MSG_UNLOCKING_FAILED, DHT_MSG_DISK_LAYOUT_NULL, DHT_MSG_SUBVOL_INFO,
+ DHT_MSG_CHUNK_SIZE_INFO, DHT_MSG_LAYOUT_FORM_FAILED, DHT_MSG_SUBVOL_ERROR,
DHT_MSG_LAYOUT_SORT_FAILED, DHT_MSG_REGEX_INFO, DHT_MSG_FOPEN_FAILED,
DHT_MSG_SET_HOSTNAME_FAILED, DHT_MSG_BRICK_ERROR, DHT_MSG_SYNCOP_FAILED,
DHT_MSG_MIGRATE_INFO, DHT_MSG_SOCKET_ERROR, DHT_MSG_CREATE_FD_FAILED,
@@ -69,8 +68,7 @@ GLFS_MSGID(
DHT_MSG_INIT_LOCAL_SUBVOL_FAILED, DHT_MSG_SYS_CALL_GET_TIME_FAILED,
DHT_MSG_NO_DISK_USAGE_STATUS, DHT_MSG_SUBVOL_DOWN_ERROR,
DHT_MSG_REBAL_THROTTLE_INFO, DHT_MSG_COMMIT_HASH_INFO,
- DHT_MSG_REBAL_STRUCT_SET, DHT_MSG_HAS_MIGINFO, DHT_MSG_LOG_IPC_TIER_ERROR,
- DHT_MSG_TIER_PAUSED, DHT_MSG_TIER_RESUME, DHT_MSG_SETTLE_HASH_FAILED,
+ DHT_MSG_REBAL_STRUCT_SET, DHT_MSG_HAS_MIGINFO, DHT_MSG_SETTLE_HASH_FAILED,
DHT_MSG_DEFRAG_PROCESS_DIR_FAILED, DHT_MSG_FD_CTX_SET_FAILED,
DHT_MSG_STALE_LOOKUP, DHT_MSG_PARENT_LAYOUT_CHANGED,
DHT_MSG_LOCK_MIGRATION_FAILED, DHT_MSG_LOCK_INODE_UNREF_FAILED,
@@ -79,6 +77,310 @@ GLFS_MSGID(
DHT_MSG_ENTRYLK_ERROR, DHT_MSG_INODELK_ERROR, DHT_MSG_LOC_FAILED,
DHT_MSG_UNKNOWN_FOP, DHT_MSG_MIGRATE_FILE_SKIPPED,
DHT_MSG_DIR_XATTR_HEAL_FAILED, DHT_MSG_HASHED_SUBVOL_DOWN,
- DHT_MSG_NON_HASHED_SUBVOL_DOWN);
+ DHT_MSG_NON_HASHED_SUBVOL_DOWN, DHT_MSG_SYNCTASK_CREATE_FAILED,
+ DHT_MSG_DIR_HEAL_ABORT, DHT_MSG_MIGRATE_SKIP, DHT_MSG_FD_CREATE_FAILED,
+ DHT_MSG_DICT_NEW_FAILED, DHT_MSG_FAILED_TO_OPEN, DHT_MSG_CREATE_FAILED,
+ DHT_MSG_FILE_NOT_EXIST, DHT_MSG_CHOWN_FAILED, DHT_MSG_FALLOCATE_FAILED,
+ DHT_MSG_FTRUNCATE_FAILED, DHT_MSG_STATFS_FAILED, DHT_MSG_WRITE_CROSS,
+ DHT_MSG_NEW_TARGET_FOUND, DHT_MSG_INSUFF_MEMORY, DHT_MSG_SET_XATTR_FAILED,
+ DHT_MSG_SET_MODE_FAILED, DHT_MSG_FILE_EXISTS_IN_DEST,
+ DHT_MSG_SYMLINK_FAILED, DHT_MSG_LINKFILE_DEL_FAILED, DHT_MSG_MKNOD_FAILED,
+ DHT_MSG_MIGRATE_CLEANUP_FAILED, DHT_MSG_LOCK_MIGRATE,
+ DHT_MSG_PARENT_BUILD_FAILED, DHT_MSG_HASHED_SUBVOL_NOT_FOUND,
+ DHT_MSG_ACQUIRE_ENTRYLK_FAILED, DHT_MSG_CREATE_DST_FAILED,
+ DHT_MSG_MIGRATION_EXIT, DHT_MSG_CHANGED_DST, DHT_MSG_TRACE_FAILED,
+ DHT_MSG_WRITE_LOCK_FAILED, DHT_MSG_GETACTIVELK_FAILED, DHT_MSG_STAT_FAILED,
+ DHT_MSG_UNLINK_PERFORM_FAILED, DHT_MSG_CLANUP_SOURCE_FILE_FAILED,
+ DHT_MSG_UNLOCK_FILE_FAILED, DHT_MSG_REMOVE_XATTR_FAILED,
+ DHT_MSG_DATA_MIGRATE_ABORT, DHT_MSG_DEFRAG_NULL, DHT_MSG_PARENT_NULL,
+ DHT_MSG_GFID_NOT_PRESENT, DHT_MSG_CHILD_LOC_FAILED,
+ DHT_MSG_SET_LOOKUP_FAILED, DHT_MSG_DIR_REMOVED, DHT_MSG_FIX_NOT_COMP,
+ DHT_MSG_SUBVOL_DETER_FAILED, DHT_MSG_LOCAL_SUBVOL, DHT_MSG_NODE_UUID,
+ DHT_MSG_SIZE_FILE, DHT_MSG_GET_DATA_SIZE_FAILED,
+ DHT_MSG_PTHREAD_JOIN_FAILED, DHT_MSG_COUNTER_THREAD_CREATE_FAILED,
+ DHT_MSG_MIGRATION_INIT_QUEUE_FAILED, DHT_MSG_PAUSED_TIMEOUT, DHT_MSG_WOKE,
+ DHT_MSG_ABORT_REBALANCE, DHT_MSG_CREATE_TASK_REBAL_FAILED,
+ DHT_MSG_REBAL_ESTIMATE_NOT_AVAIL, DHT_MSG_ADD_CHOICES_ERROR,
+ DHT_MSG_GET_CHOICES_ERROR, DHT_MSG_PREPARE_STATUS_ERROR,
+ DHT_MSG_SET_CHOICE_FAILED, DHT_MSG_SET_HASHED_SUBVOL_FAILED,
+ DHT_MSG_XATTR_HEAL_NOT_POSS, DHT_MSG_LINKTO_FILE_FAILED,
+ DHT_MSG_STALE_LINKFILE_DELETE, DHT_MSG_NO_SUBVOL_FOR_LINKTO,
+ DHT_MSG_SUBVOL_RETURNED, DHT_MSG_UNKNOWN_LOCAL_XSEL, DHT_MSG_GET_XATTR_ERR,
+ DHT_MSG_ALLOC_OR_FILL_FAILED, DHT_MSG_GET_REAL_NAME_FAILED,
+ DHT_MSG_COPY_UUID_FAILED, DHT_MSG_MDS_DETER_FAILED,
+ DHT_MSG_CREATE_REBAL_FAILED, DHT_MSG_LINK_LAYOUT_FAILED,
+ DHT_MSG_NO_SUBVOL_IN_LAYOUT, DHT_MSG_MEM_ALLOC_FAILED,
+ DHT_MSG_SET_IN_PARAMS_DICT_FAILED, DHT_MSG_LOC_COPY_FAILED,
+ DHT_MSG_PARENT_LOC_FAILED, DHT_MSG_CREATE_LOCK_FAILED,
+ DHT_MSG_PREV_ATTEMPT_FAILED, DHT_MSG_REFRESH_ATTEMPT,
+ DHT_MSG_ACQUIRE_LOCK_FAILED, DHT_MSG_CREATE_STUB_FAILED,
+ DHT_MSG_WIND_LOCK_REQ_FAILED, DHT_MSG_REFRESH_FAILED,
+ DHT_MSG_CACHED_SUBVOL_ERROR, DHT_MSG_NO_LINK_SUBVOL, DHT_MSG_SET_KEY_FAILED,
+ DHT_MSG_REMOVE_LINKTO_FAILED, DHT_MSG_LAYOUT_DICT_SET_FAILED,
+ DHT_MSG_XATTR_DICT_NULL, DHT_MSG_DUMMY_ALLOC_FAILED, DHT_MSG_DICT_IS_NULL,
+ DHT_MSG_LINK_INODE_FAILED, DHT_MSG_SELFHEAL_FAILED, DHT_MSG_NO_MDS_SUBVOL,
+ DHT_MSG_LIST_XATTRS_FAILED, DHT_MSG_RESET_INTER_XATTR_FAILED,
+ DHT_MSG_MDS_DOWN_UNABLE_TO_SET, DHT_MSG_WIND_UNLOCK_FAILED,
+ DHT_MSG_COMMIT_HASH_FAILED, DHT_MSG_UNLOCK_GFID_FAILED,
+ DHT_MSG_UNLOCK_FOLLOW_ENTRYLK, DHT_MSG_COPY_FRAME_FAILED,
+ DHT_MSG_UNLOCK_FOLLOW_LOCKS, DHT_MSG_ENTRYLK_FAILED_AFT_INODELK,
+ DHT_MSG_CALLOC_FAILED, DHT_MSG_LOCK_ALLOC_FAILED,
+ DHT_MSG_BLOCK_INODELK_FAILED,
+ DHT_MSG_LOCAL_LOCKS_STORE_FAILED_UNLOCKING_FOLLOWING_ENTRYLK,
+ DHT_MSG_ALLOC_FRAME_FAILED_NOT_UNLOCKING_FOLLOWING_ENTRYLKS,
+ DHT_MSG_DST_NULL_SET_FAILED);
+
+#define DHT_MSG_FD_CTX_SET_FAILED_STR "Failed to set fd ctx"
+#define DHT_MSG_INVALID_VALUE_STR "Different dst found in the fd ctx"
+#define DHT_MSG_UNKNOWN_FOP_STR "Unknown FOP on file"
+#define DHT_MSG_OPEN_FD_ON_DST_FAILED_STR "Failed to open the fd on file"
+#define DHT_MSG_SYNCTASK_CREATE_FAILED_STR "Failed to create synctask"
+#define DHT_MSG_ASPRINTF_FAILED_STR \
+ "asprintf failed while fetching subvol from the id"
+#define DHT_MSG_HAS_MIGINFO_STR "Found miginfo in the inode ctx"
+#define DHT_MSG_FILE_LOOKUP_FAILED_STR "failed to lookup the file"
+#define DHT_MSG_INVALID_LINKFILE_STR \
+ "linkto target is different from cached-subvol. treating as destination " \
+ "subvol"
+#define DHT_MSG_GFID_MISMATCH_STR "gfid different on the target file"
+#define DHT_MSG_GET_XATTR_FAILED_STR "failed to get 'linkto' xattr"
+#define DHT_MSG_SET_INODE_CTX_FAILED_STR "failed to set inode-ctx target file"
+#define DHT_MSG_DIR_SELFHEAL_FAILED_STR "Healing of path failed"
+#define DHT_MSG_DIR_HEAL_ABORT_STR \
+ "Failed to get path from subvol. Aborting directory healing"
+#define DHT_MSG_DIR_XATTR_HEAL_FAILED_STR "xattr heal failed for directory"
+#define DHT_MSG_LOCK_INODE_UNREF_FAILED_STR \
+ "Found a NULL inode. Failed to unref the inode"
+#define DHT_MSG_DICT_SET_FAILED_STR "Failed to set dictionary value"
+#define DHT_MSG_NOT_LINK_FILE_ERROR_STR "got non-linkfile"
+#define DHT_MSG_CREATE_LINK_FAILED_STR "failed to initialize linkfile data"
+#define DHT_MSG_UNLINK_FAILED_STR "Unlinking linkfile on subvolume failed"
+#define DHT_MSG_MIGRATE_FILE_FAILED_STR "Migrate file failed"
+#define DHT_MSG_NO_MEMORY_STR "could not allocate memory for dict"
+#define DHT_MSG_SUBVOL_ERROR_STR "Failed to get linkto subvol"
+#define DHT_MSG_MIGRATE_HARDLINK_FILE_FAILED_STR "link failed on subvol"
+#define DHT_MSG_MIGRATE_FILE_SKIPPED_STR "Migration skipped"
+#define DHT_MSG_FD_CREATE_FAILED_STR "fd create failed"
+#define DHT_MSG_DICT_NEW_FAILED_STR "dict_new failed"
+#define DHT_MSG_FAILED_TO_OPEN_STR "failed to open"
+#define DHT_MSG_CREATE_FAILED_STR "failed to create"
+#define DHT_MSG_FILE_NOT_EXIST_STR "file does not exist"
+#define DHT_MSG_CHOWN_FAILED_STR "chown failed"
+#define DHT_MSG_FALLOCATE_FAILED_STR "fallocate failed"
+#define DHT_MSG_FTRUNCATE_FAILED_STR "ftruncate failed"
+#define DHT_MSG_STATFS_FAILED_STR "failed to get statfs"
+#define DHT_MSG_WRITE_CROSS_STR \
+ "write will cross min-fre-disk for file on subvol. looking for new subvol"
+#define DHT_MSG_SUBVOL_INSUFF_SPACE_STR \
+ "Could not find any subvol with space accommodating the file. Cosider " \
+ "adding bricks"
+#define DHT_MSG_NEW_TARGET_FOUND_STR "New target found for file"
+#define DHT_MSG_INSUFF_MEMORY_STR "insufficient memory"
+#define DHT_MSG_SET_XATTR_FAILED_STR "failed to set xattr"
+#define DHT_MSG_SET_MODE_FAILED_STR "failed to set mode"
+#define DHT_MSG_FILE_EXISTS_IN_DEST_STR "file exists in destination"
+#define DHT_MSG_LINKFILE_DEL_FAILED_STR "failed to delete the linkfile"
+#define DHT_MSG_SYMLINK_FAILED_STR "symlink failed"
+#define DHT_MSG_MKNOD_FAILED_STR "mknod failed"
+#define DHT_MSG_SETATTR_FAILED_STR "failed to perform setattr"
+#define DHT_MSG_MIGRATE_CLEANUP_FAILED_STR \
+ "Migrate file cleanup failed: failed to fstat file"
+#define DHT_MSG_LOCK_MIGRATE_STR "locks will be migrated for file"
+#define DHT_MSG_PARENT_BUILD_FAILED_STR \
+ "failed to build parent loc, which is needed to acquire entrylk to " \
+ "synchronize with renames on this path. Skipping migration"
+#define DHT_MSG_HASHED_SUBVOL_NOT_FOUND_STR \
+ "cannot find hashed subvol which is needed to synchronize with renames " \
+ "on this path. Skipping migration"
+#define DHT_MSG_ACQUIRE_ENTRYLK_FAILED_STR "failed to acquire entrylk on subvol"
+#define DHT_MSG_CREATE_DST_FAILED_STR "create dst failed for file"
+#define DHT_MSG_MIGRATION_EXIT_STR "Exiting migration"
+#define DHT_MSG_CHANGED_DST_STR "destination changed fo file"
+#define DHT_MSG_TRACE_FAILED_STR "Trace failed"
+#define DHT_MSG_WRITE_LOCK_FAILED_STR "write lock failed"
+#define DHT_MSG_GETACTIVELK_FAILED_STR "getactivelk failed for file"
+#define DHT_MSG_STAT_FAILED_STR "failed to do a stat"
+#define DHT_MSG_UNLINK_PERFORM_FAILED_STR "failed to perform unlink"
+#define DHT_MSG_MIGRATE_FILE_COMPLETE_STR "completed migration"
+#define DHT_MSG_CLANUP_SOURCE_FILE_FAILED_STR "failed to cleanup source file"
+#define DHT_MSG_UNLOCK_FILE_FAILED_STR "failed to unlock file"
+#define DHT_MSG_REMOVE_XATTR_FAILED_STR "remove xattr failed"
+#define DHT_MSG_SOCKET_ERROR_STR "Failed to unlink listener socket"
+#define DHT_MSG_HASHED_SUBVOL_GET_FAILED_STR "Failed to get hashed subvolume"
+#define DHT_MSG_CACHED_SUBVOL_GET_FAILED_STR "Failed to get cached subvolume"
+#define DHT_MSG_MIGRATE_DATA_FAILED_STR "migrate-data failed"
+#define DHT_MSG_DEFRAG_NULL_STR "defrag is NULL"
+#define DHT_MSG_DATA_MIGRATE_ABORT_STR \
+ "Readdirp failed. Aborting data migration for dict"
+#define DHT_MSG_LAYOUT_FIX_FAILED_STR "fix layout failed"
+#define DHT_MSG_PARENT_NULL_STR "parent is NULL"
+#define DHT_MSG_GFID_NOT_PRESENT_STR "gfid not present"
+#define DHT_MSG_CHILD_LOC_FAILED_STR "Child loc build failed"
+#define DHT_MSG_SET_LOOKUP_FAILED_STR "Failed to set lookup"
+#define DHT_MSG_DIR_LOOKUP_FAILED_STR "lookup failed"
+#define DHT_MSG_DIR_REMOVED_STR "Dir renamed or removed. Skipping"
+#define DHT_MSG_READDIR_ERROR_STR "readdir failed, Aborting fix-layout"
+#define DHT_MSG_SETTLE_HASH_FAILED_STR "Settle hash failed"
+#define DHT_MSG_DEFRAG_PROCESS_DIR_FAILED_STR "gf_defrag_process_dir failed"
+#define DHT_MSG_FIX_NOT_COMP_STR \
+ "Unable to retrieve fixlayout xattr. Assume background fix layout not " \
+ "complete"
+#define DHT_MSG_SUBVOL_DETER_FAILED_STR \
+ "local subvolume determination failed with error"
+#define DHT_MSG_LOCAL_SUBVOL_STR "local subvol"
+#define DHT_MSG_NODE_UUID_STR "node uuid"
+#define DHT_MSG_SIZE_FILE_STR "Total size files"
+#define DHT_MSG_GET_DATA_SIZE_FAILED_STR \
+ "Failed to get the total data size. Unable to estimate time to complete " \
+ "rebalance"
+#define DHT_MSG_PTHREAD_JOIN_FAILED_STR \
+ "file_counter_thread: pthread_join failed"
+#define DHT_MSG_COUNTER_THREAD_CREATE_FAILED_STR \
+ "Failed to create the file counter thread"
+#define DHT_MSG_MIGRATION_INIT_QUEUE_FAILED_STR \
+ "Failed to initialise migration queue"
+#define DHT_MSG_REBALANCE_STOPPED_STR "Received stop command on rebalance"
+#define DHT_MSG_PAUSED_TIMEOUT_STR "Request pause timer timeout"
+#define DHT_MSG_WOKE_STR "woken"
+#define DHT_MSG_ABORT_REBALANCE_STR "Aborting rebalance"
+#define DHT_MSG_REBALANCE_START_FAILED_STR \
+ "Failed to start rebalance: look up on / failed"
+#define DHT_MSG_CREATE_TASK_REBAL_FAILED_STR \
+ "Could not create task for rebalance"
+#define DHT_MSG_REBAL_ESTIMATE_NOT_AVAIL_STR \
+ "Rebalance estimates will not be available"
+#define DHT_MSG_REBALANCE_STATUS_STR "Rebalance status"
+#define DHT_MSG_DATA_NULL_STR "data value is NULL"
+#define DHT_MSG_ADD_CHOICES_ERROR_STR "Error to add choices in buffer"
+#define DHT_MSG_GET_CHOICES_ERROR_STR "Error to get choices"
+#define DHT_MSG_PREPARE_STATUS_ERROR_STR "Error to prepare status"
+#define DHT_MSG_SET_CHOICE_FAILED_STR "Failed to set full choice"
+#define DHT_MSG_AGGREGATE_QUOTA_XATTR_FAILED_STR \
+ "Failed to aggregate quota xattr"
+#define DHT_MSG_FILE_TYPE_MISMATCH_STR \
+ "path exists as a file on one subvolume and directory on another. Please " \
+ "fix it manually"
+#define DHT_MSG_LAYOUT_SET_FAILED_STR "failed to set layout for subvolume"
+#define DHT_MSG_LAYOUT_MERGE_FAILED_STR "failed to merge layouts for subvolume"
+#define DHT_MSG_SET_HASHED_SUBVOL_FAILED_STR "Failed to set hashed subvolume"
+#define DHT_MSG_XATTR_HEAL_NOT_POSS_STR \
+ "No gfid exists for path. so healing xattr is not possible"
+#define DHT_MSG_REVALIDATE_CBK_INFO_STR "Revalidate: subvolume returned -1"
+#define DHT_MSG_LAYOUT_MISMATCH_STR "Mismatching layouts"
+#define DHT_MSG_UNLINK_LOOKUP_INFO_STR "lookup_unlink retuened"
+#define DHT_MSG_LINKTO_FILE_FAILED_STR \
+ "Could not unlink the linkto file as either fd is open and/or linkto " \
+ "xattr is set"
+#define DHT_MSG_LAYOUT_PRESET_FAILED_STR \
+ "Could not set pre-set layout for subvolume"
+#define DHT_MSG_FILE_ON_MULT_SUBVOL_STR \
+ "multiple subvolumes have file (preferably rename the file in the " \
+ "backend, and do a fresh lookup"
+#define DHT_MSG_STALE_LINKFILE_DELETE_STR \
+ "attempting deletion of stale linkfile"
+#define DHT_MSG_LINK_FILE_LOOKUP_INFO_STR "Lookup on following linkfile"
+#define DHT_MSG_NO_SUBVOL_FOR_LINKTO_STR "No link subvolume for linkto"
+#define DHT_MSG_SUBVOL_RETURNED_STR "Subvolume returned -1"
+#define DHT_MSG_UNKNOWN_LOCAL_XSEL_STR "Unknown local->xsel"
+#define DHT_MSG_DICT_GET_FAILED_STR "Failed to get"
+#define DHT_MSG_UUID_PARSE_ERROR_STR "Failed to parse uuid"
+#define DHT_MSG_GET_XATTR_ERR_STR "getxattr err for dir"
+#define DHT_MSG_ALLOC_OR_FILL_FAILED_STR "alloc or fill failed"
+#define DHT_MSG_UPGRADE_BRICKS_STR \
+ "At least one of the bricks does not support this operation. Please " \
+ "upgrade all bricks"
+#define DHT_MSG_GET_REAL_NAME_FAILED_STR "Failed to get real filename"
+#define DHT_MSG_LAYOUT_NULL_STR "Layout is NULL"
+#define DHT_MSG_COPY_UUID_FAILED_STR "Failed to copy node uuid key"
+#define DHT_MSG_MDS_DETER_FAILED_STR \
+ "Cannot determine MDS, fetching xattr randomly from a subvol"
+#define DHT_MSG_HASHED_SUBVOL_DOWN_STR \
+ "MDS is down for path, so fetching xattr randomly from subvol"
+#define DHT_MSG_CREATE_REBAL_FAILED_STR \
+ "failed to create a new rebalance synctask"
+#define DHT_MSG_FIX_LAYOUT_INFO_STR "fixing the layout"
+#define DHT_MSG_OPERATION_NOT_SUP_STR "wrong directory-spread-count value"
+#define DHT_MSG_LINK_LAYOUT_FAILED_STR "failed to link the layout in inode"
+#define DHT_MSG_NO_SUBVOL_IN_LAYOUT_STR "no subvolume in layout for path"
+#define DHT_MSG_INODE_LK_ERROR_STR "mknod lock failed for file"
+#define DHT_MSG_MEM_ALLOC_FAILED_STR "mem allocation failed"
+#define DHT_MSG_PARENT_LAYOUT_CHANGED_STR \
+ "extracting in-memory layout of parent failed"
+#define DHT_MSG_SET_IN_PARAMS_DICT_FAILED_STR \
+ "setting in params dictionary failed"
+#define DHT_MSG_LOC_COPY_FAILED_STR "loc_copy failed"
+#define DHT_MSG_LOC_FAILED_STR "parent loc build failed"
+#define DHT_MSG_PARENT_LOC_FAILED_STR "locking parent failed"
+#define DHT_MSG_CREATE_LOCK_FAILED_STR "Create lock failed"
+#define DHT_MSG_PREV_ATTEMPT_FAILED_STR \
+ "mkdir loop detected. parent layout didn't change even though previous " \
+ "attempt of mkdir failed because of in-memory layout not matching with " \
+ "that on disk."
+#define DHT_MSG_REFRESH_ATTEMPT_STR \
+ "mkdir parent layout changed. Attempting a refresh and then a retry"
+#define DHT_MSG_ACQUIRE_LOCK_FAILED_STR \
+ "Acquiring lock on parent to guard against layout-change failed"
+#define DHT_MSG_CREATE_STUB_FAILED_STR "creating stub failed"
+#define DHT_MSG_WIND_LOCK_REQ_FAILED_STR \
+ "cannot wind lock request to guard parent layout"
+#define DHT_MSG_REFRESH_FAILED_STR "refreshing parent layout failed."
+#define DHT_MSG_CACHED_SUBVOL_ERROR_STR "On cached subvol"
+#define DHT_MSG_NO_LINK_SUBVOL_STR "Linkfile does not have link subvolume"
+#define DHT_MSG_SET_KEY_FAILED_STR "failed to set key"
+#define DHT_MSG_CHILD_DOWN_STR "Received CHILD_DOWN. Exiting"
+#define DHT_MSG_LOG_FIXED_LAYOUT_STR "log layout fixed"
+#define DHT_MSG_REBAL_STRUCT_SET_STR "local->rebalance already set"
+#define DHT_MSG_REMOVE_LINKTO_FAILED_STR "Removal of linkto failed at subvol"
+#define DHT_MSG_LAYOUT_DICT_SET_FAILED_STR "dht layout dict set failed"
+#define DHT_MSG_SUBVOL_INFO_STR "creating subvolume"
+#define DHT_MSG_COMPUTE_HASH_FAILED_STR "hash computation failed"
+#define DHT_MSG_INVALID_DISK_LAYOUT_STR \
+ "Invalid disk layout: Catastrophic error layout with unknown type found"
+#define DHT_MSG_LAYOUT_SORT_FAILED_STR "layout sort failed"
+#define DHT_MSG_ANOMALIES_INFO_STR "Found anomalies"
+#define DHT_MSG_XATTR_DICT_NULL_STR "xattr dictionary is NULL"
+#define DHT_MSG_DISK_LAYOUT_MISSING_STR "Disk layout missing"
+#define DHT_MSG_LAYOUT_INFO_STR "layout info"
+#define DHT_MSG_SUBVOL_NO_LAYOUT_INFO_STR "no pre-set layout for subvol"
+#define DHT_MSG_SELFHEAL_XATTR_FAILED_STR "layout setxattr failed"
+#define DHT_MSG_DIR_SELFHEAL_XATTR_FAILED_STR "Directory self heal xattr failed"
+#define DHT_MSG_DUMMY_ALLOC_FAILED_STR "failed to allocate dummy layout"
+#define DHT_MSG_DICT_IS_NULL_STR \
+ "dict is NULL, need to make sure gfids are same"
+#define DHT_MSG_ENTRYLK_ERROR_STR "acquiring entrylk after inodelk failed"
+#define DHT_MSG_NO_DISK_USAGE_STATUS_STR "no du stats"
+#define DHT_MSG_LINK_INODE_FAILED_STR "linking inode failed"
+#define DHT_MSG_SELFHEAL_FAILED_STR "Directory selfheal failed"
+#define DHT_MSG_NO_MDS_SUBVOL_STR "No mds subvol"
+#define DHT_MSG_LIST_XATTRS_FAILED_STR "failed to list xattrs"
+#define DHT_MSG_RESET_INTER_XATTR_FAILED_STR "Failed to reset internal xattr"
+#define DHT_MSG_MDS_DOWN_UNABLE_TO_SET_STR \
+ "mds subvol is down, unable to set xattr"
+#define DHT_MSG_DIR_ATTR_HEAL_FAILED_STR \
+ "Directory attr heal failed. Failed to set uid/gid"
+#define DHT_MSG_WIND_UNLOCK_FAILED_STR \
+ "Winding unlock failed: stale locks left on brick"
+#define DHT_MSG_COMMIT_HASH_FAILED_STR "Directory commit hash updaten failed"
+#define DHT_MSG_LK_ARRAY_INFO_STR "lk info"
+#define DHT_MSG_UNLOCK_GFID_FAILED_STR \
+ "unlock failed on gfid: stale lock might be left"
+#define DHT_MSG_UNLOCKING_FAILED_STR "unlocking failed"
+#define DHT_MSG_UNLOCK_FOLLOW_ENTRYLK_STR "not unlocking following entrylks"
+#define DHT_MSG_COPY_FRAME_FAILED_STR "copy frame failed"
+#define DHT_MSG_UNLOCK_FOLLOW_LOCKS_STR "not unlocking following locks"
+#define DHT_MSG_INODELK_FAILED_STR "inodelk failed on subvol"
+#define DHT_MSG_LOCK_FRAME_FAILED_STR "memory allocation failed for lock_frame"
+#define DHT_MSG_LOCAL_LOCK_INIT_FAILED_STR "dht_local_lock_init failed"
+#define DHT_MSG_ENTRYLK_FAILED_AFT_INODELK_STR \
+ "dht_blocking_entrylk failed after taking inodelk"
+#define DHT_MSG_BLOCK_INODELK_FAILED_STR "dht_blocking_inodelk failed"
+#define DHT_MSG_CALLOC_FAILED_STR "calloc failed"
+#define DHT_MSG_LOCK_ALLOC_FAILED_STR "lock allocation failed"
+#define DHT_MSG_ALLOC_FRAME_FAILED_NOT_UNLOCKING_FOLLOWING_ENTRYLKS_STR \
+ "cannot allocate a frame, not unlocking following entrylks"
+#define DHT_MSG_LOCAL_LOCKS_STORE_FAILED_UNLOCKING_FOLLOWING_ENTRYLK_STR \
+ "storing locks in local failed, not unlocking following entrylks"
+#define DHT_MSG_DST_NULL_SET_FAILED_STR \
+ "src or dst is NULL, Failed to set dictionary value"
#endif /* _DHT_MESSAGES_H_ */
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 41a587823a9..8ba8082bd86 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -9,16 +9,15 @@
*/
#include "dht-common.h"
-#include "xlator.h"
-#include "syscall.h"
-#include <signal.h>
+#include <glusterfs/syscall.h>
#include <fnmatch.h>
#include <signal.h>
-#include "events.h"
+#include <glusterfs/events.h>
+#include "glusterfs/compat-errno.h" // for ENODATA on BSD
#define GF_DISK_SECTOR_SIZE 512
-#define DHT_REBALANCE_PID 4242 /* Change it if required */
-#define DHT_REBALANCE_BLKSIZE (1024 * 1024) /* 1 MB */
+#define DHT_REBALANCE_PID 4242 /* Change it if required */
+#define DHT_REBALANCE_BLKSIZE 1048576 /* 1 MB */
#define MAX_MIGRATE_QUEUE_COUNT 500
#define MIN_MIGRATE_QUEUE_COUNT 200
#define MAX_REBAL_TYPE_SIZE 16
@@ -46,7 +45,10 @@ gf_defrag_free_dir_dfmeta(struct dir_dfmeta *meta, int local_subvols_cnt)
if (meta) {
for (i = 0; i < local_subvols_cnt; i++) {
- gf_dirent_free(&meta->equeue[i]);
+ if (meta->equeue)
+ gf_dirent_free(&meta->equeue[i]);
+ if (meta->lfd && meta->lfd[i])
+ fd_unref(meta->lfd[i]);
}
GF_FREE(meta->equeue);
@@ -54,6 +56,7 @@ gf_defrag_free_dir_dfmeta(struct dir_dfmeta *meta, int local_subvols_cnt)
GF_FREE(meta->iterator);
GF_FREE(meta->offset_var);
GF_FREE(meta->fetch_entries);
+ GF_FREE(meta->lfd);
GF_FREE(meta);
}
}
@@ -85,26 +88,6 @@ dht_set_global_defrag_error(gf_defrag_info_t *defrag, int ret)
return;
}
-static gf_boolean_t
-dht_is_tier_command(int cmd)
-{
- gf_boolean_t is_tier = _gf_false;
-
- switch (cmd) {
- case GF_DEFRAG_CMD_START_TIER:
- case GF_DEFRAG_CMD_STATUS_TIER:
- case GF_DEFRAG_CMD_START_DETACH_TIER:
- case GF_DEFRAG_CMD_STOP_DETACH_TIER:
- case GF_DEFRAG_CMD_PAUSE_TIER:
- case GF_DEFRAG_CMD_RESUME_TIER:
- is_tier = _gf_true;
- break;
- default:
- break;
- }
- return is_tier;
-}
-
static int
dht_send_rebalance_event(xlator_t *this, int cmd, gf_defrag_status_t status)
{
@@ -113,8 +96,6 @@ dht_send_rebalance_event(xlator_t *this, int cmd, gf_defrag_status_t status)
char *tmpstr = NULL;
char *ptr = NULL;
char *suffix = "-dht";
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
int len = 0;
eventtypes_t event = EVENT_LAST;
@@ -133,21 +114,14 @@ dht_send_rebalance_event(xlator_t *this, int cmd, gf_defrag_status_t status)
break;
}
- if (dht_is_tier_command(cmd)) {
- /* We should have the tier volume name*/
- conf = this->private;
- defrag = conf->defrag;
- volname = defrag->tier_conf.volname;
- } else {
- /* DHT volume */
- len = strlen(this->name) - strlen(suffix);
- tmpstr = gf_strdup(this->name);
- if (tmpstr) {
- ptr = tmpstr + len;
- if (!strcmp(ptr, suffix)) {
- tmpstr[len] = '\0';
- volname = tmpstr;
- }
+ /* DHT volume */
+ len = strlen(this->name) - strlen(suffix);
+ tmpstr = gf_strdup(this->name);
+ if (tmpstr) {
+ ptr = tmpstr + len;
+ if (!strcmp(ptr, suffix)) {
+ tmpstr[len] = '\0';
+ volname = tmpstr;
}
}
@@ -173,75 +147,6 @@ dht_strip_out_acls(dict_t *dict)
}
}
-static int
-dht_write_with_holes(xlator_t *to, fd_t *fd, struct iovec *vec, int count,
- int32_t size, off_t offset, struct iobref *iobref,
- int *fop_errno)
-{
- int i = 0;
- int ret = -1;
- int start_idx = 0;
- int tmp_offset = 0;
- int write_needed = 0;
- int buf_len = 0;
- int size_pending = 0;
- char *buf = NULL;
-
- /* loop through each vector */
- for (i = 0; i < count; i++) {
- buf = vec[i].iov_base;
- buf_len = vec[i].iov_len;
-
- for (start_idx = 0; (start_idx + GF_DISK_SECTOR_SIZE) <= buf_len;
- start_idx += GF_DISK_SECTOR_SIZE) {
- if (mem_0filled(buf + start_idx, GF_DISK_SECTOR_SIZE) != 0) {
- write_needed = 1;
- continue;
- }
-
- if (write_needed) {
- ret = syncop_write(
- to, fd, (buf + tmp_offset), (start_idx - tmp_offset),
- (offset + tmp_offset), iobref, 0, NULL, NULL);
- /* 'path' will be logged in calling function */
- if (ret < 0) {
- gf_log(THIS->name, GF_LOG_WARNING, "failed to write (%s)",
- strerror(-ret));
- *fop_errno = -ret;
- ret = -1;
- goto out;
- }
-
- write_needed = 0;
- }
- tmp_offset = start_idx + GF_DISK_SECTOR_SIZE;
- }
-
- if ((start_idx < buf_len) || write_needed) {
- /* This means, last chunk is not yet written.. write it */
- ret = syncop_write(to, fd, (buf + tmp_offset),
- (buf_len - tmp_offset), (offset + tmp_offset),
- iobref, 0, NULL, NULL);
- if (ret < 0) {
- /* 'path' will be logged in calling function */
- gf_log(THIS->name, GF_LOG_WARNING, "failed to write (%s)",
- strerror(-ret));
- *fop_errno = -ret;
- ret = -1;
- goto out;
- }
- }
-
- size_pending = (size - buf_len);
- if (!size_pending)
- break;
- }
-
- ret = size;
-out:
- return ret;
-}
-
/*
return values:
-1 : failure
@@ -649,7 +554,7 @@ out:
static int
__dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from,
loc_t *loc, struct iatt *stbuf, fd_t **dst_fd,
- int *fop_errno)
+ int *fop_errno, int file_has_holes)
{
int ret = -1;
int ret2 = -1;
@@ -704,26 +609,23 @@ __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from,
goto out;
}
- if (!!dht_is_tier_xlator(this)) {
- xdata = dict_new();
- if (!xdata) {
- *fop_errno = ENOMEM;
- ret = -1;
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM,
- DHT_MSG_MIGRATE_FILE_FAILED, "%s: dict_new failed)",
- loc->path);
- goto out;
- }
+ xdata = dict_new();
+ if (!xdata) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: dict_new failed)", loc->path);
+ goto out;
+ }
- ret = dict_set_int32(xdata, GF_CLEAN_WRITE_PROTECTION, 1);
- if (ret) {
- *fop_errno = ENOMEM;
- ret = -1;
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: failed to set dictionary value: key = %s ", loc->path,
- GF_CLEAN_WRITE_PROTECTION);
- goto out;
- }
+ ret = dict_set_int32_sizen(xdata, GF_CLEAN_WRITE_PROTECTION, 1);
+ if (ret) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "%s: failed to set dictionary value: key = %s ", loc->path,
+ GF_CLEAN_WRITE_PROTECTION);
+ goto out;
}
ret = syncop_lookup(to, loc, &new_stbuf, NULL, xdata, NULL);
@@ -818,7 +720,7 @@ __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from,
/* No need to bother about 0 byte size files */
if (stbuf->ia_size > 0) {
- if (conf->use_fallocate) {
+ if (conf->use_fallocate && !file_has_holes) {
ret = syncop_fallocate(to, fd, 0, 0, stbuf->ia_size, NULL, NULL);
if (ret < 0) {
if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -ENOSYS) {
@@ -845,9 +747,7 @@ __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from,
goto out;
}
}
- }
-
- if (!conf->use_fallocate) {
+ } else {
ret = syncop_ftruncate(to, fd, stbuf->ia_size, NULL, NULL, NULL,
NULL);
if (ret < 0) {
@@ -875,7 +775,7 @@ out:
dict_unref(dict);
if (xdata)
- dict_unref(dict);
+ dict_unref(xdata);
return ret;
}
@@ -1098,32 +998,103 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag,
int ret = 0;
int count = 0;
off_t offset = 0;
+ off_t data_offset = 0;
+ off_t hole_offset = 0;
struct iovec *vector = NULL;
struct iobref *iobref = NULL;
uint64_t total = 0;
size_t read_size = 0;
+ size_t data_block_size = 0;
dict_t *xdata = NULL;
dht_conf_t *conf = NULL;
conf = this->private;
+
/* if file size is '0', no need to enter this loop */
while (total < ia_size) {
- read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE)
- ? DHT_REBALANCE_BLKSIZE
- : (ia_size - total));
+ /* This is a regular file - read it sequentially */
+ if (!hole_exists) {
+ read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE)
+ ? DHT_REBALANCE_BLKSIZE
+ : (ia_size - total));
+ } else {
+ /* This is a sparse file - read only the data segments in the file
+ */
+
+ /* If the previous data block is fully copied, find the next data
+ * segment
+ * starting at the offset of the last read and written byte, */
+ if (data_block_size <= 0) {
+ ret = syncop_seek(from, src, offset, GF_SEEK_DATA, NULL,
+ &data_offset);
+ if (ret) {
+ if (ret == -ENXIO)
+ ret = 0; /* No more data segments */
+ else
+ *fop_errno = -ret; /* Error occurred */
+
+ break;
+ }
+
+ /* If the position of the current data segment is greater than
+ * the position of the next hole, find the next hole in order to
+ * calculate the length of the new data segment */
+ if (data_offset > hole_offset) {
+ /* Starting at the offset of the last data segment, find the
+ * next hole */
+ ret = syncop_seek(from, src, data_offset, GF_SEEK_HOLE,
+ NULL, &hole_offset);
+ if (ret) {
+ /* If an error occurred here it's a real error because
+ * if the seek for a data segment was successful then
+ * necessarily another hole must exist (EOF is a hole)
+ */
+ *fop_errno = -ret;
+ break;
+ }
+
+ /* Calculate the total size of the current data block */
+ data_block_size = hole_offset - data_offset;
+ }
+ } else {
+ /* There is still data in the current segment, move the
+ * data_offset to the position of the last written byte */
+ data_offset = offset;
+ }
+
+ /* Calculate how much data needs to be read and written. If the data
+ * segment's length is bigger than DHT_REBALANCE_BLKSIZE, read and
+ * write DHT_REBALANCE_BLKSIZE data length and the rest in the
+ * next iteration(s) */
+ read_size = ((data_block_size > DHT_REBALANCE_BLKSIZE)
+ ? DHT_REBALANCE_BLKSIZE
+ : data_block_size);
+
+ /* Calculate the remaining size of the data block - maybe there's no
+ * need to seek for data in the next iteration */
+ data_block_size -= read_size;
+
+ /* Set offset to the offset of the data segment so read and write
+ * will have the correct position */
+ offset = data_offset;
+ }
ret = syncop_readv(from, src, read_size, offset, 0, &vector, &count,
&iobref, NULL, NULL, NULL);
+
if (!ret || (ret < 0)) {
- *fop_errno = -ret;
+ if (!ret) {
+ /* File was probably truncated*/
+ ret = -1;
+ *fop_errno = ENOSPC;
+ } else {
+ *fop_errno = -ret;
+ }
break;
}
- if (hole_exists) {
- ret = dht_write_with_holes(to, dst, vector, count, ret, offset,
- iobref, fop_errno);
- } else {
- if (!conf->force_migration && !dht_is_tier_xlator(this)) {
+ if (!conf->force_migration) {
+ if (!xdata) {
xdata = dict_new();
if (!xdata) {
gf_msg("dht", GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
@@ -1143,7 +1114,7 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag,
* https://github.com/gluster/glusterfs/issues/308
* for more details.
*/
- ret = dict_set_int32(xdata, GF_AVOID_OVERWRITE, 1);
+ ret = dict_set_int32_sizen(xdata, GF_AVOID_OVERWRITE, 1);
if (ret) {
gf_msg("dht", GF_LOG_ERROR, 0, ENOMEM,
"failed to set dict");
@@ -1152,22 +1123,12 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag,
break;
}
}
-
- ret = syncop_writev(to, dst, vector, count, offset, iobref, 0, NULL,
- NULL, xdata, NULL);
- if (ret < 0) {
- *fop_errno = -ret;
- }
- }
-
- if ((defrag && defrag->cmd == GF_DEFRAG_CMD_START_TIER) &&
- (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING)) {
- gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_TIER_PAUSED,
- "Migrate file paused");
- ret = -1;
}
+ ret = syncop_writev(to, dst, vector, count, offset, iobref, 0, NULL,
+ NULL, xdata, NULL);
if (ret < 0) {
+ *fop_errno = -ret;
break;
}
@@ -1561,6 +1522,7 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
xlator_t *old_target = NULL;
xlator_t *hashed_subvol = NULL;
fd_t *linkto_fd = NULL;
+ dict_t *xdata = NULL;
if (from == to) {
gf_msg_debug(this->name, 0,
@@ -1571,20 +1533,6 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
goto out;
}
- /* If defrag is NULL, it should be assumed that migration is triggered
- * from client */
- defrag = conf->defrag;
-
- /* migration of files from clients is restricted to non-tiered clients
- * for now */
- if (!defrag && dht_is_tier_xlator(this)) {
- ret = ENOTSUP;
- goto out;
- }
-
- if (defrag && defrag->tier_conf.is_tier)
- log_level = GF_LOG_TRACE;
-
gf_log(this->name, log_level, "%s: attempting to move from %s to %s",
loc->path, from->name, to->name);
@@ -1627,6 +1575,10 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
loc->path);
}
+ /* The file is locked to prevent a rename during a migration. Renames
+ * and migrations on the file at the same time can lead to data loss.
+ */
+
ret = dht_build_parent_loc(this, &parent_loc, loc, fop_errno);
if (ret < 0) {
ret = -1;
@@ -1727,9 +1679,13 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
goto out;
}
+ /* Try to preserve 'holes' while migrating data */
+ if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE))
+ file_has_holes = 1;
+
/* create the destination, with required modes/xattr */
ret = __dht_rebalance_create_dst_file(this, to, from, loc, &stbuf, &dst_fd,
- fop_errno);
+ fop_errno, file_has_holes);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, 0,
"Create dst failed"
@@ -1773,8 +1729,8 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
* destination. We need to do update this only post migration
* as in case of failure the linkto needs to point to the source
* subvol */
- ret = __dht_rebalance_create_dst_file(this, to, from, loc, &stbuf,
- &dst_fd, fop_errno);
+ ret = __dht_rebalance_create_dst_file(
+ this, to, from, loc, &stbuf, &dst_fd, fop_errno, file_has_holes);
if (ret) {
gf_log(this->name, GF_LOG_ERROR,
"Create dst failed"
@@ -1861,9 +1817,6 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
ret = 0;
goto out;
}
- /* Try to preserve 'holes' while migrating data */
- if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE))
- file_has_holes = 1;
ret = __dht_rebalance_migrate_data(this, defrag, from, to, src_fd, dst_fd,
stbuf.ia_size, file_has_holes,
@@ -1878,7 +1831,15 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
/* TODO: Sync the locks */
- ret = syncop_fsync(to, dst_fd, 0, NULL, NULL, NULL, NULL);
+ xdata = dict_new();
+ if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "%s: failed to set last-fsync flag on "
+ "%s (%s)",
+ loc->path, to->name, strerror(ENOMEM));
+ }
+
+ ret = syncop_fsync(to, dst_fd, 0, NULL, NULL, xdata, NULL);
if (ret) {
gf_log(this->name, GF_LOG_WARNING, "%s: failed to fsync on %s (%s)",
loc->path, to->name, strerror(-ret));
@@ -2321,14 +2282,12 @@ out:
}
}
- if (!dht_is_tier_xlator(this)) {
- lk_ret = syncop_removexattr(to, loc, GF_PROTECT_FROM_EXTERNAL_WRITES,
- NULL, NULL);
- if (lk_ret && (lk_ret != -ENODATA) && (lk_ret != -ENOATTR)) {
- gf_msg(this->name, GF_LOG_WARNING, -lk_ret, 0,
- "%s: removexattr failed key %s", loc->path,
- GF_PROTECT_FROM_EXTERNAL_WRITES);
- }
+ lk_ret = syncop_removexattr(to, loc, GF_PROTECT_FROM_EXTERNAL_WRITES, NULL,
+ NULL);
+ if (lk_ret && (lk_ret != -ENODATA) && (lk_ret != -ENOATTR)) {
+ gf_msg(this->name, GF_LOG_WARNING, -lk_ret, 0,
+ "%s: removexattr failed key %s", loc->path,
+ GF_PROTECT_FROM_EXTERNAL_WRITES);
}
if (dict)
@@ -2341,11 +2300,15 @@ out:
if (dst_fd)
syncop_close(dst_fd);
+
if (src_fd)
syncop_close(src_fd);
if (linkto_fd)
syncop_close(linkto_fd);
+ if (xdata)
+ dict_unref(xdata);
+
loc_wipe(&tmp_loc);
loc_wipe(&parent_loc);
@@ -2440,15 +2403,12 @@ void
dht_build_root_inode(xlator_t *this, inode_t **inode)
{
inode_table_t *itable = NULL;
- uuid_t root_gfid = {
- 0,
- };
+ static uuid_t root_gfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
itable = inode_table_new(0, this);
if (!itable)
return;
- root_gfid[15] = 1;
*inode = inode_find(itable, root_gfid);
}
@@ -2578,10 +2538,10 @@ out:
* all hardlinks.
*/
-int
+gf_boolean_t
gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
{
- int ret = 0;
+ gf_boolean_t ret = _gf_false;
int i = local_subvol_index;
char *str = NULL;
uint32_t hashval = 0;
@@ -2603,12 +2563,11 @@ gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
}
str = uuid_utoa_r(gfid, buf);
- ret = dht_hash_compute(this, 0, str, &hashval);
- if (ret == 0) {
+ if (dht_hash_compute(this, 0, str, &hashval) == 0) {
index = (hashval % entry->count);
if (entry->elements[index].info == REBAL_NODEUUID_MINE) {
/* Index matches this node's nodeuuid.*/
- ret = 1;
+ ret = _gf_true;
goto out;
}
@@ -2621,12 +2580,12 @@ gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
/* None of the bricks in the subvol are up.
* CHILD_DOWN will kill the process soon */
- return 0;
+ return _gf_false;
}
if (entry->elements[index].info == REBAL_NODEUUID_MINE) {
/* Index matches this node's nodeuuid.*/
- ret = 1;
+ ret = _gf_true;
goto out;
}
}
@@ -2675,6 +2634,7 @@ gf_defrag_migrate_single_file(void *opaque)
struct iatt *iatt_ptr = NULL;
gf_boolean_t update_skippedcount = _gf_true;
int i = 0;
+ gf_boolean_t should_i_migrate = 0;
rebal_entry = (struct dht_container *)opaque;
if (!rebal_entry) {
@@ -2729,17 +2689,29 @@ gf_defrag_migrate_single_file(void *opaque)
goto out;
}
- if (!gf_defrag_should_i_migrate(this, rebal_entry->local_subvol_index,
- entry->d_stat.ia_gfid)) {
- gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path);
- goto out;
- }
+ should_i_migrate = gf_defrag_should_i_migrate(
+ this, rebal_entry->local_subvol_index, entry->d_stat.ia_gfid);
gf_uuid_copy(entry_loc.gfid, entry->d_stat.ia_gfid);
gf_uuid_copy(entry_loc.pargfid, loc->gfid);
ret = syncop_lookup(this, &entry_loc, &iatt, NULL, NULL, NULL);
+
+ if (!should_i_migrate) {
+ /* this node isn't supposed to migrate the file. suppressing any
+ * potential error from lookup as this file is under migration by
+ * another node */
+ if (ret) {
+ gf_msg_debug(this->name, -ret,
+ "Ignoring lookup failure: node isn't migrating %s",
+ entry_loc.path);
+ ret = 0;
+ }
+ gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path);
+ goto out;
+ }
+
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
"Migrate file failed: %s lookup failed", entry_loc.path);
@@ -2902,8 +2874,7 @@ gf_defrag_migrate_single_file(void *opaque)
if (defrag->stats == _gf_true) {
gettimeofday(&end, NULL);
- elapsed = (end.tv_sec - start.tv_sec) * 1e6 +
- (end.tv_usec - start.tv_usec);
+ elapsed = gf_tvdiff(&start, &end);
gf_log(this->name, GF_LOG_INFO,
"Migration of "
"file:%s size:%" PRIu64
@@ -3082,9 +3053,9 @@ int static gf_defrag_get_entry(xlator_t *this, int i,
dht_conf_t *conf, gf_defrag_info_t *defrag,
fd_t *fd, dict_t *migrate_data,
struct dir_dfmeta *dir_dfmeta, dict_t *xattr_req,
- int *should_commit_hash, int *perrno)
+ int *perrno)
{
- int ret = -1;
+ int ret = 0;
char is_linkfile = 0;
gf_dirent_t *df_entry = NULL;
struct dht_container *tmp_container = NULL;
@@ -3100,6 +3071,13 @@ int static gf_defrag_get_entry(xlator_t *this, int i,
}
if (dir_dfmeta->fetch_entries[i] == 1) {
+ if (!fd) {
+ dir_dfmeta->fetch_entries[i] = 0;
+ dir_dfmeta->offset_var[i].readdir_done = 1;
+ ret = 0;
+ goto out;
+ }
+
ret = syncop_readdirp(conf->local_subvols[i], fd, 131072,
dir_dfmeta->offset_var[i].offset,
&(dir_dfmeta->equeue[i]), xattr_req, NULL);
@@ -3259,7 +3237,6 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
dict_t *migrate_data, int *perrno)
{
int ret = -1;
- fd_t *fd = NULL;
dht_conf_t *conf = NULL;
gf_dirent_t entries;
dict_t *xattr_req = NULL;
@@ -3280,7 +3257,7 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
int dfc_index = 0;
int throttle_up = 0;
struct dir_dfmeta *dir_dfmeta = NULL;
- int should_commit_hash = 1;
+ xlator_t *old_THIS = NULL;
gf_log(this->name, GF_LOG_INFO, "migrate data called on %s", loc->path);
gettimeofday(&dir_start, NULL);
@@ -3293,28 +3270,53 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
goto out;
}
- fd = fd_create(loc->inode, defrag->pid);
- if (!fd) {
- gf_log(this->name, GF_LOG_ERROR, "Failed to create fd");
+ old_THIS = THIS;
+ THIS = this;
+
+ dir_dfmeta = GF_CALLOC(1, sizeof(*dir_dfmeta), gf_common_mt_pointer);
+ if (!dir_dfmeta) {
+ gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta is NULL");
ret = -1;
goto out;
}
- ret = syncop_opendir(this, loc, fd, NULL, NULL);
- if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_DATA_FAILED,
- "Migrate data failed: Failed to open dir %s", loc->path);
- *perrno = -ret;
+ dir_dfmeta->lfd = GF_CALLOC(local_subvols_cnt, sizeof(fd_t *),
+ gf_common_mt_pointer);
+ if (!dir_dfmeta->lfd) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_INSUFF_MEMORY,
+ "for dir_dfmeta", NULL);
ret = -1;
+ *perrno = ENOMEM;
goto out;
}
- fd_bind(fd);
- dir_dfmeta = GF_CALLOC(1, sizeof(*dir_dfmeta), gf_common_mt_pointer);
- if (!dir_dfmeta) {
- gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta is NULL");
- ret = -1;
- goto out;
+ for (i = 0; i < local_subvols_cnt; i++) {
+ dir_dfmeta->lfd[i] = fd_create(loc->inode, defrag->pid);
+ if (!dir_dfmeta->lfd[i]) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_FD_CREATE_FAILED,
+ NULL);
+ *perrno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_opendir(conf->local_subvols[i], loc, dir_dfmeta->lfd[i],
+ NULL, NULL);
+ if (ret) {
+ fd_unref(dir_dfmeta->lfd[i]);
+ dir_dfmeta->lfd[i] = NULL;
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_FAILED_TO_OPEN,
+ "dir: %s", loc->path, "subvol: %s",
+ conf->local_subvols[i]->name, NULL);
+
+ if (conf->decommission_in_progress) {
+ *perrno = -ret;
+ ret = -1;
+ goto out;
+ }
+ } else {
+ fd_bind(dir_dfmeta->lfd[i]);
+ }
}
dir_dfmeta->head = GF_CALLOC(local_subvols_cnt, sizeof(*(dir_dfmeta->head)),
@@ -3349,6 +3351,7 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
ret = -1;
goto out;
}
+
ret = gf_defrag_ctx_subvols_init(dir_dfmeta->offset_var, this);
if (ret) {
gf_log(this->name, GF_LOG_ERROR,
@@ -3361,7 +3364,8 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
dir_dfmeta->fetch_entries = GF_CALLOC(local_subvols_cnt, sizeof(int),
gf_common_mt_int);
if (!dir_dfmeta->fetch_entries) {
- gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta->fetch_entries is NULL");
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_INSUFF_MEMORY,
+ "for dir_dfmeta->fetch_entries", NULL);
ret = -1;
goto out;
}
@@ -3431,8 +3435,13 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
ldfq_count <= MAX_MIGRATE_QUEUE_COUNT &&
!dht_dfreaddirp_done(dir_dfmeta->offset_var, local_subvols_cnt)) {
ret = gf_defrag_get_entry(this, dfc_index, &container, loc, conf,
- defrag, fd, migrate_data, dir_dfmeta,
- xattr_req, &should_commit_hash, perrno);
+ defrag, dir_dfmeta->lfd[dfc_index],
+ migrate_data, dir_dfmeta, xattr_req,
+ perrno);
+
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED) {
+ goto out;
+ }
if (ret) {
gf_log(this->name, GF_LOG_WARNING,
@@ -3472,27 +3481,19 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
}
gettimeofday(&end, NULL);
- elapsed = (end.tv_sec - dir_start.tv_sec) * 1e6 +
- (end.tv_usec - dir_start.tv_usec);
+ elapsed = gf_tvdiff(&dir_start, &end);
gf_log(this->name, GF_LOG_INFO,
"Migration operation on dir %s took "
"%.2f secs",
loc->path, elapsed / 1e6);
ret = 0;
out:
-
+ THIS = old_THIS;
gf_defrag_free_dir_dfmeta(dir_dfmeta, local_subvols_cnt);
if (xattr_req)
dict_unref(xattr_req);
- if (fd)
- fd_unref(fd);
-
- if (ret == 0 && should_commit_hash == 0) {
- ret = 2;
- }
-
/* It does not matter if it errored out - this number is
* used to calculate rebalance estimated time to complete.
* No locking required as dirs are processed by a single thread.
@@ -3500,6 +3501,7 @@ out:
defrag->num_dirs_processed++;
return ret;
}
+
int
gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
dict_t *fix_layout)
@@ -3514,7 +3516,6 @@ gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
* rebalance is complete.
*/
if (defrag->cmd == GF_DEFRAG_CMD_START_LAYOUT_FIX ||
- defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER ||
defrag->cmd == GF_DEFRAG_CMD_DETACH_START) {
return 0;
}
@@ -3560,114 +3561,6 @@ gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
return 0;
}
-/* Function for doing a named lookup on file inodes during an attach tier
- * So that a hardlink lookup heal i.e gfid to parent gfid lookup heal
- * happens on pre-existing data. This is required so that the ctr database has
- * hardlinks of all the exisitng file in the volume. CTR xlator on the
- * brick/server side does db update/insert of the hardlink on a namelookup.
- * Currently the namedlookup is done synchronous to the fixlayout that is
- * triggered by attach tier. This is not performant, adding more time to
- * fixlayout. The performant approach is record the hardlinks on a compressed
- * datastore and then do the namelookup asynchronously later, giving the ctr db
- * eventual consistency
- * */
-int
-gf_fix_layout_tier_attach_lookup(xlator_t *this, loc_t *parent_loc,
- gf_dirent_t *file_dentry)
-{
- int ret = -1;
- dict_t *lookup_xdata = NULL;
- dht_conf_t *conf = NULL;
- loc_t file_loc = {
- 0,
- };
- struct iatt iatt = {
- 0,
- };
-
- GF_VALIDATE_OR_GOTO("tier", this, out);
-
- GF_VALIDATE_OR_GOTO(this->name, parent_loc, out);
-
- GF_VALIDATE_OR_GOTO(this->name, file_dentry, out);
-
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
-
- if (!parent_loc->inode) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "%s/%s parent is NULL", parent_loc->path, file_dentry->d_name);
- goto out;
- }
-
- conf = this->private;
-
- loc_wipe(&file_loc);
-
- if (gf_uuid_is_null(file_dentry->d_stat.ia_gfid)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "%s/%s gfid not present", parent_loc->path, file_dentry->d_name);
- goto out;
- }
-
- gf_uuid_copy(file_loc.gfid, file_dentry->d_stat.ia_gfid);
-
- if (gf_uuid_is_null(parent_loc->gfid)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "%s/%s"
- " gfid not present",
- parent_loc->path, file_dentry->d_name);
- goto out;
- }
-
- gf_uuid_copy(file_loc.pargfid, parent_loc->gfid);
-
- ret = dht_build_child_loc(this, &file_loc, parent_loc, file_dentry->d_name);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Child loc build failed");
- ret = -1;
- goto out;
- }
-
- lookup_xdata = dict_new();
- if (!lookup_xdata) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed creating lookup dict for %s", file_dentry->d_name);
- goto out;
- }
-
- ret = dict_set_int32(lookup_xdata, CTR_ATTACH_TIER_LOOKUP, 1);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to set lookup flag");
- goto out;
- }
-
- gf_uuid_copy(file_loc.parent->gfid, parent_loc->gfid);
-
- /* Sending lookup to cold tier only */
- ret = syncop_lookup(conf->subvolumes[0], &file_loc, &iatt, NULL,
- lookup_xdata, NULL);
- if (ret) {
- /* If the file does not exist on the cold tier than it must */
- /* have been discovered on the hot tier. This is not an error. */
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "%s lookup to cold tier on attach heal failed", file_loc.path);
- goto out;
- }
-
- ret = 0;
-
-out:
-
- loc_wipe(&file_loc);
-
- if (lookup_xdata)
- dict_unref(lookup_xdata);
-
- return ret;
-}
-
int
gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
dict_t *fix_layout, dict_t *migrate_data)
@@ -3687,7 +3580,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
};
inode_t *linked_inode = NULL, *inode = NULL;
dht_conf_t *conf = NULL;
- int should_commit_hash = 1;
int perrno = 0;
conf = this->private;
@@ -3790,16 +3682,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
continue;
if (!IA_ISDIR(entry->d_stat.ia_type)) {
- /* If its a fix layout during the attach
- * tier operation do lookups on files
- * on cold subvolume so that there is a
- * CTR DB Lookup Heal triggered on existing
- * data.
- * */
- if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) {
- gf_fix_layout_tier_attach_lookup(this, loc, entry);
- }
-
continue;
}
loc_wipe(&entry_loc);
@@ -3816,8 +3698,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
goto out;
} else {
- should_commit_hash = 0;
-
continue;
}
}
@@ -3880,7 +3760,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
ret = -1;
goto out;
} else {
- should_commit_hash = 0;
continue;
}
}
@@ -3893,7 +3772,12 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
ret = gf_defrag_fix_layout(this, defrag, &entry_loc, fix_layout,
migrate_data);
- if (ret && ret != 2) {
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED ||
+ defrag->defrag_status == GF_DEFRAG_STATUS_FAILED) {
+ goto out;
+ }
+
+ if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LAYOUT_FIX_FAILED,
"Fix layout failed for %s", entry_loc.path);
@@ -3916,7 +3800,25 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
INIT_LIST_HEAD(&entries.list);
}
+ /* A directory layout is fixed only after its subdirs are healed to
+ * any newly added bricks. If the layout is fixed before subdirs are
+ * healed, the newly added brick will get a non-null layout.
+ * Any subdirs which hash to that layout will no longer show up
+ * in a directory listing until they are healed.
+ */
+
ret = syncop_setxattr(this, loc, fix_layout, 0, NULL, NULL);
+
+ /* In case of a race where the directory is deleted just before
+ * layout setxattr, the errors are updated in the layout structure.
+ * We can use this information to make a decision whether the directory
+ * is deleted entirely.
+ */
+ if (ret == 0) {
+ ret = dht_dir_layout_error_check(this, loc->inode);
+ ret = -ret;
+ }
+
if (ret) {
if (-ret == ENOENT || -ret == ESTALE) {
gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_LAYOUT_FIX_FAILED,
@@ -3927,6 +3829,7 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
defrag->total_failures++;
}
ret = 0;
+ goto out;
} else {
gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LAYOUT_FIX_FAILED,
"Setxattr failed for %s", loc->path);
@@ -3941,11 +3844,10 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
}
}
- if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) &&
- (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX)) {
+ if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) {
ret = gf_defrag_process_dir(this, defrag, loc, migrate_data, &perrno);
- if (ret && (ret != 2)) {
+ if (ret) {
if (perrno == ENOENT || perrno == ESTALE) {
ret = 0;
goto out;
@@ -3961,18 +3863,13 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
if (conf->decommission_in_progress) {
goto out;
}
-
- should_commit_hash = 0;
}
- } else if (ret == 2) {
- should_commit_hash = 0;
}
}
gf_msg_trace(this->name, 0, "fix layout called on %s", loc->path);
- if (should_commit_hash &&
- gf_defrag_settle_hash(this, defrag, loc, fix_layout) != 0) {
+ if (gf_defrag_settle_hash(this, defrag, loc, fix_layout) != 0) {
defrag->total_failures++;
gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SETTLE_HASH_FAILED,
@@ -3996,245 +3893,34 @@ out:
if (fd)
fd_unref(fd);
- if (ret == 0 && should_commit_hash == 0) {
- ret = 2;
- }
-
- return ret;
-}
-
-/******************************************************************************
- * Tier background Fix layout functions
- ******************************************************************************/
-/* This is the background tier fixlayout thread */
-void *
-gf_tier_do_fix_layout(void *args)
-{
- gf_tier_fix_layout_arg_t *tier_fix_layout_arg = args;
- int ret = -1;
- xlator_t *this = NULL;
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
- dict_t *dict = NULL;
- loc_t loc = {
- 0,
- };
- struct iatt iatt = {
- 0,
- };
- struct iatt parent = {
- 0,
- };
-
- GF_VALIDATE_OR_GOTO("tier", tier_fix_layout_arg, out);
- GF_VALIDATE_OR_GOTO("tier", tier_fix_layout_arg->this, out);
- this = tier_fix_layout_arg->this;
-
- conf = this->private;
- GF_VALIDATE_OR_GOTO(this->name, conf, out);
-
- defrag = conf->defrag;
- GF_VALIDATE_OR_GOTO(this->name, defrag, out);
- GF_VALIDATE_OR_GOTO(this->name, defrag->root_inode, out);
-
- GF_VALIDATE_OR_GOTO(this->name, tier_fix_layout_arg->fix_layout, out);
-
- /* Get Root loc_t */
- dht_build_root_loc(defrag->root_inode, &loc);
- ret = syncop_lookup(this, &loc, &iatt, &parent, NULL, NULL);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_REBALANCE_START_FAILED,
- "Lookup on root failed.");
- ret = -1;
- goto out;
- }
-
- /* Start the crawl */
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Tiering Fixlayout started");
-
- ret = gf_defrag_fix_layout(this, defrag, &loc,
- tier_fix_layout_arg->fix_layout, NULL);
- if (ret && ret != 2) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_FAILED,
- "Tiering fixlayout failed.");
- ret = -1;
- goto out;
- }
-
- if (ret != 2 &&
- gf_defrag_settle_hash(this, defrag, &loc,
- tier_fix_layout_arg->fix_layout) != 0) {
- defrag->total_failures++;
- ret = -1;
- goto out;
- }
-
- dict = dict_new();
- if (!dict) {
- ret = -1;
- goto out;
- }
-
- ret = dict_set_str(dict, GF_XATTR_TIER_LAYOUT_FIXED_KEY, "yes");
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_FAILED,
- "Failed to set dictionary value: key = %s",
- GF_XATTR_TIER_LAYOUT_FIXED_KEY);
- ret = -1;
- goto out;
- }
-
- /* Marking the completion of tiering fix layout via a xattr on root */
- ret = syncop_setxattr(this, &loc, dict, 0, NULL, NULL);
- if (ret) {
- gf_log(this->name, GF_LOG_ERROR,
- "Failed to set tiering fix "
- "layout completed xattr on %s",
- loc.path);
- ret = -1;
- goto out;
- }
-
- ret = 0;
-out:
- if (ret && defrag)
- defrag->total_failures++;
-
- if (dict)
- dict_unref(dict);
-
- return NULL;
-}
-
-int
-gf_tier_start_fix_layout(xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag,
- dict_t *fix_layout)
-{
- int ret = -1;
- dict_t *tier_dict = NULL;
- gf_tier_fix_layout_arg_t *tier_fix_layout_arg = NULL;
-
- tier_dict = dict_new();
- if (!tier_dict) {
- gf_log("tier", GF_LOG_ERROR,
- "Tier fix layout failed :"
- "Creation of tier_dict failed");
- ret = -1;
- goto out;
- }
-
- /* Check if layout is fixed already */
- ret = syncop_getxattr(this, loc, &tier_dict, GF_XATTR_TIER_LAYOUT_FIXED_KEY,
- NULL, NULL);
- if (ret != 0) {
- tier_fix_layout_arg = &defrag->tier_conf.tier_fix_layout_arg;
-
- /*Fill crawl arguments */
- tier_fix_layout_arg->this = this;
- tier_fix_layout_arg->fix_layout = fix_layout;
-
- /* Spawn the fix layout thread so that its done in the
- * background */
- ret = gf_thread_create(&tier_fix_layout_arg->thread_id, NULL,
- gf_tier_do_fix_layout, tier_fix_layout_arg,
- "tierfixl");
- if (ret) {
- gf_log("tier", GF_LOG_ERROR,
- "Thread creation failed. "
- "Background fix layout for tiering will not "
- "work.");
- defrag->total_failures++;
- goto out;
- }
- }
- ret = 0;
-out:
- if (tier_dict)
- dict_unref(tier_dict);
-
return ret;
}
-void
-gf_tier_clear_fix_layout(xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag)
-{
- int ret = -1;
- dict_t *dict = NULL;
-
- GF_VALIDATE_OR_GOTO("tier", this, out);
- GF_VALIDATE_OR_GOTO(this->name, loc, out);
- GF_VALIDATE_OR_GOTO(this->name, defrag, out);
-
- /* Check if background fixlayout is completed. This is not
- * multi-process safe i.e there is a possibility that by the time
- * we move to remove the xattr there it might have been cleared by some
- * other detach process from other node. We ignore the error if such
- * a thing happens */
- ret = syncop_getxattr(this, loc, &dict, GF_XATTR_TIER_LAYOUT_FIXED_KEY,
- NULL, NULL);
- if (ret) {
- /* Background fixlayout not complete - nothing to clear*/
- gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_LOG_TIER_STATUS,
- "Unable to retrieve fixlayout xattr."
- "Assume background fix layout not complete");
- goto out;
- }
-
- ret = syncop_removexattr(this, loc, GF_XATTR_TIER_LAYOUT_FIXED_KEY, NULL,
- NULL);
- if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_LOG_TIER_STATUS,
- "Failed removing tier fix layout "
- "xattr from %s",
- loc->path);
- goto out;
- }
- ret = 0;
-out:
- if (dict)
- dict_unref(dict);
-}
-
-void
-gf_tier_wait_fix_lookup(gf_defrag_info_t *defrag)
-{
- if (defrag->tier_conf.tier_fix_layout_arg.thread_id) {
- pthread_join(defrag->tier_conf.tier_fix_layout_arg.thread_id, NULL);
- }
-}
-/******************Tier background Fix layout functions END********************/
-
int
dht_init_local_subvols_and_nodeuuids(xlator_t *this, dht_conf_t *conf,
loc_t *loc)
{
dict_t *dict = NULL;
- gf_defrag_info_t *defrag = NULL;
uuid_t *uuid_ptr = NULL;
int ret = -1;
int i = 0;
int j = 0;
- defrag = conf->defrag;
-
- if (defrag->cmd != GF_DEFRAG_CMD_START_TIER) {
- /* Find local subvolumes */
- ret = syncop_getxattr(this, loc, &dict, GF_REBAL_FIND_LOCAL_SUBVOL,
- NULL, NULL);
- if (ret && (ret != -ENODATA)) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, 0,
- "local "
- "subvolume determination failed with error: %d",
- -ret);
- ret = -1;
- goto out;
- }
-
- if (!ret)
- goto out;
+ /* Find local subvolumes */
+ ret = syncop_getxattr(this, loc, &dict, GF_REBAL_FIND_LOCAL_SUBVOL, NULL,
+ NULL);
+ if (ret && (ret != -ENODATA)) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, 0,
+ "local "
+ "subvolume determination failed with error: %d",
+ -ret);
+ ret = -1;
+ goto out;
}
+ if (!ret)
+ goto out;
+
ret = syncop_getxattr(this, loc, &dict, GF_REBAL_OLD_FIND_LOCAL_SUBVOL,
NULL, NULL);
if (ret) {
@@ -4325,9 +4011,6 @@ dht_file_counter_thread(void *args)
struct timespec time_to_wait = {
0,
};
- struct timeval now = {
- 0,
- };
uint64_t tmp_size = 0;
if (!args)
@@ -4337,9 +4020,8 @@ dht_file_counter_thread(void *args)
dht_build_root_loc(defrag->root_inode, &root_loc);
while (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) {
- gettimeofday(&now, NULL);
- time_to_wait.tv_sec = now.tv_sec + 600;
- time_to_wait.tv_nsec = 0;
+ timespec_now(&time_to_wait);
+ time_to_wait.tv_sec += 600;
pthread_mutex_lock(&defrag->fc_mutex);
pthread_cond_timedwait(&defrag->fc_wakeup_cond, &defrag->fc_mutex,
@@ -4412,7 +4094,7 @@ gf_defrag_estimates_init(xlator_t *this, loc_t *loc, pthread_t *filecnt_thread)
goto out;
}
- ret = gf_thread_create(filecnt_thread, NULL, &dht_file_counter_thread,
+ ret = gf_thread_create(filecnt_thread, NULL, dht_file_counter_thread,
(void *)defrag, "dhtfcnt");
if (ret) {
@@ -4436,9 +4118,6 @@ gf_defrag_parallel_migration_init(xlator_t *this, gf_defrag_info_t *defrag,
int thread_spawn_count = 0;
int index = 0;
pthread_t *tid = NULL;
- char thread_name[GF_THREAD_NAMEMAX] = {
- 0,
- };
if (!defrag)
goto out;
@@ -4472,10 +4151,8 @@ gf_defrag_parallel_migration_init(xlator_t *this, gf_defrag_info_t *defrag,
/*Spawn Threads Here*/
while (index < thread_spawn_count) {
- snprintf(thread_name, sizeof(thread_name), "dhtmig%d",
- ((index + 1) & 0x3ff));
- ret = gf_thread_create(&(tid[index]), NULL, &gf_defrag_task,
- (void *)defrag, thread_name);
+ ret = gf_thread_create(&(tid[index]), NULL, gf_defrag_task,
+ (void *)defrag, "dhtmig%d", (index + 1) & 0x3ff);
if (ret != 0) {
gf_msg("DHT", GF_LOG_ERROR, ret, 0, "Thread[%d] creation failed. ",
index);
@@ -4548,7 +4225,6 @@ gf_defrag_start_crawl(void *data)
dict_t *migrate_data = NULL;
dict_t *status = NULL;
glusterfs_ctx_t *ctx = NULL;
- dht_methods_t *methods = NULL;
call_frame_t *statfs_frame = NULL;
xlator_t *old_THIS = NULL;
int ret = -1;
@@ -4564,7 +4240,6 @@ gf_defrag_start_crawl(void *data)
int thread_index = 0;
pthread_t *tid = NULL;
pthread_t filecnt_thread;
- gf_boolean_t is_tier_detach = _gf_false;
gf_boolean_t fc_thread_started = _gf_false;
this = data;
@@ -4583,7 +4258,8 @@ gf_defrag_start_crawl(void *data)
if (!defrag)
goto exit;
- gettimeofday(&defrag->start_time, NULL);
+ defrag->start_time = gf_time();
+
dht_build_root_inode(this, &defrag->root_inode);
if (!defrag->root_inode)
goto out;
@@ -4717,43 +4393,17 @@ gf_defrag_start_crawl(void *data)
}
}
- if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) {
- /* Fix layout for attach tier */
- ret = gf_tier_start_fix_layout(this, &loc, defrag, fix_layout);
- if (ret) {
- goto out;
- }
-
- methods = &(conf->methods);
-
- /* Calling tier_start of tier.c */
- methods->migration_other(this, defrag);
- if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER ||
- defrag->cmd == GF_DEFRAG_CMD_DETACH_START) {
- ret = dict_set_str(migrate_data, GF_XATTR_FILE_MIGRATE_KEY,
- "force");
- if (ret)
- goto out;
- }
- } else {
- ret = gf_defrag_fix_layout(this, defrag, &loc, fix_layout,
- migrate_data);
- if (ret && ret != 2) {
- defrag->total_failures++;
- ret = -1;
- goto out;
- }
-
- if (ret != 2 &&
- gf_defrag_settle_hash(this, defrag, &loc, fix_layout) != 0) {
- defrag->total_failures++;
- ret = -1;
- goto out;
- }
+ ret = gf_defrag_fix_layout(this, defrag, &loc, fix_layout, migrate_data);
+ if (ret) {
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
+ }
- if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER ||
- defrag->cmd == GF_DEFRAG_CMD_DETACH_START)
- is_tier_detach = _gf_true;
+ if (gf_defrag_settle_hash(this, defrag, &loc, fix_layout) != 0) {
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
}
gf_log("DHT", GF_LOG_INFO, "crawling file-system completed");
@@ -4767,19 +4417,6 @@ out:
defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
}
- if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) {
- /* Wait for the tier fixlayout to
- * complete if its was started.*/
- gf_tier_wait_fix_lookup(defrag);
- }
-
- if (is_tier_detach && ret == 0) {
- /* If it was a detach remove the tier fix-layout
- * xattr on root. Ignoring the failure, as nothing has to be
- * done, logging is done in gf_tier_clear_fix_layout */
- gf_tier_clear_fix_layout(this, &loc, defrag);
- }
-
gf_defrag_parallel_migration_cleanup(defrag, tid, thread_index);
if ((defrag->defrag_status != GF_DEFRAG_STATUS_STOPPED) &&
@@ -4793,9 +4430,9 @@ out:
dht_send_rebalance_event(this, defrag->cmd, defrag->defrag_status);
+ status = dict_new();
LOCK(&defrag->lock);
{
- status = dict_new();
gf_defrag_status_get(conf, status);
if (ctx && ctx->notify)
ctx->notify(GF_EN_DEFRAG_STATUS, status);
@@ -4878,9 +4515,6 @@ gf_defrag_get_estimates_based_on_size(dht_conf_t *conf)
uint64_t total_processed = 0;
uint64_t tmp_count = 0;
uint64_t time_to_complete = 0;
- struct timeval now = {
- 0,
- };
double elapsed = 0;
defrag = conf->defrag;
@@ -4888,8 +4522,7 @@ gf_defrag_get_estimates_based_on_size(dht_conf_t *conf)
if (!g_totalsize)
goto out;
- gettimeofday(&now, NULL);
- elapsed = now.tv_sec - defrag->start_time.tv_sec;
+ elapsed = gf_time() - defrag->start_time;
/* Don't calculate the estimates for the first 10 minutes.
* It is unlikely to be accurate and estimates are not required
@@ -4939,13 +4572,8 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict)
uint64_t lookup = 0;
uint64_t failures = 0;
uint64_t skipped = 0;
- uint64_t promoted = 0;
- uint64_t demoted = 0;
char *status = "";
double elapsed = 0;
- struct timeval end = {
- 0,
- };
uint64_t time_to_complete = 0;
uint64_t time_left = 0;
gf_defrag_info_t *defrag = conf->defrag;
@@ -4962,17 +4590,12 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict)
lookup = defrag->num_files_lookedup;
failures = defrag->total_failures;
skipped = defrag->skipped;
- promoted = defrag->total_files_promoted;
- demoted = defrag->total_files_demoted;
- gettimeofday(&end, NULL);
-
- elapsed = end.tv_sec - defrag->start_time.tv_sec;
+ elapsed = gf_time() - defrag->start_time;
/* The rebalance is still in progress */
- if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) &&
- (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED)) {
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) {
time_to_complete = gf_defrag_get_estimates_based_on_size(conf);
if (time_to_complete && (time_to_complete > elapsed))
@@ -4987,14 +4610,6 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict)
if (!dict)
goto log;
- ret = dict_set_uint64(dict, "promoted", promoted);
- if (ret)
- gf_log(THIS->name, GF_LOG_WARNING, "failed to set promoted count");
-
- ret = dict_set_uint64(dict, "demoted", demoted);
- if (ret)
- gf_log(THIS->name, GF_LOG_WARNING, "failed to set demoted count");
-
ret = dict_set_uint64(dict, "files", files);
if (ret)
gf_log(THIS->name, GF_LOG_WARNING, "failed to set file count");
@@ -5060,159 +4675,6 @@ out:
return 0;
}
-void
-gf_defrag_set_pause_state(gf_tier_conf_t *tier_conf, tier_pause_state_t state)
-{
- pthread_mutex_lock(&tier_conf->pause_mutex);
- tier_conf->pause_state = state;
- pthread_mutex_unlock(&tier_conf->pause_mutex);
-}
-
-tier_pause_state_t
-gf_defrag_get_pause_state(gf_tier_conf_t *tier_conf)
-{
- int state;
-
- pthread_mutex_lock(&tier_conf->pause_mutex);
- state = tier_conf->pause_state;
- pthread_mutex_unlock(&tier_conf->pause_mutex);
-
- return state;
-}
-
-tier_pause_state_t
-gf_defrag_check_pause_tier(gf_tier_conf_t *tier_conf)
-{
- int woke = 0;
- int state = -1;
-
- pthread_mutex_lock(&tier_conf->pause_mutex);
-
- if (tier_conf->pause_state == TIER_RUNNING)
- goto out;
-
- if (tier_conf->pause_state == TIER_PAUSED)
- goto out;
-
- if (tier_conf->promote_in_progress || tier_conf->demote_in_progress)
- goto out;
-
- tier_conf->pause_state = TIER_PAUSED;
-
- if (tier_conf->pause_synctask) {
- synctask_wake(tier_conf->pause_synctask);
- tier_conf->pause_synctask = 0;
- woke = 1;
- }
-
- gf_msg("tier", GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED, "woken %d", woke);
-
- gf_event(EVENT_TIER_PAUSE, "vol=%s", tier_conf->volname);
-out:
- state = tier_conf->pause_state;
-
- pthread_mutex_unlock(&tier_conf->pause_mutex);
-
- return state;
-}
-
-void
-gf_defrag_pause_tier_timeout(void *data)
-{
- xlator_t *this = NULL;
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
-
- this = (xlator_t *)data;
- GF_VALIDATE_OR_GOTO("tier", this, out);
-
- conf = this->private;
- GF_VALIDATE_OR_GOTO(this->name, conf, out);
-
- defrag = conf->defrag;
- GF_VALIDATE_OR_GOTO(this->name, defrag, out);
-
- gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED,
- "Request pause timer timeout");
-
- gf_defrag_check_pause_tier(&defrag->tier_conf);
-
-out:
- return;
-}
-
-int
-gf_defrag_pause_tier(xlator_t *this, gf_defrag_info_t *defrag)
-{
- int ret = 0;
- struct timespec delta = {
- 0,
- };
- int delay = 2;
-
- if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED)
- goto out;
-
- /*
- * Set flag requesting to pause tiering. Wait 'delay' seconds for
- * tiering to actually stop as indicated by the pause state
- * before returning success or failure.
- */
- gf_defrag_set_pause_state(&defrag->tier_conf, TIER_REQUEST_PAUSE);
-
- /*
- * If migration is not underway, can pause immediately.
- */
- gf_defrag_check_pause_tier(&defrag->tier_conf);
- if (gf_defrag_get_pause_state(&defrag->tier_conf) == TIER_PAUSED)
- goto out;
-
- gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED,
- "Request pause tier");
-
- defrag->tier_conf.pause_synctask = synctask_get();
- delta.tv_sec = delay;
- delta.tv_nsec = 0;
- defrag->tier_conf.pause_timer = gf_timer_call_after(
- this->ctx, delta, gf_defrag_pause_tier_timeout, this);
-
- synctask_yield(defrag->tier_conf.pause_synctask);
-
- if (gf_defrag_get_pause_state(&defrag->tier_conf) == TIER_PAUSED)
- goto out;
-
- gf_defrag_set_pause_state(&defrag->tier_conf, TIER_RUNNING);
-
- ret = -1;
-out:
-
- gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_PAUSED,
- "Pause tiering ret=%d", ret);
-
- return ret;
-}
-
-int
-gf_defrag_resume_tier(xlator_t *this, gf_defrag_info_t *defrag)
-{
- gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_TIER_RESUME,
- "Pause end. Resume tiering");
-
- gf_defrag_set_pause_state(&defrag->tier_conf, TIER_RUNNING);
-
- gf_event(EVENT_TIER_RESUME, "vol=%s", defrag->tier_conf.volname);
-
- return 0;
-}
-
-int
-gf_defrag_start_detach_tier(gf_defrag_info_t *defrag)
-{
- defrag->cmd = GF_DEFRAG_CMD_START_DETACH_TIER;
-
- return 0;
-}
-
int
gf_defrag_stop(dht_conf_t *conf, gf_defrag_status_t status, dict_t *output)
{
diff --git a/xlators/cluster/dht/src/dht-rename.c b/xlators/cluster/dht/src/dht-rename.c
index af342bdbe21..d9dbf50492f 100644
--- a/xlators/cluster/dht/src/dht-rename.c
+++ b/xlators/cluster/dht/src/dht-rename.c
@@ -11,11 +11,9 @@
/* TODO: link(oldpath, newpath) fails if newpath already exists. DHT should
* delete the newpath if it gets EEXISTS from link() call.
*/
-#include "glusterfs.h"
-#include "xlator.h"
#include "dht-common.h"
#include "dht-lock.h"
-#include "defaults.h"
+#include <glusterfs/defaults.h>
int
dht_rename_unlock(call_frame_t *frame, xlator_t *this);
@@ -505,6 +503,8 @@ dht_order_rename_lock(call_frame_t *frame, loc_t **loc, xlator_t **subvol)
uuid_utoa_r(local->loc.pargfid, src);
else if (local->loc.parent)
uuid_utoa_r(local->loc.parent->gfid, src);
+ else
+ src[0] = '\0';
strcat(src, local->loc.name);
@@ -520,6 +520,8 @@ dht_order_rename_lock(call_frame_t *frame, loc_t **loc, xlator_t **subvol)
uuid_utoa_r(local->loc2.pargfid, dst);
else if (local->loc2.parent)
uuid_utoa_r(local->loc2.parent->gfid, dst);
+ else
+ dst[0] = '\0';
strcat(dst, local->loc2.name);
ret = strcmp(src, dst);
@@ -1009,9 +1011,11 @@ dht_rename_links_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
{
xlator_t *prev = NULL;
dht_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
prev = cookie;
local = frame->local;
+ main_frame = local->main_frame;
/* TODO: Handle this case in lookup-optimize */
if (op_ret == -1) {
@@ -1024,7 +1028,8 @@ dht_rename_links_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
dht_linkfile_attr_heal(frame, this);
}
- dht_rename_unlink(frame, this);
+ dht_rename_unlink(main_frame, this);
+ DHT_STACK_DESTROY(frame);
return 0;
}
@@ -1040,7 +1045,8 @@ dht_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
xlator_t *src_cached = NULL;
xlator_t *dst_hashed = NULL;
xlator_t *dst_cached = NULL;
- loc_t link_loc = {0};
+ call_frame_t *link_frame = NULL;
+ dht_local_t *link_local = NULL;
local = frame->local;
prev = cookie;
@@ -1110,18 +1116,36 @@ dht_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
/* Create the linkto file for the dst file */
if ((src_cached == dst_cached) && (dst_hashed != dst_cached)) {
- loc_copy(&link_loc, &local->loc2);
- if (link_loc.inode)
- inode_unref(link_loc.inode);
- link_loc.inode = inode_ref(local->loc.inode);
- gf_uuid_copy(local->gfid, local->loc.inode->gfid);
- gf_uuid_copy(link_loc.gfid, local->loc.inode->gfid);
-
- dht_linkfile_create(frame, dht_rename_links_create_cbk, this,
- src_cached, dst_hashed, &link_loc);
+ link_frame = copy_frame(frame);
+ if (!link_frame) {
+ goto unlink;
+ }
+
+ /* fop value sent as maxvalue because it is not used
+ * anywhere in this case */
+ link_local = dht_local_init(link_frame, &local->loc2, NULL,
+ GF_FOP_MAXVALUE);
+ if (!link_local) {
+ goto unlink;
+ }
+
+ if (link_local->loc.inode)
+ inode_unref(link_local->loc.inode);
+ link_local->loc.inode = inode_ref(local->loc.inode);
+ link_local->main_frame = frame;
+ link_local->stbuf = local->stbuf;
+ gf_uuid_copy(link_local->gfid, local->loc.inode->gfid);
+
+ dht_linkfile_create(link_frame, dht_rename_links_create_cbk, this,
+ src_cached, dst_hashed, &link_local->loc);
return 0;
}
+unlink:
+
+ if (link_frame) {
+ DHT_STACK_DESTROY(link_frame);
+ }
dht_rename_unlink(frame, this);
return 0;
diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
index 0e57eab5f7f..3e24065227c 100644
--- a/xlators/cluster/dht/src/dht-selfheal.c
+++ b/xlators/cluster/dht/src/dht-selfheal.c
@@ -8,12 +8,7 @@
cases as published by the Free Software Foundation.
*/
-#include "glusterfs.h"
-#include "xlator.h"
-#include "dht-common.h"
-#include "dht-messages.h"
#include "dht-lock.h"
-#include "glusterfs-acl.h"
#define DHT_SET_LAYOUT_RANGE(layout, i, srt, chunk, path) \
do { \
@@ -22,7 +17,7 @@
layout->list[i].commit_hash = layout->commit_hash; \
\
gf_msg_trace(this->name, 0, \
- "gave fix: %u - %u, with commit-hash %u" \
+ "gave fix: 0x%x - 0x%x, with commit-hash 0x%x" \
" on %s for %s", \
layout->list[i].start, layout->list[i].stop, \
layout->list[i].commit_hash, \
@@ -38,7 +33,7 @@
} \
} while (0)
-int
+static int
dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout,
gf_boolean_t newdir, dht_selfheal_layout_t healer,
dht_need_heal_t should_heal);
@@ -149,8 +144,8 @@ dht_refresh_layout_done(call_frame_t *frame)
ret = dht_layout_sort(refreshed);
if (ret == -1) {
- gf_msg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_SORT_FAILED,
- "sorting the layout failed");
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LAYOUT_SORT_FAILED, NULL);
goto err;
}
@@ -206,10 +201,9 @@ dht_refresh_layout_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret == -1) {
gf_uuid_unparse(local->loc.gfid, gfid);
local->op_errno = op_errno;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- DHT_MSG_FILE_LOOKUP_FAILED,
- "lookup of %s on %s returned error, gfid: %s",
- local->loc.path, prev->name, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_FILE_LOOKUP_FAILED, "path=%s", local->loc.path,
+ "name=%s", prev->name, "gfid=%s", gfid, NULL);
goto unlock;
}
@@ -270,9 +264,8 @@ dht_refresh_layout(call_frame_t *frame)
conf->subvolume_cnt);
if (!local->selfheal.refreshed_layout) {
gf_uuid_unparse(local->loc.gfid, gfid);
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "mem allocation for layout failed, path:%s gfid:%s",
- local->loc.path, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED,
+ "path=%s", local->loc.path, "gfid=%s", gfid, NULL);
goto out;
}
@@ -284,9 +277,8 @@ dht_refresh_layout(call_frame_t *frame)
gf_uuid_unparse(local->loc.gfid, gfid);
local->xattr_req = dict_new();
if (local->xattr_req == NULL) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "dict mem allocation failed, path:%s gfid:%s",
- local->loc.path, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "path=%s", local->loc.path, "gfid=%s", gfid, NULL);
goto out;
}
}
@@ -294,9 +286,9 @@ dht_refresh_layout(call_frame_t *frame)
if (dict_get(local->xattr_req, conf->xattr_name) == 0) {
ret = dict_set_uint32(local->xattr_req, conf->xattr_name, 4 * 4);
if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value:key = %s",
- local->loc.path, conf->xattr_name);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", local->loc.path, "key=%s", conf->xattr_name,
+ NULL);
}
for (i = 0; i < call_cnt; i++) {
@@ -529,7 +521,7 @@ out:
return fixit;
}
-int
+static int
dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout,
gf_boolean_t newdir, dht_selfheal_layout_t healer,
dht_need_heal_t should_heal)
@@ -561,10 +553,8 @@ dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout,
lk_array = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_char);
if (lk_array == NULL) {
gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
- gf_msg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "mem allocation failed for "
- "lk_array, gfid:%s path: %s",
- gfid, local->loc.path);
+ gf_smsg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED,
+ "lk_array-gfid=%s", gfid, "path=%s", local->loc.path, NULL);
goto err;
}
@@ -574,10 +564,9 @@ dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout,
DHT_LAYOUT_HEAL_DOMAIN, NULL, FAIL_ON_ANY_ERROR);
if (lk_array[i] == NULL) {
gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
- gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "mem allocation "
- "failed for lk_array, gfid:%s path:%s",
- gfid, local->loc.path);
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM,
+ DHT_MSG_MEM_ALLOC_FAILED, "lk_array-gfid=%s", gfid,
+ "path=%s", local->loc.path, NULL);
goto err;
}
}
@@ -586,10 +575,8 @@ dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout,
lk_array = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_char);
if (lk_array == NULL) {
gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
- gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "mem allocation failed for "
- "lk_array, gfid:%s path:%s",
- gfid, local->loc.path);
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED,
+ "lk_array-gfid=%s", gfid, "path=%s", local->loc.path, NULL);
goto err;
}
@@ -598,10 +585,8 @@ dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout,
NULL, FAIL_ON_ANY_ERROR);
if (lk_array[0] == NULL) {
gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
- gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "mem allocation failed for "
- "lk_array, gfid:%s path:%s",
- gfid, local->loc.path);
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED,
+ "lk_array-gfid=%s", gfid, "path=%s", local->loc.path, NULL);
goto err;
}
}
@@ -627,7 +612,7 @@ err:
return -1;
}
-int
+static int
dht_selfheal_dir_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xdata)
{
@@ -649,10 +634,9 @@ dht_selfheal_dir_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
err = 0;
} else {
gf_uuid_unparse(local->loc.gfid, gfid);
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "layout setxattr failed on %s, path:%s gfid:%s", subvol->name,
- local->loc.path, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, "name=%s", subvol->name,
+ "path=%s", local->loc.path, "gfid=%s", gfid, NULL);
err = op_errno;
}
@@ -699,7 +683,7 @@ dht_set_user_xattr(dict_t *dict, char *k, data_t *v, void *data)
return ret;
}
-int
+static int
dht_selfheal_dir_xattr_persubvol(call_frame_t *frame, loc_t *loc,
dht_layout_t *layout, int i,
xlator_t *req_subvol)
@@ -741,19 +725,17 @@ dht_selfheal_dir_xattr_persubvol(call_frame_t *frame, loc_t *loc,
ret = dict_set_str(xdata, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
if (ret < 0) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value: key = %s,"
- " gfid = %s",
- loc->path, GLUSTERFS_INTERNAL_FOP_KEY, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=%s", GLUSTERFS_INTERNAL_FOP_KEY,
+ "gfid=%s", gfid, NULL);
goto err;
}
ret = dict_set_int8(xdata, DHT_IATT_IN_XDATA_KEY, 1);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value: key = %s,"
- " gfid = %s",
- loc->path, DHT_IATT_IN_XDATA_KEY, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=%s", DHT_IATT_IN_XDATA_KEY,
+ "gfid=%s", gfid, NULL);
goto err;
}
@@ -761,27 +743,27 @@ dht_selfheal_dir_xattr_persubvol(call_frame_t *frame, loc_t *loc,
ret = dht_disk_layout_extract(this, layout, i, &disk_layout);
if (ret == -1) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "Directory self heal xattr failed:"
- " %s: (subvol %s) Failed to extract disk layout,"
- " gfid = %s",
- loc->path, subvol->name, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
+ "extract-disk-layout-failed, path=%s", loc->path, "subvol=%s",
+ subvol->name, "gfid=%s", gfid, NULL);
goto err;
}
ret = dict_set_bin(xattr, conf->xattr_name, disk_layout, 4 * 4);
if (ret == -1) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "Directory self heal xattr failed:"
- "%s: (subvol %s) Failed to set xattr dictionary,"
- " gfid = %s",
- loc->path, subvol->name, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, "path=%s", loc->path,
+ "subvol=%s", subvol->name,
+ "set-xattr-dictionary-failed"
+ "gfid=%s",
+ gfid, NULL);
goto err;
}
disk_layout = NULL;
gf_msg_trace(this->name, 0,
- "setting hash range %u - %u (type %d) on subvolume %s"
+ "setting hash range 0x%x - 0x%x (type %d) on subvolume %s"
" for %s",
layout->list[i].start, layout->list[i].stop, layout->type,
subvol->name, loc->path);
@@ -791,20 +773,17 @@ dht_selfheal_dir_xattr_persubvol(call_frame_t *frame, loc_t *loc,
if (data) {
ret = dict_add(xattr, QUOTA_LIMIT_KEY, data);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value:"
- " key = %s",
- loc->path, QUOTA_LIMIT_KEY);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=%s", QUOTA_LIMIT_KEY, NULL);
}
}
data = dict_get(local->xattr, QUOTA_LIMIT_OBJECTS_KEY);
if (data) {
ret = dict_add(xattr, QUOTA_LIMIT_OBJECTS_KEY, data);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value:"
- " key = %s",
- loc->path, QUOTA_LIMIT_OBJECTS_KEY);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=%s", QUOTA_LIMIT_OBJECTS_KEY,
+ NULL);
}
}
}
@@ -833,7 +812,7 @@ err:
return 0;
}
-int
+static int
dht_fix_dir_xattr(call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
{
dht_local_t *local = NULL;
@@ -882,7 +861,7 @@ out:
return 0;
}
-int
+static int
dht_selfheal_dir_xattr(call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
{
dht_local_t *local = NULL;
@@ -942,9 +921,8 @@ dht_selfheal_dir_xattr(call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
dummy = dht_layout_new(this, 1);
if (!dummy) {
gf_uuid_unparse(loc->gfid, gfid);
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "failed to allocate dummy layout, path:%s gfid:%s", loc->path,
- gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_DUMMY_ALLOC_FAILED,
+ "path=%s", loc->path, "gfid=%s", gfid, NULL);
goto out;
}
for (i = 0; i < conf->subvolume_cnt && missing_xattr; i++) {
@@ -960,38 +938,6 @@ out:
return 0;
}
-gf_boolean_t
-dht_is_subvol_part_of_layout(dht_layout_t *layout, xlator_t *xlator)
-{
- int i = 0;
- gf_boolean_t ret = _gf_false;
-
- for (i = 0; i < layout->cnt; i++) {
- if (!strcmp(layout->list[i].xlator->name, xlator->name)) {
- ret = _gf_true;
- break;
- }
- }
-
- return ret;
-}
-
-int
-dht_layout_index_from_conf(dht_layout_t *layout, xlator_t *xlator)
-{
- int i = -1;
- int j = 0;
-
- for (j = 0; j < layout->cnt; j++) {
- if (!strcmp(layout->list[j].xlator->name, xlator->name)) {
- i = j;
- break;
- }
- }
-
- return i;
-}
-
int
dht_selfheal_dir_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *statpre,
@@ -1033,18 +979,27 @@ dht_selfheal_dir_setattr(call_frame_t *frame, loc_t *loc, struct iatt *stbuf,
int missing_attr = 0;
int i = 0, ret = -1;
dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
xlator_t *this = NULL;
int cnt = 0;
local = frame->local;
this = frame->this;
+ conf = this->private;
+
+ /* We need to heal the attrs if:
+ * 1. Any directories were missing - the newly created dirs will need
+ * to have the correct attrs set
+ * 2. An existing dir does not have the correct permissions -they may
+ * have been changed when a brick was down.
+ */
for (i = 0; i < layout->cnt; i++) {
if (layout->list[i].err == -1)
missing_attr++;
}
- if (missing_attr == 0) {
+ if ((missing_attr == 0) && (local->need_attrheal == 0)) {
if (!local->heal_layout) {
gf_msg_trace(this->name, 0, "Skip heal layout for %s gfid = %s ",
loc->path, uuid_utoa(loc->gfid));
@@ -1062,25 +1017,18 @@ dht_selfheal_dir_setattr(call_frame_t *frame, loc_t *loc, struct iatt *stbuf,
return 0;
}
- local->call_cnt = missing_attr;
- cnt = layout->cnt;
+ cnt = local->call_cnt = conf->subvolume_cnt;
for (i = 0; i < cnt; i++) {
- if (layout->list[i].err == -1) {
- gf_msg_trace(this->name, 0, "%s: setattr on subvol %s, gfid = %s",
- loc->path, layout->list[i].xlator->name,
- uuid_utoa(loc->gfid));
-
- STACK_WIND(
- frame, dht_selfheal_dir_setattr_cbk, layout->list[i].xlator,
- layout->list[i].xlator->fops->setattr, loc, stbuf, valid, NULL);
- }
+ STACK_WIND(frame, dht_selfheal_dir_setattr_cbk, layout->list[i].xlator,
+ layout->list[i].xlator->fops->setattr, loc, stbuf, valid,
+ NULL);
}
return 0;
}
-int
+static int
dht_selfheal_dir_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, struct iatt *preparent,
@@ -1110,11 +1058,10 @@ dht_selfheal_dir_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret) {
gf_uuid_unparse(local->loc.gfid, gfid);
- gf_msg(this->name,
- ((op_errno == EEXIST) ? GF_LOG_DEBUG : GF_LOG_WARNING), op_errno,
- DHT_MSG_DIR_SELFHEAL_FAILED,
- "Directory selfheal failed: path = %s, gfid = %s",
- local->loc.path, gfid);
+ gf_smsg(this->name,
+ ((op_errno == EEXIST) ? GF_LOG_DEBUG : GF_LOG_WARNING),
+ op_errno, DHT_MSG_DIR_SELFHEAL_FAILED, "path=%s",
+ local->loc.path, "gfid=%s", gfid, NULL);
goto out;
}
dht_iatt_merge(this, &local->preparent, preparent);
@@ -1133,89 +1080,7 @@ out:
return 0;
}
-void
-dht_selfheal_dir_mkdir_setacl(dict_t *xattr, dict_t *dict)
-{
- data_t *acl_default = NULL;
- data_t *acl_access = NULL;
- xlator_t *this = NULL;
- int ret = -1;
-
- GF_ASSERT(xattr);
- GF_ASSERT(dict);
-
- this = THIS;
- GF_ASSERT(this);
-
- acl_default = dict_get(xattr, POSIX_ACL_DEFAULT_XATTR);
-
- if (!acl_default) {
- gf_msg_debug(this->name, 0, "ACL_DEFAULT xattr not present");
- goto cont;
- }
- ret = dict_set(dict, POSIX_ACL_DEFAULT_XATTR, acl_default);
- if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value.key = %s",
- POSIX_ACL_DEFAULT_XATTR);
-cont:
- acl_access = dict_get(xattr, POSIX_ACL_ACCESS_XATTR);
- if (!acl_access) {
- gf_msg_debug(this->name, 0, "ACL_ACCESS xattr not present");
- goto out;
- }
- ret = dict_set(dict, POSIX_ACL_ACCESS_XATTR, acl_access);
- if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value.key = %s",
- POSIX_ACL_ACCESS_XATTR);
-
-out:
- return;
-}
-
-void
-dht_selfheal_dir_mkdir_setquota(dict_t *src, dict_t *dst)
-{
- data_t *quota_limit_key = NULL;
- data_t *quota_limit_obj_key = NULL;
- xlator_t *this = NULL;
- int ret = -1;
-
- GF_ASSERT(src);
- GF_ASSERT(dst);
-
- this = THIS;
- GF_ASSERT(this);
-
- quota_limit_key = dict_get(src, QUOTA_LIMIT_KEY);
- if (!quota_limit_key) {
- gf_msg_debug(this->name, 0, "QUOTA_LIMIT_KEY xattr not present");
- goto cont;
- }
- ret = dict_set(dst, QUOTA_LIMIT_KEY, quota_limit_key);
- if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value.key = %s", QUOTA_LIMIT_KEY);
-
-cont:
- quota_limit_obj_key = dict_get(src, QUOTA_LIMIT_OBJECTS_KEY);
- if (!quota_limit_obj_key) {
- gf_msg_debug(this->name, 0,
- "QUOTA_LIMIT_OBJECTS_KEY xattr not present");
- goto out;
- }
- ret = dict_set(dst, QUOTA_LIMIT_OBJECTS_KEY, quota_limit_obj_key);
- if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value.key = %s",
- QUOTA_LIMIT_OBJECTS_KEY);
-
-out:
- return;
-}
-
-int
+static int
dht_selfheal_dir_mkdir_lookup_done(call_frame_t *frame, xlator_t *this)
{
dht_local_t *local = NULL;
@@ -1239,10 +1104,8 @@ dht_selfheal_dir_mkdir_lookup_done(call_frame_t *frame, xlator_t *this)
ret = dict_set_gfuuid(dict, "gfid-req", local->gfid, true);
if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "%s: Failed to set dictionary value:"
- " key = gfid-req",
- loc->path);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=gfid-req", NULL);
} else if (local->params) {
/* Send the dictionary from higher layers directly */
@@ -1254,18 +1117,15 @@ dht_selfheal_dir_mkdir_lookup_done(call_frame_t *frame, xlator_t *this)
dht_dir_set_heal_xattr(this, local, dict, local->xattr, NULL, NULL);
if (!dict) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "dict is NULL, need to make sure gfids are same");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_IS_NULL, NULL);
dict = dict_new();
if (!dict)
return -1;
}
ret = dict_set_flag(dict, GF_INTERNAL_CTX_KEY, GF_DHT_HEAL_DIR);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value for"
- " key = %s at path: %s",
- GF_INTERNAL_CTX_KEY, loc->path);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, "key=%s",
+ GF_INTERNAL_CTX_KEY, "path=%s", loc->path, NULL);
/* We can still continue. As heal can still happen
* unless quota limits have reached for the dir.
*/
@@ -1297,7 +1157,7 @@ err:
return 0;
}
-int
+static int
dht_selfheal_dir_mkdir_lookup_cbk(call_frame_t *frame, void *cookie,
xlator_t *this, int op_ret, int op_errno,
inode_t *inode, struct iatt *stbuf,
@@ -1308,45 +1168,54 @@ dht_selfheal_dir_mkdir_lookup_cbk(call_frame_t *frame, void *cookie,
int this_call_cnt = 0;
int missing_dirs = 0;
dht_layout_t *layout = NULL;
- dht_conf_t *conf = 0;
+ xlator_t *prev = 0;
loc_t *loc = NULL;
- int check_mds = 0;
- int errst = 0;
- int32_t mds_xattr_val[1] = {0};
char gfid_local[GF_UUID_BUF_SIZE] = {0};
+ int index = -1;
VALIDATE_OR_GOTO(this->private, err);
local = frame->local;
layout = local->layout;
loc = &local->loc;
- conf = this->private;
+ prev = cookie;
- if (local->gfid)
+ if (!gf_uuid_is_null(local->gfid))
gf_uuid_unparse(local->gfid, gfid_local);
- this_call_cnt = dht_frame_return(frame);
-
LOCK(&frame->lock);
{
+ index = dht_layout_index_for_subvol(layout, prev);
if ((op_ret < 0) && (op_errno == ENOENT || op_errno == ESTALE)) {
local->selfheal.hole_cnt = !local->selfheal.hole_cnt
? 1
: local->selfheal.hole_cnt + 1;
+ /* the status might have changed. Update the layout with the
+ * new status
+ */
+ if (index >= 0) {
+ layout->list[index].err = op_errno;
+ }
}
if (!op_ret) {
dht_iatt_merge(this, &local->stbuf, stbuf);
- }
- check_mds = dht_dict_get_array(xattr, conf->mds_xattr_key,
- mds_xattr_val, 1, &errst);
- if (dict_get(xattr, conf->mds_xattr_key) && check_mds && !errst) {
- dict_unref(local->xattr);
- local->xattr = dict_ref(xattr);
+ if (prev == local->mds_subvol) {
+ dict_unref(local->xattr);
+ local->xattr = dict_ref(xattr);
+ }
+ /* the status might have changed. Update the layout with the
+ * new status
+ */
+ if (index >= 0) {
+ layout->list[index].err = -1;
+ }
}
}
UNLOCK(&frame->lock);
+ this_call_cnt = dht_frame_return(frame);
+
if (is_last_call(this_call_cnt)) {
if (local->selfheal.hole_cnt == layout->cnt) {
gf_msg_debug(this->name, op_errno,
@@ -1382,7 +1251,7 @@ err:
return 0;
}
-int
+static int
dht_selfheal_dir_mkdir_lock_cbk(call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
int32_t op_errno, dict_t *xdata)
@@ -1402,19 +1271,14 @@ dht_selfheal_dir_mkdir_lock_cbk(call_frame_t *frame, void *cookie,
local->call_cnt = conf->subvolume_cnt;
if (op_ret < 0) {
- /* We get this error when the directory entry was not created
- * on a newky attached tier subvol. Hence proceed and do mkdir
- * on the tier subvol.
- */
if (op_errno == EINVAL) {
local->call_cnt = 1;
dht_selfheal_dir_mkdir_lookup_done(frame, this);
return 0;
}
- gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_ENTRYLK_ERROR,
- "acquiring entrylk after inodelk failed for %s",
- local->loc.path);
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_ENTRYLK_ERROR,
+ "path=%s", local->loc.path, NULL);
local->op_errno = op_errno;
goto err;
@@ -1428,10 +1292,8 @@ dht_selfheal_dir_mkdir_lock_cbk(call_frame_t *frame, void *cookie,
ret = dict_set_int32(local->xattr_req, "list-xattr", 1);
if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary key list-xattr value "
- " for path %s ",
- local->loc.path);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, "path=%s",
+ local->loc.path, NULL);
for (i = 0; i < conf->subvolume_cnt; i++) {
if (mds_subvol && conf->subvolumes[i] == mds_subvol) {
@@ -1454,18 +1316,21 @@ err:
return 0;
}
-int
+static int
dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout,
int force)
{
int missing_dirs = 0;
int i = 0;
+ int op_errno = 0;
int ret = -1;
dht_local_t *local = NULL;
xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
local = frame->local;
this = frame->this;
+ conf = this->private;
local->selfheal.force_mkdir = force;
local->selfheal.hole_cnt = 0;
@@ -1476,16 +1341,18 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout,
}
if (missing_dirs == 0) {
+ /* We don't need to create any directories. Proceed to heal the
+ * attrs and xattrs
+ */
if (!__is_root_gfid(local->stbuf.ia_gfid)) {
if (local->need_xattr_heal) {
local->need_xattr_heal = 0;
- ret = dht_dir_xattr_heal(this, local);
- if (ret)
- gf_msg(this->name, GF_LOG_ERROR, ret,
- DHT_MSG_DIR_XATTR_HEAL_FAILED,
- "xattr heal failed for "
- "directory %s gfid %s ",
- local->loc.path, local->gfid);
+ ret = dht_dir_xattr_heal(this, local, &op_errno);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED, "path=%s",
+ local->loc.path, "gfid=%s", local->gfid, NULL);
+ }
} else {
if (!gf_uuid_is_null(local->gfid))
gf_uuid_copy(loc->gfid, local->gfid);
@@ -1494,28 +1361,53 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout,
if (!ret)
return 0;
- gf_msg(this->name, GF_LOG_INFO, 0,
- DHT_MSG_DIR_XATTR_HEAL_FAILED,
- "Failed to set mds xattr "
- "for directory %s gfid %s ",
- local->loc.path, local->gfid);
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_SET_XATTR_FAILED,
+ "path=%s", local->loc.path, "gfid=%s", local->gfid,
+ NULL);
}
}
dht_selfheal_dir_setattr(frame, loc, &local->stbuf, 0xffffffff, layout);
return 0;
}
- if (local->hashed_subvol == NULL)
- local->hashed_subvol = dht_subvol_get_hashed(this, loc);
+ /* MDS xattr is populated only while DHT is having more than one
+ subvol.In case of graph switch while adding more dht subvols need to
+ consider hash subvol as a MDS to avoid MDS check failure at the time
+ of running fop on directory
+ */
+ if (!dict_get(local->xattr, conf->mds_xattr_key) &&
+ (conf->subvolume_cnt > 1)) {
+ if (local->hashed_subvol == NULL) {
+ local->hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (local->hashed_subvol == NULL) {
+ local->op_errno = EINVAL;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED, "gfid=%s",
+ loc->pargfid, "name=%s", loc->name, "path=%s",
+ loc->path, NULL);
+ goto err;
+ }
+ }
+ ret = dht_inode_ctx_mdsvol_set(local->inode, this,
+ local->hashed_subvol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "Failed to set hashed subvol for %s on inode vol is %s",
+ local->loc.path,
+ local->hashed_subvol ? local->hashed_subvol->name : "NULL");
+ goto err;
+ }
+ }
if (local->hashed_subvol == NULL) {
- local->op_errno = EINVAL;
- gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
- DHT_MSG_HASHED_SUBVOL_GET_FAILED,
- "(%s/%s) (path: %s): "
- "hashed subvolume not found",
- loc->pargfid, loc->name, loc->path);
- goto err;
+ local->hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (local->hashed_subvol == NULL) {
+ local->op_errno = EINVAL;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED, "gfid=%s", loc->pargfid,
+ "name=%s", loc->name, "path=%s", loc->path, NULL);
+ goto err;
+ }
}
local->current = &local->lock[0];
@@ -1531,7 +1423,7 @@ err:
return -1;
}
-int
+static int
dht_selfheal_layout_alloc_start(xlator_t *this, loc_t *loc,
dht_layout_t *layout)
{
@@ -1627,7 +1519,7 @@ dht_get_layout_count(xlator_t *this, dht_layout_t *layout, int new_layout)
/* if layout->spread_cnt is set, check if it is <= available
* subvolumes (down brick and decommissioned bricks are considered
- * un-availbale). Else return count (available up bricks) */
+ * un-available). Else return count (available up bricks) */
count = ((layout->spread_cnt && (layout->spread_cnt <= count))
? layout->spread_cnt
: ((count) ? count : 1));
@@ -1640,8 +1532,6 @@ dht_selfheal_layout_new_directory(call_frame_t *frame, loc_t *loc,
dht_layout_t *new_layout);
void
-dht_layout_entry_swap(dht_layout_t *layout, int i, int j);
-void
dht_layout_range_swap(dht_layout_t *layout, int i, int j);
/*
@@ -1650,7 +1540,7 @@ dht_layout_range_swap(dht_layout_t *layout, int i, int j);
*/
#define OV_ENTRY(x, y) table[x * new->cnt + y]
-void
+static void
dht_selfheal_layout_maximize_overlap(call_frame_t *frame, loc_t *loc,
dht_layout_t *new, dht_layout_t *old)
{
@@ -1727,7 +1617,7 @@ dht_selfheal_layout_maximize_overlap(call_frame_t *frame, loc_t *loc,
}
}
-dht_layout_t *
+static dht_layout_t *
dht_fix_layout_of_directory(call_frame_t *frame, loc_t *loc,
dht_layout_t *layout)
{
@@ -1752,9 +1642,8 @@ dht_fix_layout_of_directory(call_frame_t *frame, loc_t *loc,
new_layout = dht_layout_new(this, priv->subvolume_cnt);
if (!new_layout) {
gf_uuid_unparse(loc->gfid, gfid);
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "mem allocation failed for new_layout, path:%s gfid:%s",
- loc->path, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED,
+ "new_layout, path=%s", loc->path, "gfid=%s", gfid, NULL);
goto done;
}
@@ -1764,10 +1653,9 @@ dht_fix_layout_of_directory(call_frame_t *frame, loc_t *loc,
if (subvol_down) {
gf_uuid_unparse(loc->gfid, gfid);
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_FIX_FAILED,
- "Layout fix failed: %u subvolume(s) are down"
- ". Skipping fix layout. path:%s gfid:%s",
- subvol_down, loc->path, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_FIX_FAILED,
+ "subvol-down=%u", subvol_down, "Skipping-fix-layout", "path=%s",
+ loc->path, "gfid=%s", gfid, NULL);
GF_FREE(new_layout);
return NULL;
}
@@ -1785,10 +1673,10 @@ dht_fix_layout_of_directory(call_frame_t *frame, loc_t *loc,
if (priv->du_stats) {
for (i = 0; i < priv->subvolume_cnt; ++i) {
- gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_SUBVOL_INFO,
- "subvolume %d (%s): %u chunks, path:%s", i,
- priv->subvolumes[i]->name, priv->du_stats[i].chunks,
- loc->path);
+ gf_smsg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_SUBVOL_INFO,
+ "index=%d", i, "name=%s", priv->subvolumes[i]->name,
+ "chunks=%u", priv->du_stats[i].chunks, "path=%s", loc->path,
+ NULL);
/* Maximize overlap if the bricks are all the same
* size.
@@ -1800,8 +1688,8 @@ dht_fix_layout_of_directory(call_frame_t *frame, loc_t *loc,
}
}
} else {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NO_DISK_USAGE_STATUS,
- "no du stats ?!?");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NO_DISK_USAGE_STATUS,
+ NULL);
}
/* First give it a layout as though it is a new directory. This
@@ -1832,7 +1720,7 @@ done:
* Having to call this 2x for each entry in the layout is pretty horrible, but
* that's what all of this layout-sorting nonsense gets us.
*/
-uint32_t
+static uint32_t
dht_get_chunks_from_xl(xlator_t *parent, xlator_t *child)
{
dht_conf_t *priv = parent->private;
@@ -1950,7 +1838,7 @@ done:
return;
}
-int
+static int
dht_selfheal_dir_getafix(call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
{
dht_local_t *local = NULL;
@@ -2009,9 +1897,8 @@ dht_selfheal_new_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
linked_inode = inode_link(loc->inode, loc->parent, loc->name,
&local->stbuf);
if (!linked_inode) {
- gf_msg(frame->this->name, GF_LOG_WARNING, 0,
- DHT_MSG_DIR_SELFHEAL_FAILED,
- "linking inode failed (%s/%s) => %s", pgfid, loc->name, gfid);
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_LINK_INODE_FAILED,
+ "pgfid=%s", pgfid, "name=%s", loc->name, "gfid=%s", gfid, NULL);
ret = -1;
goto out;
}
@@ -2069,10 +1956,10 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
loc_t *loc, dht_layout_t *layout)
{
dht_local_t *local = NULL;
+ xlator_t *this = NULL;
uint32_t down = 0;
uint32_t misc = 0;
int ret = 0;
- xlator_t *this = NULL;
char pgfid[GF_UUID_BUF_SIZE] = {0};
char gfid[GF_UUID_BUF_SIZE] = {0};
inode_t *linked_inode = NULL, *inode = NULL;
@@ -2083,6 +1970,20 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
local->selfheal.dir_cbk = dir_cbk;
local->selfheal.layout = dht_layout_ref(this, layout);
+ if (local->need_attrheal) {
+ if (__is_root_gfid(local->stbuf.ia_gfid)) {
+ local->stbuf.ia_gid = local->prebuf.ia_gid;
+ local->stbuf.ia_uid = local->prebuf.ia_uid;
+
+ local->stbuf.ia_ctime = local->prebuf.ia_ctime;
+ local->stbuf.ia_ctime_nsec = local->prebuf.ia_ctime_nsec;
+ local->stbuf.ia_prot = local->prebuf.ia_prot;
+
+ } else if (!IA_ISINVAL(local->mds_stbuf.ia_type)) {
+ local->stbuf = local->mds_stbuf;
+ }
+ }
+
if (!__is_root_gfid(local->stbuf.ia_gfid)) {
gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
gf_uuid_unparse(loc->parent->gfid, pgfid);
@@ -2090,9 +1991,9 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
linked_inode = inode_link(loc->inode, loc->parent, loc->name,
&local->stbuf);
if (!linked_inode) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_FAILED,
- "linking inode failed (%s/%s) => %s", pgfid, loc->name,
- gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LINK_INODE_FAILED,
+ "pgfid=%s", pgfid, "name=%s", loc->name, "gfid=%s", gfid,
+ NULL);
ret = 0;
goto sorry_no_fix;
}
@@ -2102,6 +2003,13 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
inode_unref(inode);
}
+ if (local->need_xattr_heal && (local->mds_xattr)) {
+ dht_dir_set_heal_xattr(this, local, local->xattr, local->mds_xattr,
+ NULL, NULL);
+ dict_unref(local->mds_xattr);
+ local->mds_xattr = NULL;
+ }
+
dht_layout_anomalies(this, loc, layout, &local->selfheal.hole_cnt,
&local->selfheal.overlaps_cnt,
&local->selfheal.missing_cnt, &local->selfheal.down,
@@ -2111,19 +2019,17 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
misc = local->selfheal.misc;
if (down) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_FAILED,
- "Directory selfheal failed: %d subvolumes down."
- "Not fixing. path = %s, gfid = %s",
- down, loc->path, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SELFHEAL_FAILED,
+ "path=%s", loc->path, "subvol-down=%d", down, "Not-fixing",
+ "gfid=%s", gfid, NULL);
ret = 0;
goto sorry_no_fix;
}
if (misc) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_FAILED,
- "Directory selfheal failed : %d subvolumes "
- "have unrecoverable errors. path = %s, gfid = %s",
- misc, loc->path, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SELFHEAL_FAILED,
+ "path=%s", loc->path, "misc=%d", misc, "unrecoverable-errors",
+ "gfid=%s", gfid, NULL);
ret = 0;
goto sorry_no_fix;
@@ -2209,29 +2115,28 @@ dht_dir_heal_xattrs(void *data)
gf_uuid_unparse(local->loc.gfid, gfid);
if (!mds_subvol) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_XATTR_HEAL_FAILED,
- "No mds subvol for %s gfid = %s", local->loc.path, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NO_MDS_SUBVOL, "path=%s",
+ local->loc.path, "gfid=%s", gfid, NULL);
goto out;
}
if ((local->loc.inode && gf_uuid_is_null(local->loc.inode->gfid)) ||
gf_uuid_is_null(local->loc.gfid)) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_XATTR_HEAL_FAILED,
- "No gfid present so skip heal for path %s gfid = %s",
- local->loc.path, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_GFID_NOT_PRESENT,
+ "skip-heal path=%s", local->loc.path, "gfid=%s", gfid, NULL);
goto out;
}
internal_xattr = dict_new();
if (!internal_xattr) {
- gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, 0,
- "dictionary creation failed");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_CREATE_FAILED,
+ "dictionary", NULL);
goto out;
}
xdata = dict_new();
if (!xdata) {
- gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, 0,
- "dictionary creation failed");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_CREATE_FAILED,
+ "dictionary", NULL);
goto out;
}
@@ -2239,18 +2144,17 @@ dht_dir_heal_xattrs(void *data)
user_xattr = dict_new();
if (!user_xattr) {
- gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, 0,
- "dictionary creation failed");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_CREATE_FAILED,
+ "dictionary", NULL);
goto out;
}
ret = syncop_listxattr(local->mds_subvol, &local->loc, &mds_xattr, NULL,
NULL);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_DIR_XATTR_HEAL_FAILED,
- "failed to list xattrs for "
- "%s: on %s ",
- local->loc.path, local->mds_subvol->name);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LIST_XATTRS_FAILED,
+ "path=%s", local->loc.path, "name=%s", local->mds_subvol->name,
+ NULL);
}
if (!mds_xattr)
@@ -2265,10 +2169,9 @@ dht_dir_heal_xattrs(void *data)
dict_get(user_xattr, QUOTA_LIMIT_OBJECTS_KEY)) {
ret = dict_set_int32(xdata, GLUSTERFS_INTERNAL_FOP_KEY, 1);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value: key = %s,"
- " path = %s",
- GLUSTERFS_INTERNAL_FOP_KEY, local->loc.path);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "key=%s", GLUSTERFS_INTERNAL_FOP_KEY, "path=%s",
+ local->loc.path, NULL);
goto out;
}
}
@@ -2280,16 +2183,25 @@ dht_dir_heal_xattrs(void *data)
if (subvol == mds_subvol)
continue;
if (uret || uflag) {
+ /* Custom xattr heal is required - let posix handle it */
+ ret = dict_set_int8(xdata, "sync_backend_xattrs", _gf_true);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", local->loc.path, "key=%s",
+ "sync_backend_xattrs", NULL);
+ goto out;
+ }
+
ret = syncop_setxattr(subvol, &local->loc, user_xattr, 0, xdata,
NULL);
if (ret) {
xattr_hashed = 1;
- gf_msg(this->name, GF_LOG_ERROR, -ret,
- DHT_MSG_DIR_XATTR_HEAL_FAILED,
- "Directory xattr heal failed. Failed to set"
- "user xattr on path %s on "
- "subvol %s, gfid = %s ",
- local->loc.path, subvol->name, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED,
+ "set-user-xattr-failed path=%s", local->loc.path,
+ "subvol=%s", subvol->name, "gfid=%s", gfid, NULL);
+ } else {
+ dict_del(xdata, "sync_backend_xattrs");
}
}
}
@@ -2298,21 +2210,17 @@ dht_dir_heal_xattrs(void *data)
ret = dht_dict_set_array(internal_xattr, conf->mds_xattr_key, allzero,
1);
if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value:key = %s for "
- "path %s",
- conf->mds_xattr_key, local->loc.path);
+ gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "key=%s", conf->mds_xattr_key, "path=%s", local->loc.path,
+ NULL);
goto out;
}
ret = syncop_setxattr(mds_subvol, &local->loc, internal_xattr, 0, NULL,
NULL);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret,
- DHT_MSG_DIR_XATTR_HEAL_FAILED,
- "Failed to reset internal xattr "
- "on path %s on subvol %s"
- "gfid = %s ",
- local->loc.path, mds_subvol->name, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED, "path=%s", local->loc.path,
+ "subvol=%s", mds_subvol->name, "gfid=%s", gfid, NULL);
}
}
@@ -2353,18 +2261,18 @@ dht_dir_attr_heal(void *data)
frame = data;
local = frame->local;
- mds_subvol = local->mds_subvol;
this = frame->this;
GF_VALIDATE_OR_GOTO("dht", this, out);
GF_VALIDATE_OR_GOTO("dht", local, out);
conf = this->private;
GF_VALIDATE_OR_GOTO("dht", conf, out);
+ mds_subvol = local->mds_subvol;
call_cnt = conf->subvolume_cnt;
if (!__is_root_gfid(local->stbuf.ia_gfid) && (!mds_subvol)) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_ATTR_HEAL_FAILED,
- "No mds subvol for %s gfid = %s", local->loc.path, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NO_MDS_SUBVOL, "path=%s",
+ local->loc.path, "gfid=%s", gfid, NULL);
goto out;
}
@@ -2372,11 +2280,9 @@ dht_dir_attr_heal(void *data)
for (i = 0; i < conf->subvolume_cnt; i++) {
if (conf->subvolumes[i] == mds_subvol) {
if (!conf->subvolume_status[i]) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- DHT_MSG_HASHED_SUBVOL_DOWN,
- "mds subvol is down for path "
- " %s gfid is %s Unable to set xattr ",
- local->loc.path, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_MDS_DOWN_UNABLE_TO_SET, "path=%s",
+ local->loc.path, "gfid=%s", gfid, NULL);
goto out;
}
}
@@ -2402,10 +2308,9 @@ dht_dir_attr_heal(void *data)
if (ret) {
gf_uuid_unparse(local->loc.gfid, gfid);
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_DIR_ATTR_HEAL_FAILED,
- "Directory attr heal failed. Failed to set"
- " uid/gid on path %s on subvol %s, gfid = %s ",
- local->loc.path, subvol->name, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_DIR_ATTR_HEAL_FAILED, "path=%s", local->loc.path,
+ "subvol=%s", subvol->name, "gfid=%s", gfid, NULL);
}
}
out:
@@ -2420,7 +2325,7 @@ dht_dir_attr_heal_done(int ret, call_frame_t *sync_frame, void *data)
}
/* EXIT: dht_update_commit_hash_for_layout */
-int
+static int
dht_update_commit_hash_for_layout_done(call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
int32_t op_errno, dict_t *xdata)
@@ -2440,7 +2345,7 @@ dht_update_commit_hash_for_layout_done(call_frame_t *frame, void *cookie,
return 0;
}
-int
+static int
dht_update_commit_hash_for_layout_unlock(call_frame_t *frame, xlator_t *this)
{
dht_local_t *local = NULL;
@@ -2458,11 +2363,8 @@ dht_update_commit_hash_for_layout_unlock(call_frame_t *frame, xlator_t *this)
local->op_ret = -1;
}
- gf_msg(this->name, GF_LOG_WARNING, errno,
- DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "Winding unlock failed: stale locks left on brick"
- " %s",
- local->loc.path);
+ gf_smsg(this->name, GF_LOG_WARNING, errno, DHT_MSG_WIND_UNLOCK_FAILED,
+ "path=%s", local->loc.path, NULL);
dht_update_commit_hash_for_layout_done(frame, NULL, this, 0, 0, NULL);
}
@@ -2470,7 +2372,7 @@ dht_update_commit_hash_for_layout_unlock(call_frame_t *frame, xlator_t *this)
return 0;
}
-int
+static int
dht_update_commit_hash_for_layout_cbk(call_frame_t *frame, void *cookie,
xlator_t *this, int op_ret, int op_errno,
dict_t *xdata)
@@ -2497,7 +2399,7 @@ dht_update_commit_hash_for_layout_cbk(call_frame_t *frame, void *cookie,
return 0;
}
-int
+static int
dht_update_commit_hash_for_layout_resume(call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
int32_t op_errno, dict_t *xdata)
@@ -2525,11 +2427,8 @@ dht_update_commit_hash_for_layout_resume(call_frame_t *frame, void *cookie,
if (!xattr) {
local->op_errno = errno;
- gf_msg(this->name, GF_LOG_WARNING, errno,
- DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "Directory commit hash update failed:"
- " %s: Allocation failed",
- local->loc.path);
+ gf_smsg(this->name, GF_LOG_WARNING, errno, DHT_MSG_COMMIT_HASH_FAILED,
+ "allocation-failed path=%s", local->loc.path, NULL);
goto err;
}
@@ -2540,11 +2439,10 @@ dht_update_commit_hash_for_layout_resume(call_frame_t *frame, void *cookie,
if (ret < 0) {
local->op_errno = ENOENT;
- gf_msg(this->name, GF_LOG_WARNING, 0,
- DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "Directory commit hash update failed:"
- " %s: (subvol %s) Failed to find disk layout",
- local->loc.path, conf->local_subvols[i]->name);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_COMMIT_HASH_FAILED,
+ "path=%s", local->loc.path, "subvol=%s",
+ conf->local_subvols[i]->name, "find-disk-layout-failed",
+ NULL);
goto err;
}
@@ -2558,12 +2456,10 @@ dht_update_commit_hash_for_layout_resume(call_frame_t *frame, void *cookie,
if (ret == -1) {
local->op_errno = errno;
- gf_msg(this->name, GF_LOG_WARNING, errno,
- DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "Directory commit hash update failed:"
- " %s: (subvol %s) Failed to extract disk"
- " layout",
- local->loc.path, conf->local_subvols[i]->name);
+ gf_smsg(this->name, GF_LOG_WARNING, errno,
+ DHT_MSG_COMMIT_HASH_FAILED, "path=%s", local->loc.path,
+ "subvol=%s", conf->local_subvols[i]->name,
+ "extract-disk-layout-failed", NULL);
goto err;
}
@@ -2572,11 +2468,9 @@ dht_update_commit_hash_for_layout_resume(call_frame_t *frame, void *cookie,
if (!xattr[i]) {
local->op_errno = errno;
- gf_msg(this->name, GF_LOG_WARNING, errno,
- DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "Directory commit hash update failed:"
- " %s: Allocation failed",
- local->loc.path);
+ gf_smsg(this->name, GF_LOG_WARNING, errno,
+ DHT_MSG_COMMIT_HASH_FAILED, "path=%s Allocation-failed",
+ local->loc.path, NULL);
goto err;
}
@@ -2585,12 +2479,10 @@ dht_update_commit_hash_for_layout_resume(call_frame_t *frame, void *cookie,
if (ret != 0) {
local->op_errno = ENOMEM;
- gf_msg(this->name, GF_LOG_WARNING, 0,
- DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
- "Directory self heal xattr failed:"
- "%s: (subvol %s) Failed to set xattr"
- " dictionary,",
- local->loc.path, conf->local_subvols[i]->name);
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, "path=%s",
+ local->loc.path, "subvol=%s", conf->local_subvols[i]->name,
+ "set-xattr-failed", NULL);
goto err;
}
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
index 0a7aa15e242..bb72b0ffbb5 100644
--- a/xlators/cluster/dht/src/dht-shared.c
+++ b/xlators/cluster/dht/src/dht-shared.c
@@ -9,7 +9,7 @@
*/
/* TODO: add NS locking */
-#include "statedump.h"
+#include <glusterfs/statedump.h>
#include "dht-common.h"
#include "dht-messages.h"
@@ -17,24 +17,6 @@
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#endif
-#define GF_DECIDE_DEFRAG_THROTTLE_COUNT(throttle_count, conf) \
- { \
- pthread_mutex_lock(&conf->defrag->dfq_mutex); \
- \
- if (!strcasecmp(conf->dthrottle, "lazy")) \
- conf->defrag->recon_thread_count = 1; \
- \
- throttle_count = MAX((sysconf(_SC_NPROCESSORS_ONLN) - 4), 4); \
- \
- if (!strcasecmp(conf->dthrottle, "normal")) \
- conf->defrag->recon_thread_count = (throttle_count / 2); \
- \
- if (!strcasecmp(conf->dthrottle, "aggressive")) \
- conf->defrag->recon_thread_count = throttle_count; \
- \
- pthread_mutex_unlock(&conf->defrag->dfq_mutex); \
- }
-
/* TODO:
- use volumename in xattr instead of "dht"
- use NS locks
@@ -42,9 +24,7 @@
- complete linkfile selfheal
*/
-extern dht_methods_t dht_methods;
-
-void
+static void
dht_layout_dump(dht_layout_t *layout, const char *prefix)
{
char key[GF_DUMP_MAX_BUF_LEN];
@@ -52,8 +32,6 @@ dht_layout_dump(dht_layout_t *layout, const char *prefix)
if (!layout)
goto out;
- if (!prefix)
- goto out;
gf_proc_dump_build_key(key, prefix, "cnt");
gf_proc_dump_write(key, "%d", layout->cnt);
@@ -73,9 +51,9 @@ dht_layout_dump(dht_layout_t *layout, const char *prefix)
gf_proc_dump_build_key(key, prefix, "list[%d].err", i);
gf_proc_dump_write(key, "%d", layout->list[i].err);
gf_proc_dump_build_key(key, prefix, "list[%d].start", i);
- gf_proc_dump_write(key, "%u", layout->list[i].start);
+ gf_proc_dump_write(key, "0x%x", layout->list[i].start);
gf_proc_dump_build_key(key, prefix, "list[%d].stop", i);
- gf_proc_dump_write(key, "%u", layout->list[i].stop);
+ gf_proc_dump_write(key, "0x%x", layout->list[i].stop);
if (layout->list[i].xlator) {
gf_proc_dump_build_key(key, prefix, "list[%d].xlator.type", i);
gf_proc_dump_write(key, "%s", layout->list[i].xlator->type);
@@ -152,7 +130,7 @@ dht_priv_dump(xlator_t *this)
gf_proc_dump_write(key, "%lf", conf->du_stats[i].avail_percent);
snprintf(key, sizeof(key), "du_stats[%d].avail_space", i);
- gf_proc_dump_write(key, "%lu", conf->du_stats[i].avail_space);
+ gf_proc_dump_write(key, "%" PRIu64, conf->du_stats[i].avail_space);
snprintf(key, sizeof(key), "du_stats[%d].avail_inodes", i);
gf_proc_dump_write(key, "%lf", conf->du_stats[i].avail_inodes);
@@ -162,9 +140,9 @@ dht_priv_dump(xlator_t *this)
}
}
- if (conf->last_stat_fetch.tv_sec)
+ if (conf->last_stat_fetch)
gf_proc_dump_write("last_stat_fetch", "%s",
- ctime(&conf->last_stat_fetch.tv_sec));
+ ctime(&conf->last_stat_fetch));
UNLOCK(&conf->subvolume_lock);
@@ -264,7 +242,7 @@ out:
return ret;
}
-int
+static int
dht_parse_decommissioned_bricks(xlator_t *this, dht_conf_t *conf,
const char *bricks)
{
@@ -278,6 +256,10 @@ dht_parse_decommissioned_bricks(xlator_t *this, dht_conf_t *conf,
goto out;
dup_brick = gf_strdup(bricks);
+ if (dup_brick == NULL) {
+ goto out;
+ }
+
node = strtok_r(dup_brick, ",", &tmpstr);
while (node) {
for (i = 0; i < conf->subvolume_cnt; i++) {
@@ -306,14 +288,10 @@ out:
return ret;
}
-int
+static void
dht_decommissioned_remove(xlator_t *this, dht_conf_t *conf)
{
int i = 0;
- int ret = -1;
-
- if (!conf)
- goto out;
for (i = 0; i < conf->subvolume_cnt; i++) {
if (conf->decommissioned_bricks[i]) {
@@ -321,13 +299,9 @@ dht_decommissioned_remove(xlator_t *this, dht_conf_t *conf)
conf->decommission_subvols_cnt--;
}
}
-
- ret = 0;
-out:
-
- return ret;
}
-void
+
+static void
dht_init_regex(xlator_t *this, dict_t *odict, char *name, regex_t *re,
gf_boolean_t *re_valid, dht_conf_t *conf)
{
@@ -384,7 +358,7 @@ out:
return ret;
}
-int
+static int
dht_configure_throttle(xlator_t *this, dht_conf_t *conf, char *temp_str)
{
int rebal_thread_count = 0;
@@ -401,18 +375,20 @@ dht_configure_throttle(xlator_t *this, dht_conf_t *conf, char *temp_str)
} else if ((gf_string2int(temp_str, &rebal_thread_count) == 0)) {
if ((rebal_thread_count > 0) &&
(rebal_thread_count <= MAX_REBAL_THREADS)) {
+ conf->defrag->recon_thread_count = rebal_thread_count;
+ pthread_mutex_unlock(&conf->defrag->dfq_mutex);
gf_msg(this->name, GF_LOG_INFO, 0, 0,
"rebal thread count configured to %d",
rebal_thread_count);
- conf->defrag->recon_thread_count = rebal_thread_count;
+ goto out;
} else {
+ pthread_mutex_unlock(&conf->defrag->dfq_mutex);
gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_OPTION,
"Invalid option: Reconfigure: "
"rebal-throttle should be "
"within range of 0 and maximum number of"
" cores available");
ret = -1;
- pthread_mutex_unlock(&conf->defrag->dfq_mutex);
goto out;
}
} else {
@@ -521,9 +497,7 @@ dht_reconfigure(xlator_t *this, dict_t *options)
if (ret == -1)
goto out;
} else {
- ret = dht_decommissioned_remove(this, conf);
- if (ret == -1)
- goto out;
+ dht_decommissioned_remove(this, conf);
}
dht_init_regex(this, options, "rsync-hash-regex", &conf->rsync_regex,
@@ -563,6 +537,8 @@ gf_defrag_pattern_list_fill(xlator_t *this, gf_defrag_info_t *defrag,
pattern_str = strtok_r(data, ",", &tmp_str);
while (pattern_str) {
dup_str = gf_strdup(pattern_str);
+ if (!dup_str)
+ goto out;
pattern_list = GF_CALLOC(1, sizeof(gf_defrag_pattern_list_t), 1);
if (!pattern_list) {
goto out;
@@ -609,7 +585,7 @@ out:
return ret;
}
-int
+static int
dht_init_methods(xlator_t *this)
{
int ret = -1;
@@ -622,7 +598,6 @@ dht_init_methods(xlator_t *this)
methods = &(conf->methods);
methods->migration_get_dst_subvol = dht_migration_get_dst_subvol;
- methods->migration_needed = dht_migration_needed;
methods->migration_other = NULL;
methods->layout_search = dht_layout_search;
@@ -1071,84 +1046,6 @@ struct volume_options dht_options[] = {
/* NUFA option */
{.key = {"local-volume-name"}, .type = GF_OPTION_TYPE_XLATOR},
- /* tier options */
- {
- .key = {"tier-pause"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
-
- {
- .key = {"tier-promote-frequency"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "120",
- },
-
- {
- .key = {"tier-demote-frequency"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "3600",
- },
-
- {
- .key = {"write-freq-threshold"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "0",
- },
-
- {
- .key = {"read-freq-threshold"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "0",
- },
- {
- .key = {"watermark-hi"},
- .type = GF_OPTION_TYPE_PERCENT,
- .default_value = "90",
- },
- {
- .key = {"watermark-low"},
- .type = GF_OPTION_TYPE_PERCENT,
- .default_value = "75",
- },
- {
- .key = {"tier-mode"},
- .type = GF_OPTION_TYPE_STR,
- .default_value = "test",
- },
- {
- .key = {"tier-compact"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
- {.key = {"tier-hot-compact-frequency"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "604800",
- .description = "Frequency to compact DBs on hot tier in system"},
- {.key = {"tier-cold-compact-frequency"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "604800",
- .description = "Frequency to compact DBs on cold tier in system"},
- {
- .key = {"tier-max-mb"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "4000",
- },
- {
- .key = {"tier-max-promote-file-size"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "0",
- },
- {
- .key = {"tier-max-files"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "10000",
- },
- {
- .key = {"tier-query-limit"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "100",
- },
/* switch option */
{.key = {"pattern.switch.case"}, .type = GF_OPTION_TYPE_ANY},
diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c
index 63ab926ca58..53de8292704 100644
--- a/xlators/cluster/dht/src/dht.c
+++ b/xlators/cluster/dht/src/dht.c
@@ -8,7 +8,6 @@
cases as published by the Free Software Foundation.
*/
-#include "statedump.h"
#include "dht-common.h"
struct xlator_fops dht_pt_fops = {
diff --git a/xlators/cluster/dht/src/dht.sym b/xlators/cluster/dht/src/dht.sym
deleted file mode 100644
index 24241a91baf..00000000000
--- a/xlators/cluster/dht/src/dht.sym
+++ /dev/null
@@ -1,9 +0,0 @@
-xlator_api
-fops
-cbks
-class_methods
-dht_methods
-options
-mem_acct_init
-reconfigure
-dumpops
diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c
index 558611384fe..3648a564840 100644
--- a/xlators/cluster/dht/src/nufa.c
+++ b/xlators/cluster/dht/src/nufa.c
@@ -595,15 +595,9 @@ nufa_init(xlator_t *this)
dht_methods_t dht_methods = {
.migration_get_dst_subvol = dht_migration_get_dst_subvol,
- .migration_needed = dht_migration_needed,
.layout_search = dht_layout_search,
};
-class_methods_t class_methods = {.init = nufa_init,
- .fini = dht_fini,
- .reconfigure = dht_reconfigure,
- .notify = dht_notify};
-
struct xlator_fops fops = {
.lookup = nufa_lookup,
.create = nufa_create,
@@ -645,3 +639,19 @@ struct xlator_fops fops = {
};
struct xlator_cbks cbks = {.forget = dht_forget};
+extern int32_t
+mem_acct_init(xlator_t *this);
+
+xlator_api_t xlator_api = {
+ .init = nufa_init,
+ .fini = dht_fini,
+ .notify = dht_notify,
+ .reconfigure = dht_reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = dht_options,
+ .identifier = "nufa",
+ .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/cluster/dht/src/nufa.sym b/xlators/cluster/dht/src/nufa.sym
deleted file mode 100644
index 780b5fc0387..00000000000
--- a/xlators/cluster/dht/src/nufa.sym
+++ /dev/null
@@ -1,8 +0,0 @@
-fops
-cbks
-class_methods
-dht_methods
-options
-mem_acct_init
-reconfigure
-dumpops
diff --git a/xlators/cluster/dht/src/switch.c b/xlators/cluster/dht/src/switch.c
index a3c384b0f5c..207d109a025 100644
--- a/xlators/cluster/dht/src/switch.c
+++ b/xlators/cluster/dht/src/switch.c
@@ -610,9 +610,15 @@ set_switch_pattern(xlator_t *this, dht_conf_t *conf, const char *pattern_str)
/* Get the pattern for considering switch case.
"option block-size *avi:10MB" etc */
option_string = gf_strdup(pattern_str);
+ if (option_string == NULL) {
+ goto err;
+ }
switch_str = strtok_r(option_string, ";", &tmp_str);
while (switch_str) {
dup_str = gf_strdup(switch_str);
+ if (dup_str == NULL) {
+ goto err;
+ }
switch_opt = GF_CALLOC(1, sizeof(struct switch_struct),
gf_switch_mt_switch_struct);
if (!switch_opt) {
@@ -647,6 +653,9 @@ set_switch_pattern(xlator_t *this, dht_conf_t *conf, const char *pattern_str)
if (childs) {
dup_childs = gf_strdup(childs);
+ if (dup_childs == NULL) {
+ goto err;
+ }
child = strtok_r(dup_childs, ",", &tmp);
while (child) {
if (gf_switch_valid_child(this, child)) {
@@ -823,11 +832,6 @@ err:
return -1;
}
-class_methods_t class_methods = {.init = switch_init,
- .fini = switch_fini,
- .reconfigure = dht_reconfigure,
- .notify = dht_notify};
-
struct xlator_fops fops = {
.lookup = switch_lookup,
.create = switch_create,
@@ -869,3 +873,19 @@ struct xlator_fops fops = {
};
struct xlator_cbks cbks = {.forget = dht_forget};
+extern int32_t
+mem_acct_init(xlator_t *this);
+
+xlator_api_t xlator_api = {
+ .init = switch_init,
+ .fini = switch_fini,
+ .notify = dht_notify,
+ .reconfigure = dht_reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = dht_options,
+ .identifier = "switch",
+ .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/cluster/dht/src/switch.sym b/xlators/cluster/dht/src/switch.sym
deleted file mode 100644
index 780b5fc0387..00000000000
--- a/xlators/cluster/dht/src/switch.sym
+++ /dev/null
@@ -1,8 +0,0 @@
-fops
-cbks
-class_methods
-dht_methods
-options
-mem_acct_init
-reconfigure
-dumpops
diff --git a/xlators/cluster/dht/src/tier-common.c b/xlators/cluster/dht/src/tier-common.c
deleted file mode 100644
index b86ed673042..00000000000
--- a/xlators/cluster/dht/src/tier-common.c
+++ /dev/null
@@ -1,1199 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "libxlator.h"
-#include "dht-common.h"
-#include "defaults.h"
-#include "tier-common.h"
-#include "tier.h"
-
-int
-dht_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata);
-
-int
-tier_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- loc_t *oldloc = NULL;
- loc_t *newloc = NULL;
-
- local = frame->local;
-
- oldloc = &local->loc;
- newloc = &local->loc2;
-
- if (op_ret == -1) {
- /* No continuation on DHT inode missing errors, as we should
- * then have a good stbuf that states P2 happened. We would
- * get inode missing if, the file completed migrated between
- * the lookup and the link call */
- goto out;
- }
-
- if (local->call_cnt != 1) {
- goto out;
- }
-
- local->call_cnt = 2;
-
- /* Do this on the hot tier now */
-
- STACK_WIND(frame, tier_link_cbk, local->cached_subvol,
- local->cached_subvol->fops->link, oldloc, newloc, xdata);
-
- return 0;
-
-out:
- DHT_STRIP_PHASE1_FLAGS(stbuf);
-
- DHT_STACK_UNWIND(link, frame, op_ret, op_errno, inode, stbuf, preparent,
- postparent, NULL);
-
- return 0;
-}
-
-int
-tier_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
-{
- xlator_t *cached_subvol = NULL;
- xlator_t *hashed_subvol = NULL;
- int op_errno = -1;
- int ret = -1;
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(oldloc, err);
- VALIDATE_OR_GOTO(newloc, err);
-
- conf = this->private;
-
- local = dht_local_init(frame, oldloc, NULL, GF_FOP_LINK);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->call_cnt = 1;
-
- cached_subvol = local->cached_subvol;
-
- if (!cached_subvol) {
- gf_msg_debug(this->name, 0, "no cached subvolume for path=%s",
- oldloc->path);
- op_errno = ENOENT;
- goto err;
- }
-
- hashed_subvol = TIER_HASHED_SUBVOL;
-
- ret = loc_copy(&local->loc2, newloc);
- if (ret == -1) {
- op_errno = ENOMEM;
- goto err;
- }
-
- if (hashed_subvol == cached_subvol) {
- STACK_WIND(frame, dht_link_cbk, cached_subvol,
- cached_subvol->fops->link, oldloc, newloc, xdata);
- return 0;
- }
-
- /* Create hardlinks to both the data file on the hot tier
- and the linkto file on the cold tier */
-
- gf_uuid_copy(local->gfid, oldloc->inode->gfid);
-
- STACK_WIND(frame, tier_link_cbk, hashed_subvol, hashed_subvol->fops->link,
- oldloc, newloc, xdata);
-
- return 0;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- return 0;
-}
-
-int
-tier_create_unlink_stale_linkto_cbk(call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- dht_local_t *local = NULL;
-
- local = frame->local;
-
- if (local->params) {
- dict_del(local->params, GLUSTERFS_INTERNAL_FOP_KEY);
- }
-
- DHT_STACK_UNWIND(create, frame, -1, local->op_errno, NULL, NULL, NULL, NULL,
- NULL, NULL);
-
- return 0;
-}
-
-int
-tier_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
-{
- xlator_t *prev = NULL;
- int ret = -1;
- dht_local_t *local = NULL;
- xlator_t *hashed_subvol = NULL;
- dht_conf_t *conf = NULL;
-
- local = frame->local;
- conf = this->private;
-
- hashed_subvol = TIER_HASHED_SUBVOL;
-
- if (!local) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
-
- if (op_ret == -1) {
- if (local->linked == _gf_true && local->xattr_req) {
- local->op_errno = op_errno;
- local->op_ret = op_ret;
- ret = dht_fill_dict_to_avoid_unlink_of_migrating_file(
- local->xattr_req);
- if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value to "
- "unlink of migrating file");
- goto out;
- }
-
- STACK_WIND(frame, tier_create_unlink_stale_linkto_cbk,
- hashed_subvol, hashed_subvol->fops->unlink, &local->loc,
- 0, local->xattr_req);
- return 0;
- }
- goto out;
- }
-
- prev = cookie;
-
- if (local->loc.parent) {
- dht_inode_ctx_time_update(local->loc.parent, this, preparent, 0);
-
- dht_inode_ctx_time_update(local->loc.parent, this, postparent, 1);
- }
-
- ret = dht_layout_preset(this, prev, inode);
- if (ret != 0) {
- gf_msg_debug(this->name, 0, "could not set preset layout for subvol %s",
- prev->name);
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
-
- local->op_errno = op_errno;
-
- if (local->linked == _gf_true) {
- local->stbuf = *stbuf;
- dht_linkfile_attr_heal(frame, this);
- }
-out:
- if (local) {
- if (local->xattr_req) {
- dict_del(local->xattr_req, TIER_LINKFILE_GFID);
- }
- }
-
- DHT_STRIP_PHASE1_FLAGS(stbuf);
-
- DHT_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf,
- preparent, postparent, xdata);
-
- return 0;
-}
-
-int
-tier_create_linkfile_create_cbk(call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, inode_t *inode,
- struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- xlator_t *cached_subvol = NULL;
- dht_conf_t *conf = NULL;
- int ret = -1;
- unsigned char *gfid = NULL;
-
- local = frame->local;
- if (!local) {
- op_errno = EINVAL;
- goto err;
- }
-
- if (op_ret == -1) {
- local->op_errno = op_errno;
- goto err;
- }
-
- conf = this->private;
- if (!conf) {
- local->op_errno = EINVAL;
- op_errno = EINVAL;
- goto err;
- }
-
- cached_subvol = TIER_UNHASHED_SUBVOL;
-
- if (local->params) {
- dict_del(local->params, conf->link_xattr_name);
- dict_del(local->params, GLUSTERFS_INTERNAL_FOP_KEY);
- }
-
- /*
- * We will delete the linkfile if data file creation fails.
- * When deleting this stale linkfile, there is a possibility
- * for a race between this linkfile deletion and a stale
- * linkfile deletion triggered by another lookup from different
- * client.
- *
- * For eg:
- *
- * Client 1 Client 2
- *
- * 1 linkfile created for foo
- *
- * 2 data file creation failed
- *
- * 3 creating a file with same name
- *
- * 4 lookup before creation deleted
- * the linkfile created by client1
- * considering as a stale linkfile.
- *
- * 5 New linkfile created for foo
- * with different gfid.
- *
- * 6 Trigger linkfile deletion as
- * data file creation failed.
- *
- * 7 Linkfile deleted which is
- * created by client2.
- *
- * 8 Data file created.
- *
- * With this race, we will end up having a file in a non-hashed subvol
- * without a linkfile in hashed subvol.
- *
- * To avoid this, we store the gfid of linkfile created by client, So
- * If we delete the linkfile , we validate gfid of existing file with
- * stored value from posix layer.
- *
- * Storing this value in local->xattr_req as local->params was also used
- * to create the data file. During the linkfile deletion we will use
- * local->xattr_req dictionary.
- */
- if (!local->xattr_req) {
- local->xattr_req = dict_new();
- if (!local->xattr_req) {
- local->op_errno = ENOMEM;
- op_errno = ENOMEM;
- goto err;
- }
- }
-
- gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_char);
- if (!gfid) {
- local->op_errno = ENOMEM;
- op_errno = ENOMEM;
- goto err;
- }
-
- gf_uuid_copy(gfid, stbuf->ia_gfid);
- ret = dict_set_dynptr(local->xattr_req, TIER_LINKFILE_GFID, gfid,
- sizeof(uuid_t));
- if (ret) {
- GF_FREE(gfid);
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value"
- " : key = %s",
- TIER_LINKFILE_GFID);
- }
-
- STACK_WIND_COOKIE(frame, tier_create_cbk, cached_subvol, cached_subvol,
- cached_subvol->fops->create, &local->loc, local->flags,
- local->mode, local->umask, local->fd, local->params);
-
- return 0;
-err:
- DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
- NULL);
- return 0;
-}
-
-gf_boolean_t
-tier_is_hot_tier_decommissioned(xlator_t *this)
-{
- dht_conf_t *conf = NULL;
- xlator_t *hot_tier = NULL;
- int i = 0;
-
- conf = this->private;
- hot_tier = conf->subvolumes[1];
-
- if (conf->decommission_subvols_cnt) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->decommissioned_bricks[i] &&
- conf->decommissioned_bricks[i] == hot_tier)
- return _gf_true;
- }
- }
-
- return _gf_false;
-}
-
-int
-tier_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, mode_t umask, fd_t *fd, dict_t *params)
-{
- int op_errno = -1;
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- xlator_t *hot_subvol = NULL;
- xlator_t *cold_subvol = NULL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
-
- conf = this->private;
-
- dht_get_du_info(frame, this, loc);
-
- local = dht_local_init(frame, loc, fd, GF_FOP_CREATE);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- cold_subvol = TIER_HASHED_SUBVOL;
- hot_subvol = TIER_UNHASHED_SUBVOL;
-
- if (conf->subvolumes[0] != cold_subvol) {
- hot_subvol = conf->subvolumes[0];
- }
- /*
- * if hot tier full, write to cold.
- * Also if hot tier is full, create in cold
- */
- if (dht_is_subvol_filled(this, hot_subvol) ||
- tier_is_hot_tier_decommissioned(this)) {
- gf_msg_debug(this->name, 0, "creating %s on %s", loc->path,
- cold_subvol->name);
-
- STACK_WIND_COOKIE(frame, tier_create_cbk, cold_subvol, cold_subvol,
- cold_subvol->fops->create, loc, flags, mode, umask,
- fd, params);
- } else {
- local->params = dict_ref(params);
- local->flags = flags;
- local->mode = mode;
- local->umask = umask;
- local->cached_subvol = hot_subvol;
- local->hashed_subvol = cold_subvol;
-
- gf_msg_debug(this->name, 0, "creating %s on %s (link at %s)", loc->path,
- hot_subvol->name, cold_subvol->name);
-
- dht_linkfile_create(frame, tier_create_linkfile_create_cbk, this,
- hot_subvol, cold_subvol, loc);
-
- goto out;
- }
-out:
- return 0;
-
-err:
-
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
- NULL);
-
- return 0;
-}
-
-int
-tier_unlink_nonhashed_linkfile_cbk(call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- xlator_t *prev = NULL;
-
- local = frame->local;
- prev = cookie;
-
- LOCK(&frame->lock);
- {
- if ((op_ret == -1) && (op_errno != ENOENT)) {
- local->op_errno = op_errno;
- local->op_ret = op_ret;
- gf_msg_debug(this->name, op_errno,
- "Unlink link: subvolume %s"
- " returned -1",
- prev->name);
- goto unlock;
- }
-
- local->op_ret = 0;
- }
-unlock:
- UNLOCK(&frame->lock);
-
- if (local->op_ret == -1)
- goto err;
- DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent, NULL);
-
- return 0;
-
-err:
- DHT_STACK_UNWIND(unlink, frame, -1, local->op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int
-tier_unlink_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode,
- struct iatt *preparent, dict_t *xdata,
- struct iatt *postparent)
-{
- dht_local_t *local = NULL;
- xlator_t *prev = NULL;
- dht_conf_t *conf = NULL;
- xlator_t *hot_subvol = NULL;
-
- local = frame->local;
- prev = cookie;
- conf = this->private;
- hot_subvol = TIER_UNHASHED_SUBVOL;
-
- if (!op_ret) {
- /*
- * linkfile present on hot tier. unlinking the linkfile
- */
- STACK_WIND_COOKIE(frame, tier_unlink_nonhashed_linkfile_cbk, hot_subvol,
- hot_subvol, hot_subvol->fops->unlink, &local->loc,
- local->flags, NULL);
- return 0;
- }
-
- LOCK(&frame->lock);
- {
- if (op_errno == ENOENT) {
- local->op_ret = 0;
- local->op_errno = op_errno;
- } else {
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- }
- gf_msg_debug(this->name, op_errno, "Lookup : subvolume %s returned -1",
- prev->name);
- }
-
- UNLOCK(&frame->lock);
-
- DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent, xdata);
-
- return 0;
-}
-
-int
-tier_unlink_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- xlator_t *prev = NULL;
-
- local = frame->local;
- prev = cookie;
-
- LOCK(&frame->lock);
- {
- /* Ignore EINVAL for tier to ignore error when the file
- does not exist on the other tier */
- if ((op_ret == -1) && !((op_errno == ENOENT) || (op_errno == EINVAL))) {
- local->op_errno = op_errno;
- local->op_ret = op_ret;
- gf_msg_debug(this->name, op_errno,
- "Unlink link: subvolume %s"
- " returned -1",
- prev->name);
- goto unlock;
- }
-
- local->op_ret = 0;
- }
-unlock:
- UNLOCK(&frame->lock);
-
- if (local->op_ret == -1)
- goto err;
-
- DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent, xdata);
-
- return 0;
-
-err:
- DHT_STACK_UNWIND(unlink, frame, -1, local->op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-tier_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- dht_local_t *local = NULL;
- xlator_t *prev = NULL;
- struct iatt *stbuf = NULL;
- dht_conf_t *conf = NULL;
- int ret = -1;
- xlator_t *hot_tier = NULL;
- xlator_t *cold_tier = NULL;
-
- local = frame->local;
- prev = cookie;
- conf = this->private;
-
- cold_tier = TIER_HASHED_SUBVOL;
- hot_tier = TIER_UNHASHED_SUBVOL;
-
- LOCK(&frame->lock);
- {
- if (op_ret == -1) {
- if (op_errno == ENOENT) {
- local->op_ret = 0;
- } else {
- local->op_ret = -1;
- local->op_errno = op_errno;
- }
- gf_msg_debug(this->name, op_errno,
- "Unlink: subvolume %s returned -1"
- " with errno = %d",
- prev->name, op_errno);
- goto unlock;
- }
-
- local->op_ret = 0;
-
- local->postparent = *postparent;
- local->preparent = *preparent;
-
- if (local->loc.parent) {
- dht_inode_ctx_time_update(local->loc.parent, this,
- &local->preparent, 0);
- dht_inode_ctx_time_update(local->loc.parent, this,
- &local->postparent, 1);
- }
- }
-unlock:
- UNLOCK(&frame->lock);
-
- if (local->op_ret)
- goto out;
-
- if (cold_tier != local->cached_subvol) {
- /*
- * File is present in hot tier, so there will be
- * a link file on cold tier, deleting the linkfile
- * from cold tier
- */
- STACK_WIND_COOKIE(frame, tier_unlink_linkfile_cbk, cold_tier, cold_tier,
- cold_tier->fops->unlink, &local->loc, local->flags,
- xdata);
- return 0;
- }
-
- ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&stbuf);
- if (!ret && stbuf &&
- ((IS_DHT_MIGRATION_PHASE2(stbuf)) || IS_DHT_MIGRATION_PHASE1(stbuf))) {
- /*
- * File is migrating from cold to hot tier.
- * Delete the destination linkfile.
- */
- STACK_WIND_COOKIE(frame, tier_unlink_lookup_cbk, hot_tier, hot_tier,
- hot_tier->fops->lookup, &local->loc, NULL);
- return 0;
- }
-
-out:
- DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent, xdata);
-
- return 0;
-}
-
-int
-tier_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
- dict_t *xdata)
-{
- xlator_t *cached_subvol = NULL;
- xlator_t *hashed_subvol = NULL;
- dht_conf_t *conf = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
- int ret = -1;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
-
- conf = this->private;
-
- local = dht_local_init(frame, loc, NULL, GF_FOP_UNLINK);
- if (!local) {
- op_errno = ENOMEM;
-
- goto err;
- }
-
- hashed_subvol = TIER_HASHED_SUBVOL;
-
- cached_subvol = local->cached_subvol;
- if (!cached_subvol) {
- gf_msg_debug(this->name, 0, "no cached subvolume for path=%s",
- loc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- local->flags = xflag;
- if (IA_ISREG(loc->inode->ia_type) && (hashed_subvol == cached_subvol)) {
- /*
- * File resides in cold tier. We need to stat
- * the file to see if it is being promoted.
- * If yes we need to delete the destination
- * file as well.
- *
- * Currently we are doing this check only for
- * regular files.
- */
- xdata = xdata ? dict_ref(xdata) : dict_new();
- if (xdata) {
- ret = dict_set_int8(xdata, DHT_IATT_IN_XDATA_KEY, 1);
- if (ret) {
- gf_msg_debug(this->name, 0, "Failed to set dictionary key %s",
- DHT_IATT_IN_XDATA_KEY);
- }
- }
- }
-
- /*
- * File is on hot tier, delete the data file first, then
- * linkfile from cold.
- */
- STACK_WIND_COOKIE(frame, tier_unlink_cbk, cached_subvol, cached_subvol,
- cached_subvol->fops->unlink, loc, xflag, xdata);
- if (xdata)
- dict_unref(xdata);
- return 0;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL);
-
- return 0;
-}
-
-int
-tier_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
-{
- gf_dirent_t entries;
- gf_dirent_t *orig_entry = NULL;
- gf_dirent_t *entry = NULL;
- int count = 0;
-
- INIT_LIST_HEAD(&entries.list);
-
- if (op_ret < 0)
- goto unwind;
-
- list_for_each_entry(orig_entry, (&orig_entries->list), list)
- {
- entry = gf_dirent_for_name(orig_entry->d_name);
- if (!entry) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "Memory allocation failed ");
- goto unwind;
- }
-
- entry->d_off = orig_entry->d_off;
- entry->d_ino = orig_entry->d_ino;
- entry->d_type = orig_entry->d_type;
- entry->d_len = orig_entry->d_len;
-
- list_add_tail(&entry->list, &entries.list);
- count++;
- }
- op_ret = count;
-
-unwind:
- if (op_ret < 0)
- op_ret = 0;
-
- DHT_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, NULL);
-
- gf_dirent_free(&entries);
-
- return 0;
-}
-
-int
-tier_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- gf_dirent_t entries;
- gf_dirent_t *orig_entry = NULL;
- gf_dirent_t *entry = NULL;
- xlator_t *prev = NULL;
- xlator_t *next_subvol = NULL;
- off_t next_offset = 0;
- int count = 0;
- dht_conf_t *conf = NULL;
- int ret = 0;
- inode_table_t *itable = NULL;
- inode_t *inode = NULL;
-
- INIT_LIST_HEAD(&entries.list);
- prev = cookie;
- local = frame->local;
- itable = local->fd ? local->fd->inode->table : NULL;
-
- conf = this->private;
- GF_VALIDATE_OR_GOTO(this->name, conf, unwind);
-
- if (op_ret < 0)
- goto done;
-
- list_for_each_entry(orig_entry, (&orig_entries->list), list)
- {
- next_offset = orig_entry->d_off;
-
- if (IA_ISINVAL(orig_entry->d_stat.ia_type)) {
- /*stat failed somewhere- ignore this entry*/
- continue;
- }
-
- entry = gf_dirent_for_name(orig_entry->d_name);
- if (!entry) {
- goto unwind;
- }
-
- entry->d_off = orig_entry->d_off;
- entry->d_stat = orig_entry->d_stat;
- entry->d_ino = orig_entry->d_ino;
- entry->d_type = orig_entry->d_type;
- entry->d_len = orig_entry->d_len;
-
- if (orig_entry->dict)
- entry->dict = dict_ref(orig_entry->dict);
-
- if (check_is_linkfile(NULL, (&orig_entry->d_stat), orig_entry->dict,
- conf->link_xattr_name)) {
- goto entries;
-
- } else if (IA_ISDIR(entry->d_stat.ia_type)) {
- if (orig_entry->inode) {
- dht_inode_ctx_time_update(orig_entry->inode, this,
- &entry->d_stat, 1);
- }
- } else {
- if (orig_entry->inode) {
- ret = dht_layout_preset(this, prev, orig_entry->inode);
- if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0,
- DHT_MSG_LAYOUT_SET_FAILED,
- "failed to link the layout "
- "in inode");
-
- entry->inode = inode_ref(orig_entry->inode);
- } else if (itable) {
- /*
- * orig_entry->inode might be null if any upper
- * layer xlators below client set to null, to
- * force a lookup on the inode even if the inode
- * is present in the inode table. In that case
- * we just update the ctx to make sure we didn't
- * missed anything.
- */
- inode = inode_find(itable, orig_entry->d_stat.ia_gfid);
- if (inode) {
- ret = dht_layout_preset(this, TIER_HASHED_SUBVOL, inode);
- if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0,
- DHT_MSG_LAYOUT_SET_FAILED,
- "failed to link the layout"
- " in inode");
- inode_unref(inode);
- inode = NULL;
- }
- }
- }
-
- entries:
- list_add_tail(&entry->list, &entries.list);
- count++;
- }
- op_ret = count;
-
-done:
- if (count == 0) {
- /* non-zero next_offset means that
- EOF is not yet hit on the current subvol
- */
- if (next_offset != 0) {
- next_subvol = prev;
- } else {
- goto unwind;
- }
-
- STACK_WIND_COOKIE(frame, tier_readdirp_cbk, next_subvol, next_subvol,
- next_subvol->fops->readdirp, local->fd, local->size,
- next_offset, local->xattr);
- return 0;
- }
-
-unwind:
- if (op_ret < 0)
- op_ret = 0;
-
- DHT_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &entries, NULL);
-
- gf_dirent_free(&entries);
-
- return 0;
-}
-
-int
-tier_do_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff, int whichop, dict_t *dict)
-{
- dht_local_t *local = NULL;
- int op_errno = -1;
- xlator_t *hashed_subvol = NULL;
- int ret = 0;
- dht_conf_t *conf = NULL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
- VALIDATE_OR_GOTO(this->private, err);
-
- conf = this->private;
-
- local = dht_local_init(frame, NULL, NULL, whichop);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->fd = fd_ref(fd);
- local->size = size;
- local->xattr_req = (dict) ? dict_ref(dict) : NULL;
-
- hashed_subvol = TIER_HASHED_SUBVOL;
-
- /* TODO: do proper readdir */
- if (whichop == GF_FOP_READDIRP) {
- if (dict)
- local->xattr = dict_ref(dict);
- else
- local->xattr = dict_new();
-
- if (local->xattr) {
- ret = dict_set_uint32(local->xattr, conf->link_xattr_name, 256);
- if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value"
- " : key = %s",
- conf->link_xattr_name);
- }
-
- STACK_WIND_COOKIE(frame, tier_readdirp_cbk, hashed_subvol,
- hashed_subvol, hashed_subvol->fops->readdirp, fd,
- size, yoff, local->xattr);
-
- } else {
- STACK_WIND_COOKIE(frame, tier_readdir_cbk, hashed_subvol, hashed_subvol,
- hashed_subvol->fops->readdir, fd, size, yoff,
- local->xattr);
- }
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND(readdir, frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-int
-tier_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff, dict_t *xdata)
-{
- int op = GF_FOP_READDIR;
- dht_conf_t *conf = NULL;
- int i = 0;
-
- conf = this->private;
- if (!conf)
- goto out;
-
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (!conf->subvolume_status[i]) {
- op = GF_FOP_READDIRP;
- break;
- }
- }
-
- if (conf->use_readdirp)
- op = GF_FOP_READDIRP;
-
-out:
- tier_do_readdir(frame, this, fd, size, yoff, op, 0);
- return 0;
-}
-
-int
-tier_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff, dict_t *dict)
-{
- tier_do_readdir(frame, this, fd, size, yoff, GF_FOP_READDIRP, dict);
- return 0;
-}
-
-int
-tier_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, struct statvfs *statvfs, dict_t *xdata)
-{
- gf_boolean_t event = _gf_false;
- qdstatfs_action_t action = qdstatfs_action_OFF;
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- int bsize = 0;
- int frsize = 0;
- GF_UNUSED int ret = 0;
- unsigned long new_usage = 0;
- unsigned long cur_usage = 0;
- xlator_t *prev = NULL;
- dht_conf_t *conf = NULL;
- tier_statvfs_t *tier_stat = NULL;
-
- prev = cookie;
- local = frame->local;
- GF_ASSERT(local);
-
- conf = this->private;
-
- if (xdata)
- ret = dict_get_int8(xdata, "quota-deem-statfs", (int8_t *)&event);
-
- tier_stat = &local->tier_statvfs;
-
- LOCK(&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- goto unlock;
- }
- if (!statvfs) {
- op_errno = EINVAL;
- local->op_ret = -1;
- goto unlock;
- }
- local->op_ret = 0;
-
- if (local->quota_deem_statfs) {
- if (event == _gf_true) {
- action = qdstatfs_action_COMPARE;
- } else {
- action = qdstatfs_action_NEGLECT;
- }
- } else {
- if (event == _gf_true) {
- action = qdstatfs_action_REPLACE;
- local->quota_deem_statfs = _gf_true;
- }
- }
-
- if (local->quota_deem_statfs) {
- switch (action) {
- case qdstatfs_action_NEGLECT:
- goto unlock;
-
- case qdstatfs_action_REPLACE:
- local->statvfs = *statvfs;
- goto unlock;
-
- case qdstatfs_action_COMPARE:
- new_usage = statvfs->f_blocks - statvfs->f_bfree;
- cur_usage = local->statvfs.f_blocks -
- local->statvfs.f_bfree;
-
- /* Take the max of the usage from subvols */
- if (new_usage >= cur_usage)
- local->statvfs = *statvfs;
- goto unlock;
-
- default:
- break;
- }
- }
-
- if (local->statvfs.f_bsize != 0) {
- bsize = max(local->statvfs.f_bsize, statvfs->f_bsize);
- frsize = max(local->statvfs.f_frsize, statvfs->f_frsize);
- dht_normalize_stats(&local->statvfs, bsize, frsize);
- dht_normalize_stats(statvfs, bsize, frsize);
- } else {
- local->statvfs.f_bsize = statvfs->f_bsize;
- local->statvfs.f_frsize = statvfs->f_frsize;
- }
-
- if (prev == TIER_HASHED_SUBVOL) {
- local->statvfs.f_blocks = statvfs->f_blocks;
- local->statvfs.f_files = statvfs->f_files;
- local->statvfs.f_fsid = statvfs->f_fsid;
- local->statvfs.f_flag = statvfs->f_flag;
- local->statvfs.f_namemax = statvfs->f_namemax;
- tier_stat->blocks_used = (statvfs->f_blocks - statvfs->f_bfree);
- tier_stat->pblocks_used = (statvfs->f_blocks - statvfs->f_bavail);
- tier_stat->files_used = (statvfs->f_files - statvfs->f_ffree);
- tier_stat->pfiles_used = (statvfs->f_files - statvfs->f_favail);
- tier_stat->hashed_fsid = statvfs->f_fsid;
- } else {
- tier_stat->unhashed_fsid = statvfs->f_fsid;
- tier_stat->unhashed_blocks_used = (statvfs->f_blocks -
- statvfs->f_bfree);
- tier_stat->unhashed_pblocks_used = (statvfs->f_blocks -
- statvfs->f_bavail);
- tier_stat->unhashed_files_used = (statvfs->f_files -
- statvfs->f_ffree);
- tier_stat->unhashed_pfiles_used = (statvfs->f_files -
- statvfs->f_favail);
- }
- }
-unlock:
- UNLOCK(&frame->lock);
-
- this_call_cnt = dht_frame_return(frame);
- if (is_last_call(this_call_cnt)) {
- if (tier_stat->unhashed_fsid != tier_stat->hashed_fsid) {
- tier_stat->blocks_used += tier_stat->unhashed_blocks_used;
- tier_stat->pblocks_used += tier_stat->unhashed_pblocks_used;
- tier_stat->files_used += tier_stat->unhashed_files_used;
- tier_stat->pfiles_used += tier_stat->unhashed_pfiles_used;
- }
- local->statvfs.f_bfree = local->statvfs.f_blocks -
- tier_stat->blocks_used;
- local->statvfs.f_bavail = local->statvfs.f_blocks -
- tier_stat->pblocks_used;
- local->statvfs.f_ffree = local->statvfs.f_files - tier_stat->files_used;
- local->statvfs.f_favail = local->statvfs.f_files -
- tier_stat->pfiles_used;
- DHT_STACK_UNWIND(statfs, frame, local->op_ret, local->op_errno,
- &local->statvfs, xdata);
- }
-
- return 0;
-}
-
-int
-tier_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int op_errno = -1;
- int i = -1;
- inode_t *inode = NULL;
- inode_table_t *itable = NULL;
- uuid_t root_gfid = {
- 0,
- };
- loc_t newloc = {
- 0,
- };
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(this->private, err);
-
- conf = this->private;
-
- local = dht_local_init(frame, NULL, NULL, GF_FOP_STATFS);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- if (loc->inode && !IA_ISDIR(loc->inode->ia_type)) {
- itable = loc->inode->table;
- if (!itable) {
- op_errno = EINVAL;
- goto err;
- }
-
- loc = &local->loc2;
- root_gfid[15] = 1;
-
- inode = inode_find(itable, root_gfid);
- if (!inode) {
- op_errno = EINVAL;
- goto err;
- }
-
- dht_build_root_loc(inode, &newloc);
- loc = &newloc;
- }
-
- local->call_cnt = conf->subvolume_cnt;
-
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND_COOKIE(frame, tier_statfs_cbk, conf->subvolumes[i],
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->statfs, loc, xdata);
- }
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND(statfs, frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
diff --git a/xlators/cluster/dht/src/tier-common.h b/xlators/cluster/dht/src/tier-common.h
deleted file mode 100644
index b1ebaa8004d..00000000000
--- a/xlators/cluster/dht/src/tier-common.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _TIER_COMMON_H_
-#define _TIER_COMMON_H_
-/* Function definitions */
-int
-tier_create_unlink_stale_linkto_cbk(call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-int
-tier_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata);
-
-int
-tier_create_linkfile_create_cbk(call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, inode_t *inode,
- struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-int
-tier_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, mode_t umask, fd_t *fd, dict_t *params);
-
-int32_t
-tier_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
- dict_t *xdata);
-
-int32_t
-tier_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t off, dict_t *dict);
-
-int
-tier_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff, dict_t *xdata);
-
-int
-tier_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata);
-
-int
-tier_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
-
-#endif
diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c
deleted file mode 100644
index 584f1dd76ba..00000000000
--- a/xlators/cluster/dht/src/tier.c
+++ /dev/null
@@ -1,3090 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include <dlfcn.h>
-
-#include "dht-common.h"
-#include "tier.h"
-#include "tier-common.h"
-#include "syscall.h"
-#include "events.h"
-#include "tier-ctr-interface.h"
-
-/*Hard coded DB info*/
-static gfdb_db_type_t dht_tier_db_type = GFDB_SQLITE3;
-/*Hard coded DB info*/
-
-/*Mutex for updating the data movement stats*/
-static pthread_mutex_t dm_stat_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-/* Stores the path location of promotion query files */
-static char *promotion_qfile;
-/* Stores the path location of demotion query files */
-static char *demotion_qfile;
-
-static void *libhandle;
-static gfdb_methods_t gfdb_methods;
-
-#define DB_QUERY_RECORD_SIZE 4096
-
-/*
- * Closes all the fds and frees the qfile_array
- * */
-static void
-qfile_array_free(tier_qfile_array_t *qfile_array)
-{
- ssize_t i = 0;
-
- if (qfile_array) {
- if (qfile_array->fd_array) {
- for (i = 0; i < qfile_array->array_size; i++) {
- if (qfile_array->fd_array[i] != -1) {
- sys_close(qfile_array->fd_array[i]);
- }
- }
- }
- GF_FREE(qfile_array->fd_array);
- }
- GF_FREE(qfile_array);
-}
-
-/* Create a new query file list with given size */
-static tier_qfile_array_t *
-qfile_array_new(ssize_t array_size)
-{
- int ret = -1;
- tier_qfile_array_t *qfile_array = NULL;
- ssize_t i = 0;
-
- GF_VALIDATE_OR_GOTO("tier", (array_size > 0), out);
-
- qfile_array = GF_CALLOC(1, sizeof(tier_qfile_array_t),
- gf_tier_mt_qfile_array_t);
- if (!qfile_array) {
- gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to allocate memory for tier_qfile_array_t");
- goto out;
- }
-
- qfile_array->fd_array = GF_MALLOC(array_size * sizeof(int),
- gf_dht_mt_int32_t);
- if (!qfile_array->fd_array) {
- gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to allocate memory for "
- "tier_qfile_array_t->fd_array");
- goto out;
- }
-
- /* Init all the fds to -1 */
- for (i = 0; i < array_size; i++) {
- qfile_array->fd_array[i] = -1;
- }
-
- qfile_array->array_size = array_size;
- qfile_array->next_index = 0;
-
- /* Set exhausted count to list size as the list is empty */
- qfile_array->exhausted_count = qfile_array->array_size;
-
- ret = 0;
-out:
- if (ret) {
- qfile_array_free(qfile_array);
- qfile_array = NULL;
- }
- return qfile_array;
-}
-
-/* Checks if the query file list is empty or totally exhausted. */
-static gf_boolean_t
-is_qfile_array_empty(tier_qfile_array_t *qfile_array)
-{
- return (qfile_array->exhausted_count == qfile_array->array_size)
- ? _gf_true
- : _gf_false;
-}
-
-/* Shifts the next_fd pointer to the next available fd in the list */
-static void
-shift_next_index(tier_qfile_array_t *qfile_array)
-{
- int qfile_fd = 0;
- int spin_count = 0;
-
- if (is_qfile_array_empty(qfile_array)) {
- return;
- }
-
- do {
- /* change next_index in a rotional manner */
- (qfile_array->next_index == (qfile_array->array_size - 1))
- ? qfile_array->next_index = 0
- : qfile_array->next_index++;
-
- qfile_fd = (qfile_array->fd_array[qfile_array->next_index]);
-
- spin_count++;
-
- } while ((qfile_fd == -1) && (spin_count < qfile_array->array_size));
-}
-
-/*
- * This is a non-thread safe function to read query records
- * from a list of query files in a Round-Robin manner.
- * As in when the query files get exhuasted they are closed.
- * Returns:
- * 0 if all the query records in all the query files of the list are
- * exhausted.
- * > 0 if a query record is successfully read. Indicates the size of the query
- * record read.
- * < 0 if there was failure
- * */
-static int
-read_query_record_list(tier_qfile_array_t *qfile_array,
- gfdb_query_record_t **query_record)
-{
- int ret = -1;
- int qfile_fd = 0;
-
- GF_VALIDATE_OR_GOTO("tier", qfile_array, out);
- GF_VALIDATE_OR_GOTO("tier", qfile_array->fd_array, out);
-
- do {
- if (is_qfile_array_empty(qfile_array)) {
- ret = 0;
- break;
- }
-
- qfile_fd = qfile_array->fd_array[qfile_array->next_index];
- ret = gfdb_methods.gfdb_read_query_record(qfile_fd, query_record);
- if (ret <= 0) {
- /*The qfile_fd has reached EOF or
- * there was an error.
- * 1. Close the exhausted fd
- * 2. increment the exhausted count
- * 3. shift next_qfile to next qfile
- **/
- sys_close(qfile_fd);
- qfile_array->fd_array[qfile_array->next_index] = -1;
- qfile_array->exhausted_count++;
- /* shift next_qfile to next qfile */
- shift_next_index(qfile_array);
- continue;
- } else {
- /* shift next_qfile to next qfile */
- shift_next_index(qfile_array);
- break;
- }
- } while (1);
-out:
- return ret;
-}
-
-/* Check and update the watermark every WM_INTERVAL seconds */
-#define WM_INTERVAL 5
-#define WM_INTERVAL_EMERG 1
-
-static int
-tier_check_same_node(xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag)
-{
- int ret = -1;
- dict_t *dict = NULL;
- char *uuid_str = NULL;
- uuid_t node_uuid = {
- 0,
- };
-
- GF_VALIDATE_OR_GOTO("tier", this, out);
- GF_VALIDATE_OR_GOTO(this->name, loc, out);
- GF_VALIDATE_OR_GOTO(this->name, defrag, out);
-
- if (syncop_getxattr(this, loc, &dict, GF_XATTR_NODE_UUID_KEY, NULL, NULL)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Unable to get NODE_UUID_KEY %s %s\n", loc->name, loc->path);
- goto out;
- }
-
- if (dict_get_str(dict, GF_XATTR_NODE_UUID_KEY, &uuid_str) < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to get node-uuids for %s", loc->path);
- goto out;
- }
-
- if (gf_uuid_parse(uuid_str, node_uuid)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "uuid_parse failed for %s", loc->path);
- goto out;
- }
-
- if (gf_uuid_compare(node_uuid, defrag->node_uuid)) {
- gf_msg_debug(this->name, 0, "%s does not belong to this node",
- loc->path);
- ret = 1;
- goto out;
- }
-
- ret = 0;
-out:
- if (dict)
- dict_unref(dict);
-
- return ret;
-}
-
-int
-tier_get_fs_stat(xlator_t *this, loc_t *root_loc)
-{
- int ret = 0;
- gf_defrag_info_t *defrag = NULL;
- dht_conf_t *conf = NULL;
- dict_t *xdata = NULL;
- struct statvfs statfs = {
- 0,
- };
- gf_tier_conf_t *tier_conf = NULL;
-
- conf = this->private;
- if (!conf) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS,
- "conf is NULL");
- ret = -1;
- goto exit;
- }
-
- defrag = conf->defrag;
- if (!defrag) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS,
- "defrag is NULL");
- ret = -1;
- goto exit;
- }
-
- tier_conf = &defrag->tier_conf;
-
- xdata = dict_new();
- if (!xdata) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
- "failed to allocate dictionary");
- ret = -1;
- goto exit;
- }
-
- ret = dict_set_int8(xdata, GF_INTERNAL_IGNORE_DEEM_STATFS, 1);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
- "Failed to set " GF_INTERNAL_IGNORE_DEEM_STATFS " in dict");
- ret = -1;
- goto exit;
- }
-
- /* Find how much free space is on the hot subvolume.
- * Then see if that value */
- /* is less than or greater than user defined watermarks.
- * Stash results in */
- /* the tier_conf data structure. */
-
- ret = syncop_statfs(conf->subvolumes[1], root_loc, &statfs, xdata, NULL);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_STATUS,
- "Unable to obtain statfs.");
- goto exit;
- }
-
- pthread_mutex_lock(&dm_stat_mutex);
-
- tier_conf->block_size = statfs.f_bsize;
- tier_conf->blocks_total = statfs.f_blocks;
- tier_conf->blocks_used = statfs.f_blocks - statfs.f_bfree;
-
- tier_conf->percent_full = GF_PERCENTAGE(tier_conf->blocks_used,
- statfs.f_blocks);
- pthread_mutex_unlock(&dm_stat_mutex);
-
-exit:
- if (xdata)
- dict_unref(xdata);
- return ret;
-}
-
-static void
-tier_send_watermark_event(const char *volname, tier_watermark_op_t old_wm,
- tier_watermark_op_t new_wm)
-{
- if (old_wm == TIER_WM_LOW || old_wm == TIER_WM_NONE) {
- if (new_wm == TIER_WM_MID) {
- gf_event(EVENT_TIER_WATERMARK_RAISED_TO_MID, "vol=%s", volname);
- } else if (new_wm == TIER_WM_HI) {
- gf_event(EVENT_TIER_WATERMARK_HI, "vol=%s", volname);
- }
- } else if (old_wm == TIER_WM_MID) {
- if (new_wm == TIER_WM_LOW) {
- gf_event(EVENT_TIER_WATERMARK_DROPPED_TO_LOW, "vol=%s", volname);
- } else if (new_wm == TIER_WM_HI) {
- gf_event(EVENT_TIER_WATERMARK_HI, "vol=%s", volname);
- }
- } else if (old_wm == TIER_WM_HI) {
- if (new_wm == TIER_WM_MID) {
- gf_event(EVENT_TIER_WATERMARK_DROPPED_TO_MID, "vol=%s", volname);
- } else if (new_wm == TIER_WM_LOW) {
- gf_event(EVENT_TIER_WATERMARK_DROPPED_TO_LOW, "vol=%s", volname);
- }
- }
-}
-
-int
-tier_check_watermark(xlator_t *this)
-{
- int ret = -1;
- gf_defrag_info_t *defrag = NULL;
- dht_conf_t *conf = NULL;
- gf_tier_conf_t *tier_conf = NULL;
- tier_watermark_op_t wm = TIER_WM_NONE;
-
- conf = this->private;
- if (!conf)
- goto exit;
-
- defrag = conf->defrag;
- if (!defrag)
- goto exit;
-
- tier_conf = &defrag->tier_conf;
-
- if (tier_conf->percent_full < tier_conf->watermark_low) {
- wm = TIER_WM_LOW;
-
- } else if (tier_conf->percent_full < tier_conf->watermark_hi) {
- wm = TIER_WM_MID;
-
- } else {
- wm = TIER_WM_HI;
- }
-
- if (wm != tier_conf->watermark_last) {
- tier_send_watermark_event(tier_conf->volname, tier_conf->watermark_last,
- wm);
-
- tier_conf->watermark_last = wm;
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Tier watermark now %d", wm);
- }
-
- ret = 0;
-
-exit:
- return ret;
-}
-
-static gf_boolean_t
-is_hot_tier_full(gf_tier_conf_t *tier_conf)
-{
- if (tier_conf && (tier_conf->mode == TIER_MODE_WM) &&
- (tier_conf->watermark_last == TIER_WM_HI))
- return _gf_true;
-
- return _gf_false;
-}
-
-int
-tier_do_migration(xlator_t *this, int promote)
-{
- gf_defrag_info_t *defrag = NULL;
- dht_conf_t *conf = NULL;
- long rand = 0;
- int migrate = 0;
- gf_tier_conf_t *tier_conf = NULL;
-
- conf = this->private;
- if (!conf)
- goto exit;
-
- defrag = conf->defrag;
- if (!defrag)
- goto exit;
-
- if (tier_check_watermark(this) != 0) {
- gf_msg(this->name, GF_LOG_CRITICAL, errno, DHT_MSG_LOG_TIER_ERROR,
- "Failed to get watermark");
- goto exit;
- }
-
- tier_conf = &defrag->tier_conf;
-
- switch (tier_conf->watermark_last) {
- case TIER_WM_LOW:
- migrate = promote ? 1 : 0;
- break;
- case TIER_WM_HI:
- migrate = promote ? 0 : 1;
- break;
- case TIER_WM_MID:
- /* coverity[DC.WEAK_CRYPTO] */
- rand = random() % 100;
- if (promote) {
- migrate = (rand > tier_conf->percent_full);
- } else {
- migrate = (rand <= tier_conf->percent_full);
- }
- break;
- }
-
-exit:
- return migrate;
-}
-
-int
-tier_migrate(xlator_t *this, int is_promotion, dict_t *migrate_data, loc_t *loc,
- gf_tier_conf_t *tier_conf)
-{
- int ret = -1;
-
- pthread_mutex_lock(&tier_conf->pause_mutex);
- if (is_promotion)
- tier_conf->promote_in_progress = 1;
- else
- tier_conf->demote_in_progress = 1;
- pthread_mutex_unlock(&tier_conf->pause_mutex);
-
- /* Data migration */
- ret = syncop_setxattr(this, loc, migrate_data, 0, NULL, NULL);
-
- pthread_mutex_lock(&tier_conf->pause_mutex);
- if (is_promotion)
- tier_conf->promote_in_progress = 0;
- else
- tier_conf->demote_in_progress = 0;
- pthread_mutex_unlock(&tier_conf->pause_mutex);
-
- return ret;
-}
-
-/* returns _gf_true: if file can be promoted
- * returns _gf_false: if file cannot be promoted
- */
-static gf_boolean_t
-tier_can_promote_file(xlator_t *this, char const *file_name,
- struct iatt *current, gf_defrag_info_t *defrag)
-{
- gf_boolean_t ret = _gf_false;
- fsblkcnt_t estimated_usage = 0;
-
- if (defrag->tier_conf.tier_max_promote_size &&
- (current->ia_size > defrag->tier_conf.tier_max_promote_size)) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "File %s (gfid:%s) with size (%" PRIu64
- ") exceeds maxsize "
- "(%d) for promotion. File will not be promoted.",
- file_name, uuid_utoa(current->ia_gfid), current->ia_size,
- defrag->tier_conf.tier_max_promote_size);
- goto err;
- }
-
- /* bypass further validations for TEST mode */
- if (defrag->tier_conf.mode != TIER_MODE_WM) {
- ret = _gf_true;
- goto err;
- }
-
- /* convert the file size to blocks as per the block size of the
- * destination tier
- * NOTE: add (block_size - 1) to get the correct block size when
- * there is a remainder after a modulo
- */
- estimated_usage = ((current->ia_size + defrag->tier_conf.block_size - 1) /
- defrag->tier_conf.block_size) +
- defrag->tier_conf.blocks_used;
-
- /* test if the estimated block usage goes above HI watermark */
- if (GF_PERCENTAGE(estimated_usage, defrag->tier_conf.blocks_total) >=
- defrag->tier_conf.watermark_hi) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Estimated block count consumption on "
- "hot tier (%" PRIu64
- ") exceeds hi watermark (%d%%). "
- "File will not be promoted.",
- estimated_usage, defrag->tier_conf.watermark_hi);
- goto err;
- }
- ret = _gf_true;
-err:
- return ret;
-}
-
-static int
-tier_set_migrate_data(dict_t *migrate_data)
-{
- int failed = 1;
-
- failed = dict_set_str(migrate_data, GF_XATTR_FILE_MIGRATE_KEY, "force");
- if (failed) {
- goto bail_out;
- }
-
- /* Flag to suggest the xattr call is from migrator */
- failed = dict_set_str(migrate_data, "from.migrator", "yes");
- if (failed) {
- goto bail_out;
- }
-
- /* Flag to suggest its a tiering migration
- * The reason for this dic key-value is that
- * promotions and demotions are multithreaded
- * so the original frame from gf_defrag_start()
- * is not carried. A new frame will be created when
- * we do syncop_setxattr(). This does not have the
- * frame->root->pid of the original frame. So we pass
- * this dic key-value when we do syncop_setxattr() to do
- * data migration and set the frame->root->pid to
- * GF_CLIENT_PID_TIER_DEFRAG in dht_setxattr() just before
- * calling dht_start_rebalance_task() */
- failed = dict_set_str(migrate_data, TIERING_MIGRATION_KEY, "yes");
- if (failed) {
- goto bail_out;
- }
-
- failed = 0;
-
-bail_out:
- return failed;
-}
-
-static char *
-tier_get_parent_path(xlator_t *this, loc_t *p_loc, struct iatt *par_stbuf,
- int *per_link_status)
-{
- int ret = -1;
- char *parent_path = NULL;
- dict_t *xdata_request = NULL;
- dict_t *xdata_response = NULL;
-
- xdata_request = dict_new();
- if (!xdata_request) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to create xdata_request dict");
- goto err;
- }
- ret = dict_set_int32(xdata_request, GET_ANCESTRY_PATH_KEY, 42);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to set value to dict : key %s \n",
- GET_ANCESTRY_PATH_KEY);
- goto err;
- }
-
- ret = syncop_lookup(this, p_loc, par_stbuf, NULL, xdata_request,
- &xdata_response);
- /* When the parent gfid is a stale entry, the lookup
- * will fail and stop the demotion process.
- * The parent gfid can be stale when a huge folder is
- * deleted while the files within it are being migrated
- */
- if (ret == -ESTALE) {
- gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_STALE_LOOKUP,
- "Stale entry in parent lookup for %s", uuid_utoa(p_loc->gfid));
- *per_link_status = 1;
- goto err;
- } else if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_ERROR,
- "Error in parent lookup for %s", uuid_utoa(p_loc->gfid));
- *per_link_status = -1;
- goto err;
- }
- ret = dict_get_str(xdata_response, GET_ANCESTRY_PATH_KEY, &parent_path);
- if (ret || !parent_path) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to get parent path for %s", uuid_utoa(p_loc->gfid));
- *per_link_status = -1;
- goto err;
- }
-
-err:
- if (xdata_request) {
- dict_unref(xdata_request);
- }
-
- if (xdata_response) {
- dict_unref(xdata_response);
- xdata_response = NULL;
- }
-
- return parent_path;
-}
-
-static int
-tier_get_file_name_and_path(xlator_t *this, uuid_t gfid,
- gfdb_link_info_t *link_info,
- char const *parent_path, loc_t *loc,
- int *per_link_status)
-{
- int ret = -1;
-
- loc->name = gf_strdup(link_info->file_name);
- if (!loc->name) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Memory "
- "allocation failed for %s",
- uuid_utoa(gfid));
- *per_link_status = -1;
- goto err;
- }
- ret = gf_asprintf((char **)&(loc->path), "%s/%s", parent_path, loc->name);
- if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to "
- "construct file path for %s %s\n",
- parent_path, loc->name);
- *per_link_status = -1;
- goto err;
- }
-
- ret = 0;
-
-err:
- return ret;
-}
-
-static int
-tier_lookup_file(xlator_t *this, loc_t *p_loc, loc_t *loc, struct iatt *current,
- int *per_link_status)
-{
- int ret = -1;
-
- ret = syncop_lookup(this, loc, current, NULL, NULL, NULL);
-
- /* The file may be deleted even when the parent
- * is available and the lookup will
- * return a stale entry which would stop the
- * migration. so if its a stale entry, then skip
- * the file and keep migrating.
- */
- if (ret == -ESTALE) {
- gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_STALE_LOOKUP,
- "Stale lookup for %s", uuid_utoa(p_loc->gfid));
- *per_link_status = 1;
- goto err;
- } else if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_ERROR,
- "Failed to "
- "lookup file %s\n",
- loc->name);
- *per_link_status = -1;
- goto err;
- }
- ret = 0;
-
-err:
- return ret;
-}
-
-static gf_boolean_t
-tier_is_file_already_at_destination(xlator_t *src_subvol,
- query_cbk_args_t *query_cbk_args,
- dht_conf_t *conf, int *per_link_status)
-{
- gf_boolean_t at_destination = _gf_true;
-
- if (src_subvol == NULL) {
- *per_link_status = 1;
- goto err;
- }
- if (query_cbk_args->is_promotion && src_subvol == conf->subvolumes[1]) {
- *per_link_status = 1;
- goto err;
- }
-
- if (!query_cbk_args->is_promotion && src_subvol == conf->subvolumes[0]) {
- *per_link_status = 1;
- goto err;
- }
- at_destination = _gf_false;
-
-err:
- return at_destination;
-}
-
-static void
-tier_update_migration_counters(query_cbk_args_t *query_cbk_args,
- gf_defrag_info_t *defrag,
- uint64_t *total_migrated_bytes, int *total_files)
-{
- if (query_cbk_args->is_promotion) {
- defrag->total_files_promoted++;
- *total_migrated_bytes += defrag->tier_conf.st_last_promoted_size;
- pthread_mutex_lock(&dm_stat_mutex);
- defrag->tier_conf.blocks_used += defrag->tier_conf
- .st_last_promoted_size;
- pthread_mutex_unlock(&dm_stat_mutex);
- } else {
- defrag->total_files_demoted++;
- *total_migrated_bytes += defrag->tier_conf.st_last_demoted_size;
- pthread_mutex_lock(&dm_stat_mutex);
- defrag->tier_conf.blocks_used -= defrag->tier_conf.st_last_demoted_size;
- pthread_mutex_unlock(&dm_stat_mutex);
- }
- if (defrag->tier_conf.blocks_total) {
- pthread_mutex_lock(&dm_stat_mutex);
- defrag->tier_conf.percent_full = GF_PERCENTAGE(
- defrag->tier_conf.blocks_used, defrag->tier_conf.blocks_total);
- pthread_mutex_unlock(&dm_stat_mutex);
- }
-
- (*total_files)++;
-}
-
-static int
-tier_migrate_link(xlator_t *this, dht_conf_t *conf, uuid_t gfid,
- gfdb_link_info_t *link_info, gf_defrag_info_t *defrag,
- query_cbk_args_t *query_cbk_args, dict_t *migrate_data,
- int *per_link_status, int *total_files,
- uint64_t *total_migrated_bytes)
-{
- int ret = -1;
- struct iatt current = {
- 0,
- };
- struct iatt par_stbuf = {
- 0,
- };
- loc_t p_loc = {
- 0,
- };
- loc_t loc = {
- 0,
- };
- xlator_t *src_subvol = NULL;
- inode_t *linked_inode = NULL;
- char *parent_path = NULL;
-
- /* Lookup for parent and get the path of parent */
- gf_uuid_copy(p_loc.gfid, link_info->pargfid);
- p_loc.inode = inode_new(defrag->root_inode->table);
- if (!p_loc.inode) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to create reference to inode"
- " for %s",
- uuid_utoa(p_loc.gfid));
-
- *per_link_status = -1;
- goto err;
- }
-
- parent_path = tier_get_parent_path(this, &p_loc, &par_stbuf,
- per_link_status);
- if (!parent_path) {
- goto err;
- }
-
- linked_inode = inode_link(p_loc.inode, NULL, NULL, &par_stbuf);
- inode_unref(p_loc.inode);
- p_loc.inode = linked_inode;
-
- /* Preparing File Inode */
- gf_uuid_copy(loc.gfid, gfid);
- loc.inode = inode_new(defrag->root_inode->table);
- gf_uuid_copy(loc.pargfid, link_info->pargfid);
- loc.parent = inode_ref(p_loc.inode);
-
- /* Get filename and Construct file path */
- if (tier_get_file_name_and_path(this, gfid, link_info, parent_path, &loc,
- per_link_status) != 0) {
- goto err;
- }
- gf_uuid_copy(loc.parent->gfid, link_info->pargfid);
-
- /* lookup file inode */
- if (tier_lookup_file(this, &p_loc, &loc, &current, per_link_status) != 0) {
- goto err;
- }
-
- if (query_cbk_args->is_promotion) {
- if (!tier_can_promote_file(this, link_info->file_name, &current,
- defrag)) {
- *per_link_status = 1;
- goto err;
- }
- }
-
- linked_inode = inode_link(loc.inode, NULL, NULL, &current);
- inode_unref(loc.inode);
- loc.inode = linked_inode;
-
- /*
- * Do not promote/demote if file already is where it
- * should be. It means another brick moved the file
- * so is not an error. So we set per_link_status = 1
- * so that we ignore counting this.
- */
- src_subvol = dht_subvol_get_cached(this, loc.inode);
-
- if (tier_is_file_already_at_destination(src_subvol, query_cbk_args, conf,
- per_link_status)) {
- goto err;
- }
-
- gf_msg_debug(this->name, 0, "Tier %s: src_subvol %s file %s",
- (query_cbk_args->is_promotion ? "promote" : "demote"),
- src_subvol->name, loc.path);
-
- ret = tier_check_same_node(this, &loc, defrag);
- if (ret != 0) {
- if (ret < 0) {
- *per_link_status = -1;
- goto err;
- }
- ret = 0;
- /* By setting per_link_status to 1 we are
- * ignoring this status and will not be counting
- * this file for migration */
- *per_link_status = 1;
- goto err;
- }
-
- gf_uuid_copy(loc.gfid, loc.inode->gfid);
-
- if (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Tiering paused. "
- "Exiting tier_migrate_link");
- goto err;
- }
-
- ret = tier_migrate(this, query_cbk_args->is_promotion, migrate_data, &loc,
- &defrag->tier_conf);
-
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_ERROR,
- "Failed to "
- "migrate %s ",
- loc.path);
- *per_link_status = -1;
- goto err;
- }
-
- tier_update_migration_counters(query_cbk_args, defrag, total_migrated_bytes,
- total_files);
-
- ret = 0;
-
-err:
- GF_FREE((char *)loc.name);
- loc.name = NULL;
- loc_wipe(&loc);
- loc_wipe(&p_loc);
-
- if ((*total_files >= defrag->tier_conf.max_migrate_files) ||
- (*total_migrated_bytes > defrag->tier_conf.max_migrate_bytes)) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Reached cycle migration limit."
- "migrated bytes %" PRId64 " files %d",
- *total_migrated_bytes, *total_files);
- ret = -1;
- }
-
- return ret;
-}
-
-static int
-tier_migrate_using_query_file(void *_args)
-{
- int ret = -1;
- query_cbk_args_t *query_cbk_args = (query_cbk_args_t *)_args;
- xlator_t *this = NULL;
- gf_defrag_info_t *defrag = NULL;
- gfdb_query_record_t *query_record = NULL;
- gfdb_link_info_t *link_info = NULL;
- dict_t *migrate_data = NULL;
- /*
- * per_file_status and per_link_status
- * 0 : success
- * -1 : failure
- * 1 : ignore the status and don't count for migration
- * */
- int per_file_status = 0;
- int per_link_status = 0;
- int total_status = 0;
- dht_conf_t *conf = NULL;
- uint64_t total_migrated_bytes = 0;
- int total_files = 0;
- loc_t root_loc = {0};
- gfdb_time_t start_time = {0};
- gfdb_time_t current_time = {0};
- int total_time = 0;
- int max_time = 0;
- gf_boolean_t emergency_demote_mode = _gf_false;
-
- GF_VALIDATE_OR_GOTO("tier", query_cbk_args, out);
- GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out);
- this = query_cbk_args->this;
- GF_VALIDATE_OR_GOTO(this->name, query_cbk_args->defrag, out);
- GF_VALIDATE_OR_GOTO(this->name, query_cbk_args->qfile_array, out);
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
-
- conf = this->private;
-
- defrag = query_cbk_args->defrag;
- migrate_data = dict_new();
- if (!migrate_data)
- goto out;
-
- emergency_demote_mode = (!query_cbk_args->is_promotion &&
- is_hot_tier_full(&defrag->tier_conf));
-
- if (tier_set_migrate_data(migrate_data) != 0) {
- goto out;
- }
-
- dht_build_root_loc(defrag->root_inode, &root_loc);
-
- ret = gettimeofday(&start_time, NULL);
- if (query_cbk_args->is_promotion) {
- max_time = defrag->tier_conf.tier_promote_frequency;
- } else {
- max_time = defrag->tier_conf.tier_demote_frequency;
- }
-
- /* Per file */
- while ((ret = read_query_record_list(query_cbk_args->qfile_array,
- &query_record)) != 0) {
- if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to fetch query record "
- "from query file");
- goto out;
- }
-
- if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
- ret = -1;
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Exiting tier migration as"
- "defrag status is not started");
- goto out;
- }
-
- ret = gettimeofday(&current_time, NULL);
- if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Could not get current time.");
- goto out;
- }
-
- total_time = current_time.tv_sec - start_time.tv_sec;
- if (total_time > max_time) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Max cycle time reached. Exiting migration.");
- goto out;
- }
-
- per_file_status = 0;
- per_link_status = 0;
-
- if (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Tiering paused. "
- "Exiting tier_migrate_using_query_file");
- break;
- }
-
- if (defrag->tier_conf.mode == TIER_MODE_WM) {
- ret = tier_get_fs_stat(this, &root_loc);
- if (ret != 0) {
- gfdb_methods.gfdb_query_record_free(query_record);
- query_record = NULL;
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS,
- "tier_get_fs_stat() FAILED ... "
- "skipping file migrations until next cycle");
- break;
- }
-
- if (!tier_do_migration(this, query_cbk_args->is_promotion)) {
- gfdb_methods.gfdb_query_record_free(query_record);
- query_record = NULL;
-
- /* We have crossed the high watermark. Stop processing
- * files if this is a promotion cycle so demotion gets
- * a chance to start if not already running*/
-
- if (query_cbk_args->is_promotion &&
- is_hot_tier_full(&defrag->tier_conf)) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "High watermark crossed during "
- "promotion. Exiting "
- "tier_migrate_using_query_file");
- break;
- }
- continue;
- }
- }
-
- per_link_status = 0;
-
- /* For now we only support single link migration. And we will
- * ignore other hard links in the link info list of query record
- * TODO: Multiple hard links migration */
- if (!list_empty(&query_record->link_list)) {
- link_info = list_first_entry(&query_record->link_list,
- gfdb_link_info_t, list);
- }
- if (link_info != NULL) {
- if (tier_migrate_link(this, conf, query_record->gfid, link_info,
- defrag, query_cbk_args, migrate_data,
- &per_link_status, &total_files,
- &total_migrated_bytes) != 0) {
- gf_msg(
- this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "%s failed for %s(gfid:%s)",
- (query_cbk_args->is_promotion ? "Promotion" : "Demotion"),
- link_info->file_name, uuid_utoa(query_record->gfid));
- }
- }
- per_file_status = per_link_status;
-
- if (per_file_status < 0) { /* Failure */
- pthread_mutex_lock(&dm_stat_mutex);
- defrag->total_failures++;
- pthread_mutex_unlock(&dm_stat_mutex);
- } else if (per_file_status == 0) { /* Success */
- pthread_mutex_lock(&dm_stat_mutex);
- defrag->total_files++;
- pthread_mutex_unlock(&dm_stat_mutex);
- } else if (per_file_status == 1) { /* Ignore */
- per_file_status = 0;
- /* Since this attempt was ignored we
- * decrement the lookup count*/
- pthread_mutex_lock(&dm_stat_mutex);
- defrag->num_files_lookedup--;
- pthread_mutex_unlock(&dm_stat_mutex);
- }
- total_status = total_status + per_file_status;
- per_link_status = 0;
- per_file_status = 0;
-
- gfdb_methods.gfdb_query_record_free(query_record);
- query_record = NULL;
-
- /* If we are demoting and the entry watermark was HI, then
- * we are done with emergency demotions if the current
- * watermark has fallen below hi-watermark level
- */
- if (emergency_demote_mode) {
- if (tier_check_watermark(this) == 0) {
- if (!is_hot_tier_full(&defrag->tier_conf)) {
- break;
- }
- }
- }
- }
-
-out:
- if (migrate_data)
- dict_unref(migrate_data);
-
- gfdb_methods.gfdb_query_record_free(query_record);
- query_record = NULL;
-
- return total_status;
-}
-
-/* This is the call back function per record/file from data base */
-static int
-tier_gf_query_callback(gfdb_query_record_t *gfdb_query_record, void *_args)
-{
- int ret = -1;
- query_cbk_args_t *query_cbk_args = _args;
-
- GF_VALIDATE_OR_GOTO("tier", query_cbk_args, out);
- GF_VALIDATE_OR_GOTO("tier", query_cbk_args->defrag, out);
- GF_VALIDATE_OR_GOTO("tier", (query_cbk_args->query_fd > 0), out);
-
- ret = gfdb_methods.gfdb_write_query_record(query_cbk_args->query_fd,
- gfdb_query_record);
- if (ret) {
- gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed writing query record to query file");
- goto out;
- }
-
- pthread_mutex_lock(&dm_stat_mutex);
- query_cbk_args->defrag->num_files_lookedup++;
- pthread_mutex_unlock(&dm_stat_mutex);
-
- ret = 0;
-out:
- return ret;
-}
-
-/* Create query file in tier process */
-static int
-tier_process_self_query(tier_brick_list_t *local_brick, void *args)
-{
- int ret = -1;
- char *db_path = NULL;
- query_cbk_args_t *query_cbk_args = NULL;
- xlator_t *this = NULL;
- gfdb_conn_node_t *conn_node = NULL;
- dict_t *params_dict = NULL;
- dict_t *ctr_ipc_dict = NULL;
- gfdb_brick_info_t *gfdb_brick_info = args;
-
- /*Init of all the essentials*/
- GF_VALIDATE_OR_GOTO("tier", gfdb_brick_info, out);
- query_cbk_args = gfdb_brick_info->_query_cbk_args;
-
- GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out);
- this = query_cbk_args->this;
-
- GF_VALIDATE_OR_GOTO(this->name, gfdb_brick_info->_query_cbk_args, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick->xlator, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick->brick_db_path, out);
-
- db_path = local_brick->brick_db_path;
-
- /*Preparing DB parameters before init_db i.e getting db connection*/
- params_dict = dict_new();
- if (!params_dict) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "DB Params cannot initialized");
- goto out;
- }
- SET_DB_PARAM_TO_DICT(this->name, params_dict,
- (char *)gfdb_methods.get_db_path_key(), db_path, ret,
- out);
-
- /*Get the db connection*/
- conn_node = gfdb_methods.init_db((void *)params_dict, dht_tier_db_type);
- if (!conn_node) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "FATAL: Failed initializing db operations");
- goto out;
- }
-
- /* Query for eligible files from db */
- query_cbk_args->query_fd = open(local_brick->qfile_path,
- O_WRONLY | O_CREAT | O_APPEND,
- S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
- if (query_cbk_args->query_fd < 0) {
- gf_msg(this->name, GF_LOG_ERROR, errno, DHT_MSG_LOG_TIER_ERROR,
- "Failed to open query file %s", local_brick->qfile_path);
- goto out;
- }
- if (!gfdb_brick_info->_gfdb_promote) {
- if (query_cbk_args->defrag->tier_conf.watermark_last == TIER_WM_HI) {
- /* emergency demotion mode */
- ret = gfdb_methods.find_all(
- conn_node, tier_gf_query_callback, (void *)query_cbk_args,
- query_cbk_args->defrag->tier_conf.query_limit);
- } else {
- if (query_cbk_args->defrag->write_freq_threshold == 0 &&
- query_cbk_args->defrag->read_freq_threshold == 0) {
- ret = gfdb_methods.find_unchanged_for_time(
- conn_node, tier_gf_query_callback, (void *)query_cbk_args,
- gfdb_brick_info->time_stamp);
- } else {
- ret = gfdb_methods.find_unchanged_for_time_freq(
- conn_node, tier_gf_query_callback, (void *)query_cbk_args,
- gfdb_brick_info->time_stamp,
- query_cbk_args->defrag->write_freq_threshold,
- query_cbk_args->defrag->read_freq_threshold, _gf_false);
- }
- }
- } else {
- if (query_cbk_args->defrag->write_freq_threshold == 0 &&
- query_cbk_args->defrag->read_freq_threshold == 0) {
- ret = gfdb_methods.find_recently_changed_files(
- conn_node, tier_gf_query_callback, (void *)query_cbk_args,
- gfdb_brick_info->time_stamp);
- } else {
- ret = gfdb_methods.find_recently_changed_files_freq(
- conn_node, tier_gf_query_callback, (void *)query_cbk_args,
- gfdb_brick_info->time_stamp,
- query_cbk_args->defrag->write_freq_threshold,
- query_cbk_args->defrag->read_freq_threshold, _gf_false);
- }
- }
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "FATAL: query from db failed");
- goto out;
- }
-
- /*Clear the heat on the DB entries*/
- /*Preparing ctr_ipc_dict*/
- ctr_ipc_dict = dict_new();
- if (!ctr_ipc_dict) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "ctr_ipc_dict cannot initialized");
- goto out;
- }
-
- SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_dict, GFDB_IPC_CTR_KEY,
- GFDB_IPC_CTR_CLEAR_OPS, ret, out);
-
- ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_dict,
- NULL);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed clearing the heat "
- "on db %s error %d",
- local_brick->brick_db_path, ret);
- goto out;
- }
-
- ret = 0;
-out:
- if (params_dict) {
- dict_unref(params_dict);
- params_dict = NULL;
- }
-
- if (ctr_ipc_dict) {
- dict_unref(ctr_ipc_dict);
- ctr_ipc_dict = NULL;
- }
-
- if (query_cbk_args && query_cbk_args->query_fd >= 0) {
- sys_close(query_cbk_args->query_fd);
- query_cbk_args->query_fd = -1;
- }
- gfdb_methods.fini_db(conn_node);
-
- return ret;
-}
-
-/*Ask CTR to create the query file*/
-static int
-tier_process_ctr_query(tier_brick_list_t *local_brick, void *args)
-{
- int ret = -1;
- query_cbk_args_t *query_cbk_args = NULL;
- xlator_t *this = NULL;
- dict_t *ctr_ipc_in_dict = NULL;
- dict_t *ctr_ipc_out_dict = NULL;
- gfdb_brick_info_t *gfdb_brick_info = args;
- gfdb_ipc_ctr_params_t *ipc_ctr_params = NULL;
- int count = 0;
-
- /*Init of all the essentials*/
- GF_VALIDATE_OR_GOTO("tier", gfdb_brick_info, out);
- query_cbk_args = gfdb_brick_info->_query_cbk_args;
-
- GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out);
- this = query_cbk_args->this;
-
- GF_VALIDATE_OR_GOTO(this->name, gfdb_brick_info->_query_cbk_args, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick->xlator, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick->brick_db_path, out);
-
- /*Preparing ctr_ipc_in_dict*/
- ctr_ipc_in_dict = dict_new();
- if (!ctr_ipc_in_dict) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "ctr_ipc_in_dict cannot initialized");
- goto out;
- }
-
- ipc_ctr_params = GF_CALLOC(1, sizeof(gfdb_ipc_ctr_params_t),
- gf_tier_mt_ipc_ctr_params_t);
- if (!ipc_ctr_params) {
- goto out;
- }
-
- /* set all the query params*/
- ipc_ctr_params->is_promote = gfdb_brick_info->_gfdb_promote;
-
- ipc_ctr_params->write_freq_threshold = query_cbk_args->defrag
- ->write_freq_threshold;
-
- ipc_ctr_params->read_freq_threshold = query_cbk_args->defrag
- ->read_freq_threshold;
-
- ipc_ctr_params->query_limit = query_cbk_args->defrag->tier_conf.query_limit;
-
- ipc_ctr_params->emergency_demote = (!gfdb_brick_info->_gfdb_promote &&
- query_cbk_args->defrag->tier_conf
- .watermark_last == TIER_WM_HI);
-
- memcpy(&ipc_ctr_params->time_stamp, gfdb_brick_info->time_stamp,
- sizeof(gfdb_time_t));
-
- SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_in_dict, GFDB_IPC_CTR_KEY,
- GFDB_IPC_CTR_QUERY_OPS, ret, out);
-
- SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_in_dict,
- GFDB_IPC_CTR_GET_QFILE_PATH, local_brick->qfile_path,
- ret, out);
-
- ret = dict_set_bin(ctr_ipc_in_dict, GFDB_IPC_CTR_GET_QUERY_PARAMS,
- ipc_ctr_params, sizeof(*ipc_ctr_params));
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
- "Failed setting %s to params dictionary",
- GFDB_IPC_CTR_GET_QUERY_PARAMS);
- GF_FREE(ipc_ctr_params);
- goto out;
- }
- ipc_ctr_params = NULL;
-
- ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_in_dict,
- &ctr_ipc_out_dict);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_IPC_TIER_ERROR,
- "Failed query on %s ret %d", local_brick->brick_db_path, ret);
- goto out;
- }
-
- ret = dict_get_int32(ctr_ipc_out_dict, GFDB_IPC_CTR_RET_QUERY_COUNT,
- &count);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed getting count "
- "of records on %s",
- local_brick->brick_db_path);
- goto out;
- }
-
- if (count < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed query on %s", local_brick->brick_db_path);
- ret = -1;
- goto out;
- }
-
- pthread_mutex_lock(&dm_stat_mutex);
- query_cbk_args->defrag->num_files_lookedup = count;
- pthread_mutex_unlock(&dm_stat_mutex);
-
- ret = 0;
-out:
-
- if (ctr_ipc_in_dict) {
- dict_unref(ctr_ipc_in_dict);
- ctr_ipc_in_dict = NULL;
- }
-
- if (ctr_ipc_out_dict) {
- dict_unref(ctr_ipc_out_dict);
- ctr_ipc_out_dict = NULL;
- }
-
- GF_FREE(ipc_ctr_params);
-
- return ret;
-}
-
-/* This is the call back function for each brick from hot/cold bricklist
- * It picks up each bricks db and queries for eligible files for migration.
- * The list of eligible files are populated in appropriate query files*/
-static int
-tier_process_brick(tier_brick_list_t *local_brick, void *args)
-{
- int ret = -1;
- dict_t *ctr_ipc_in_dict = NULL;
- dict_t *ctr_ipc_out_dict = NULL;
- char *strval = NULL;
-
- GF_VALIDATE_OR_GOTO("tier", local_brick, out);
-
- GF_VALIDATE_OR_GOTO("tier", local_brick->xlator, out);
-
- if (dht_tier_db_type == GFDB_SQLITE3) {
- /*Preparing ctr_ipc_in_dict*/
- ctr_ipc_in_dict = dict_new();
- if (!ctr_ipc_in_dict) {
- gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "ctr_ipc_in_dict cannot initialized");
- goto out;
- }
-
- ret = dict_set_str(ctr_ipc_in_dict, GFDB_IPC_CTR_KEY,
- GFDB_IPC_CTR_GET_DB_PARAM_OPS);
- if (ret) {
- gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
- "Failed to set %s "
- "to params dictionary",
- GFDB_IPC_CTR_KEY);
- goto out;
- }
-
- ret = dict_set_str(ctr_ipc_in_dict, GFDB_IPC_CTR_GET_DB_PARAM_OPS, "");
- if (ret) {
- gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
- "Failed to set %s "
- "to params dictionary",
- GFDB_IPC_CTR_GET_DB_PARAM_OPS);
- goto out;
- }
-
- ret = dict_set_str(ctr_ipc_in_dict, GFDB_IPC_CTR_GET_DB_KEY,
- "journal_mode");
- if (ret) {
- gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
- "Failed to set %s "
- "to params dictionary",
- GFDB_IPC_CTR_GET_DB_KEY);
- goto out;
- }
-
- ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR,
- ctr_ipc_in_dict, &ctr_ipc_out_dict);
- if (ret || ctr_ipc_out_dict == NULL) {
- gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to get "
- "journal_mode of sql db %s",
- local_brick->brick_db_path);
- goto out;
- }
-
- ret = dict_get_str(ctr_ipc_out_dict, "journal_mode", &strval);
- if (ret) {
- gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_GET_PARAM_FAILED,
- "Failed to get %s "
- "from params dictionary"
- "journal_mode",
- strval);
- goto out;
- }
-
- if (strval && (strncmp(strval, "wal", SLEN("wal")) == 0)) {
- ret = tier_process_self_query(local_brick, args);
- if (ret) {
- goto out;
- }
- } else {
- ret = tier_process_ctr_query(local_brick, args);
- if (ret) {
- goto out;
- }
- }
- ret = 0;
-
- } else {
- ret = tier_process_self_query(local_brick, args);
- if (ret) {
- goto out;
- }
- }
-
- ret = 0;
-out:
- if (ctr_ipc_in_dict)
- dict_unref(ctr_ipc_in_dict);
-
- if (ctr_ipc_out_dict)
- dict_unref(ctr_ipc_out_dict);
-
- return ret;
-}
-
-static int
-tier_build_migration_qfile(migration_args_t *args,
- query_cbk_args_t *query_cbk_args,
- gf_boolean_t is_promotion)
-{
- gfdb_time_t current_time;
- gfdb_brick_info_t gfdb_brick_info;
- gfdb_time_t time_in_past;
- int ret = -1;
- tier_brick_list_t *local_brick = NULL;
- int i = 0;
- time_in_past.tv_sec = args->freq_time;
- time_in_past.tv_usec = 0;
-
- ret = gettimeofday(&current_time, NULL);
- if (ret == -1) {
- gf_msg(args->this->name, GF_LOG_ERROR, errno,
- DHT_MSG_SYS_CALL_GET_TIME_FAILED, "Failed to get current time");
- goto out;
- }
- time_in_past.tv_sec = current_time.tv_sec - time_in_past.tv_sec;
-
- /* The migration daemon may run a varying numberof usec after the */
- /* sleep call triggers. A file may be registered in CTR some number */
- /* of usec X after the daemon started and missed in the subsequent */
- /* cycle if the daemon starts Y usec after the period in seconds */
- /* where Y>X. Normalize away this problem by always setting usec */
- /* to 0. */
- time_in_past.tv_usec = 0;
-
- gfdb_brick_info.time_stamp = &time_in_past;
- gfdb_brick_info._gfdb_promote = is_promotion;
- gfdb_brick_info._query_cbk_args = query_cbk_args;
-
- list_for_each_entry(local_brick, args->brick_list, list)
- {
- /* Construct query file path for this brick
- * i.e
- * /var/run/gluster/xlator_name/
- * {promote/demote}-brickname-indexinbricklist
- * So that no two query files will have same path even
- * bricks have the same name
- * */
- snprintf(local_brick->qfile_path, PATH_MAX, "%s-%s-%d",
- GET_QFILE_PATH(gfdb_brick_info._gfdb_promote),
- local_brick->brick_name, i);
-
- /* Delete any old query files for this brick */
- sys_unlink(local_brick->qfile_path);
-
- ret = tier_process_brick(local_brick, &gfdb_brick_info);
- if (ret) {
- gf_msg(args->this->name, GF_LOG_ERROR, 0,
- DHT_MSG_BRICK_QUERY_FAILED, "Brick %s query failed\n",
- local_brick->brick_db_path);
- }
- i++;
- }
- ret = 0;
-out:
- return ret;
-}
-
-static int
-tier_migrate_files_using_qfile(migration_args_t *comp,
- query_cbk_args_t *query_cbk_args)
-{
- int ret = -1;
- tier_brick_list_t *local_brick = NULL;
- tier_brick_list_t *temp = NULL;
- gfdb_time_t current_time = {
- 0,
- };
- ssize_t qfile_array_size = 0;
- int count = 0;
- int temp_fd = 0;
- gf_tier_conf_t *tier_conf = NULL;
-
- tier_conf = &(query_cbk_args->defrag->tier_conf);
-
- /* Time for error query files */
- gettimeofday(&current_time, NULL);
-
- /* Build the qfile list */
- list_for_each_entry_safe(local_brick, temp, comp->brick_list, list)
- {
- qfile_array_size++;
- }
- query_cbk_args->qfile_array = qfile_array_new(qfile_array_size);
- if (!query_cbk_args->qfile_array) {
- gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to create new "
- "qfile_array");
- goto out;
- }
-
- /*Open all qfiles*/
- count = 0;
- query_cbk_args->qfile_array->exhausted_count = 0;
- list_for_each_entry_safe(local_brick, temp, comp->brick_list, list)
- {
- temp_fd = query_cbk_args->qfile_array->fd_array[count];
- temp_fd = open(local_brick->qfile_path, O_RDONLY,
- S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
- if (temp_fd < 0) {
- gf_msg("tier", GF_LOG_ERROR, errno, DHT_MSG_LOG_TIER_ERROR,
- "Failed to open "
- "%s to the query file",
- local_brick->qfile_path);
- query_cbk_args->qfile_array->exhausted_count++;
- }
- query_cbk_args->qfile_array->fd_array[count] = temp_fd;
- count++;
- }
-
- /* Moving the query file index to the next, so that we won't the same
- * query file every cycle as the first one */
- query_cbk_args->qfile_array
- ->next_index = (query_cbk_args->is_promotion)
- ? tier_conf->last_promote_qfile_index
- : tier_conf->last_demote_qfile_index;
- shift_next_index(query_cbk_args->qfile_array);
- if (query_cbk_args->is_promotion) {
- tier_conf->last_promote_qfile_index = query_cbk_args->qfile_array
- ->next_index;
- } else {
- tier_conf->last_demote_qfile_index = query_cbk_args->qfile_array
- ->next_index;
- }
-
- /* Migrate files using query file list */
- ret = tier_migrate_using_query_file((void *)query_cbk_args);
-out:
- qfile_array_free(query_cbk_args->qfile_array);
-
- /* If there is an error rename all the query files to .err files
- * with a timestamp for better debugging */
- if (ret) {
- struct tm tm = {
- 0,
- };
- char time_str[128] = {
- 0,
- };
- char query_file_path_err[PATH_MAX] = {
- 0,
- };
- int32_t len = 0;
-
- /* Time format for error query files */
- gmtime_r(&current_time.tv_sec, &tm);
- strftime(time_str, sizeof(time_str), "%F-%T", &tm);
-
- list_for_each_entry_safe(local_brick, temp, comp->brick_list, list)
- {
- /* rename error qfile*/
- len = snprintf(query_file_path_err, sizeof(query_file_path_err),
- "%s-%s.err", local_brick->qfile_path, time_str);
- if ((len >= 0) && (len < sizeof(query_file_path_err))) {
- if (sys_rename(local_brick->qfile_path, query_file_path_err) ==
- -1)
- gf_msg_debug("tier", 0,
- "rename "
- "failed");
- }
- }
- }
-
- query_cbk_args->qfile_array = NULL;
-
- return ret;
-}
-
-int
-tier_demote(migration_args_t *demotion_args)
-{
- query_cbk_args_t query_cbk_args;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO("tier", demotion_args, out);
- GF_VALIDATE_OR_GOTO("tier", demotion_args->this, out);
- GF_VALIDATE_OR_GOTO(demotion_args->this->name, demotion_args->brick_list,
- out);
- GF_VALIDATE_OR_GOTO(demotion_args->this->name, demotion_args->defrag, out);
-
- THIS = demotion_args->this;
-
- query_cbk_args.this = demotion_args->this;
- query_cbk_args.defrag = demotion_args->defrag;
- query_cbk_args.is_promotion = 0;
-
- /*Build the query file using bricklist*/
- ret = tier_build_migration_qfile(demotion_args, &query_cbk_args, _gf_false);
- if (ret)
- goto out;
-
- /* Migrate files using the query file */
- ret = tier_migrate_files_using_qfile(demotion_args, &query_cbk_args);
- if (ret)
- goto out;
-
-out:
- demotion_args->return_value = ret;
- return ret;
-}
-
-int
-tier_promote(migration_args_t *promotion_args)
-{
- int ret = -1;
- query_cbk_args_t query_cbk_args;
-
- GF_VALIDATE_OR_GOTO("tier", promotion_args->this, out);
- GF_VALIDATE_OR_GOTO(promotion_args->this->name, promotion_args->brick_list,
- out);
- GF_VALIDATE_OR_GOTO(promotion_args->this->name, promotion_args->defrag,
- out);
-
- THIS = promotion_args->this;
-
- query_cbk_args.this = promotion_args->this;
- query_cbk_args.defrag = promotion_args->defrag;
- query_cbk_args.is_promotion = 1;
-
- /*Build the query file using bricklist*/
- ret = tier_build_migration_qfile(promotion_args, &query_cbk_args, _gf_true);
- if (ret)
- goto out;
-
- /* Migrate files using the query file */
- ret = tier_migrate_files_using_qfile(promotion_args, &query_cbk_args);
- if (ret)
- goto out;
-
-out:
- promotion_args->return_value = ret;
- return ret;
-}
-
-/*
- * Command the CTR on a brick to compact the local database using an IPC
- */
-static int
-tier_process_self_compact(tier_brick_list_t *local_brick, void *args)
-{
- int ret = -1;
- char *db_path = NULL;
- query_cbk_args_t *query_cbk_args = NULL;
- xlator_t *this = NULL;
- gfdb_conn_node_t *conn_node = NULL;
- dict_t *params_dict = NULL;
- dict_t *ctr_ipc_dict = NULL;
- gfdb_brick_info_t *gfdb_brick_info = args;
-
- /*Init of all the essentials*/
- GF_VALIDATE_OR_GOTO("tier", gfdb_brick_info, out);
- query_cbk_args = gfdb_brick_info->_query_cbk_args;
-
- GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out);
- this = query_cbk_args->this;
-
- GF_VALIDATE_OR_GOTO(this->name, gfdb_brick_info->_query_cbk_args, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick->xlator, out);
-
- GF_VALIDATE_OR_GOTO(this->name, local_brick->brick_db_path, out);
-
- db_path = local_brick->brick_db_path;
-
- /*Preparing DB parameters before init_db i.e getting db connection*/
- params_dict = dict_new();
- if (!params_dict) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "DB Params cannot initialized");
- goto out;
- }
- SET_DB_PARAM_TO_DICT(this->name, params_dict,
- (char *)gfdb_methods.get_db_path_key(), db_path, ret,
- out);
-
- /*Get the db connection*/
- conn_node = gfdb_methods.init_db((void *)params_dict, dht_tier_db_type);
- if (!conn_node) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "FATAL: Failed initializing db operations");
- goto out;
- }
-
- ret = 0;
-
- /*Preparing ctr_ipc_dict*/
- ctr_ipc_dict = dict_new();
- if (!ctr_ipc_dict) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "ctr_ipc_dict cannot initialized");
- goto out;
- }
-
- ret = dict_set_int32(ctr_ipc_dict, "compact_active",
- query_cbk_args->defrag->tier_conf.compact_active);
-
- if (ret) {
- gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
- "Failed to set %s "
- "to params dictionary",
- "compact_active");
- goto out;
- }
-
- ret = dict_set_int32(
- ctr_ipc_dict, "compact_mode_switched",
- query_cbk_args->defrag->tier_conf.compact_mode_switched);
-
- if (ret) {
- gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED,
- "Failed to set %s "
- "to params dictionary",
- "compact_mode_switched");
- goto out;
- }
-
- SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_dict, GFDB_IPC_CTR_KEY,
- GFDB_IPC_CTR_SET_COMPACT_PRAGMA, ret, out);
-
- gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
- "Starting Compaction IPC");
-
- ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_dict,
- NULL);
-
- gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
- "Ending Compaction IPC");
-
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed compaction "
- "on db %s error %d",
- local_brick->brick_db_path, ret);
- goto out;
- }
-
- gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
- "SUCCESS: %s Compaction", local_brick->brick_name);
-
- ret = 0;
-out:
- if (params_dict) {
- dict_unref(params_dict);
- params_dict = NULL;
- }
-
- if (ctr_ipc_dict) {
- dict_unref(ctr_ipc_dict);
- ctr_ipc_dict = NULL;
- }
-
- gfdb_methods.fini_db(conn_node);
-
- return ret;
-}
-
-/*
- * This is the call back function for each brick from hot/cold bricklist.
- * It determines the database type on each brick and calls the corresponding
- * function to prepare the compaction IPC.
- */
-static int
-tier_compact_db_brick(tier_brick_list_t *local_brick, void *args)
-{
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO("tier", local_brick, out);
-
- GF_VALIDATE_OR_GOTO("tier", local_brick->xlator, out);
-
- ret = tier_process_self_compact(local_brick, args);
- if (ret) {
- gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Brick %s did not compact", local_brick->brick_name);
- goto out;
- }
-
- ret = 0;
-
-out:
-
- return ret;
-}
-
-static int
-tier_send_compact(migration_args_t *args, query_cbk_args_t *query_cbk_args)
-{
- gfdb_time_t current_time;
- gfdb_brick_info_t gfdb_brick_info;
- gfdb_time_t time_in_past;
- int ret = -1;
- tier_brick_list_t *local_brick = NULL;
-
- time_in_past.tv_sec = args->freq_time;
- time_in_past.tv_usec = 0;
-
- ret = gettimeofday(&current_time, NULL);
- if (ret == -1) {
- gf_msg(args->this->name, GF_LOG_ERROR, errno,
- DHT_MSG_SYS_CALL_GET_TIME_FAILED, "Failed to get current time");
- goto out;
- }
- time_in_past.tv_sec = current_time.tv_sec - time_in_past.tv_sec;
-
- /* The migration daemon may run a varying numberof usec after the sleep
- call triggers. A file may be registered in CTR some number of usec X
- after the daemon started and missed in the subsequent cycle if the
- daemon starts Y usec after the period in seconds where Y>X. Normalize
- away this problem by always setting usec to 0. */
- time_in_past.tv_usec = 0;
-
- gfdb_brick_info.time_stamp = &time_in_past;
-
- /* This is meant to say we are always compacting at this point */
- /* We simply borrow the promotion flag to do this */
- gfdb_brick_info._gfdb_promote = 1;
-
- gfdb_brick_info._query_cbk_args = query_cbk_args;
-
- list_for_each_entry(local_brick, args->brick_list, list)
- {
- gf_msg(args->this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
- "Start compaction for %s", local_brick->brick_name);
-
- ret = tier_compact_db_brick(local_brick, &gfdb_brick_info);
- if (ret) {
- gf_msg(args->this->name, GF_LOG_ERROR, 0,
- DHT_MSG_BRICK_QUERY_FAILED, "Brick %s compaction failed\n",
- local_brick->brick_db_path);
- }
-
- gf_msg(args->this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
- "End compaction for %s", local_brick->brick_name);
- }
- ret = 0;
-out:
- return ret;
-}
-
-static int
-tier_compact(void *args)
-{
- int ret = -1;
- query_cbk_args_t query_cbk_args;
- migration_args_t *compaction_args = args;
-
- GF_VALIDATE_OR_GOTO("tier", compaction_args->this, out);
- GF_VALIDATE_OR_GOTO(compaction_args->this->name,
- compaction_args->brick_list, out);
- GF_VALIDATE_OR_GOTO(compaction_args->this->name, compaction_args->defrag,
- out);
-
- THIS = compaction_args->this;
-
- query_cbk_args.this = compaction_args->this;
- query_cbk_args.defrag = compaction_args->defrag;
- query_cbk_args.is_compaction = 1;
-
- /* Send the compaction pragma out to all the bricks on the bricklist. */
- /* tier_get_bricklist ensures all bricks on the list are local to */
- /* this node. */
- ret = tier_send_compact(compaction_args, &query_cbk_args);
- if (ret)
- goto out;
-
- ret = 0;
-out:
- compaction_args->return_value = ret;
- return ret;
-}
-
-static int
-tier_get_bricklist(xlator_t *xl, struct list_head *local_bricklist_head)
-{
- xlator_list_t *child = NULL;
- char *rv = NULL;
- char *rh = NULL;
- char *brickname = NULL;
- char db_name[PATH_MAX] = "";
- int ret = 0;
- tier_brick_list_t *local_brick = NULL;
- int32_t len = 0;
-
- GF_VALIDATE_OR_GOTO("tier", xl, out);
- GF_VALIDATE_OR_GOTO("tier", local_bricklist_head, out);
-
- /*
- * This function obtains remote subvolumes and filters out only
- * those running on the same node as the tier daemon.
- */
- if (strcmp(xl->type, "protocol/client") == 0) {
- ret = dict_get_str(xl->options, "remote-host", &rh);
- if (ret < 0)
- goto out;
-
- if (gf_is_local_addr(rh)) {
- local_brick = GF_CALLOC(1, sizeof(tier_brick_list_t),
- gf_tier_mt_bricklist_t);
- if (!local_brick) {
- goto out;
- }
-
- ret = dict_get_str(xl->options, "remote-subvolume", &rv);
- if (ret < 0)
- goto out;
-
- brickname = strrchr(rv, '/') + 1;
- snprintf(db_name, sizeof(db_name), "%s.db", brickname);
-
- local_brick->brick_db_path = GF_MALLOC(PATH_MAX, gf_common_mt_char);
- if (!local_brick->brick_db_path) {
- gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS,
- "Failed to allocate memory for"
- " bricklist.");
- ret = -1;
- goto out;
- }
-
- len = snprintf(local_brick->brick_db_path, PATH_MAX, "%s/%s/%s", rv,
- GF_HIDDEN_PATH, db_name);
- if ((len < 0) || (len >= PATH_MAX)) {
- gf_msg("tier", GF_LOG_ERROR, EINVAL, DHT_MSG_LOG_TIER_STATUS,
- "DB path too long");
- ret = -1;
- goto out;
- }
-
- local_brick->xlator = xl;
-
- snprintf(local_brick->brick_name, NAME_MAX, "%s", brickname);
-
- list_add_tail(&(local_brick->list), local_bricklist_head);
-
- ret = 0;
- goto out;
- }
- }
-
- for (child = xl->children; child; child = child->next) {
- ret = tier_get_bricklist(child->xlator, local_bricklist_head);
- if (ret) {
- goto out;
- }
- }
-
- ret = 0;
-out:
-
- if (ret) {
- if (local_brick) {
- GF_FREE(local_brick->brick_db_path);
- }
- GF_FREE(local_brick);
- }
-
- return ret;
-}
-
-int
-tier_get_freq_demote(gf_tier_conf_t *tier_conf)
-{
- if ((tier_conf->mode == TIER_MODE_WM) &&
- (tier_conf->watermark_last == TIER_WM_HI))
- return DEFAULT_DEMOTE_DEGRADED;
- else
- return tier_conf->tier_demote_frequency;
-}
-
-int
-tier_get_freq_promote(gf_tier_conf_t *tier_conf)
-{
- return tier_conf->tier_promote_frequency;
-}
-
-int
-tier_get_freq_compact_hot(gf_tier_conf_t *tier_conf)
-{
- return tier_conf->tier_compact_hot_frequency;
-}
-
-int
-tier_get_freq_compact_cold(gf_tier_conf_t *tier_conf)
-{
- return tier_conf->tier_compact_cold_frequency;
-}
-
-static int
-tier_check_demote(gfdb_time_t current_time, int freq)
-{
- return ((current_time.tv_sec % freq) == 0) ? _gf_true : _gf_false;
-}
-
-static gf_boolean_t
-tier_check_promote(gf_tier_conf_t *tier_conf, gfdb_time_t current_time,
- int freq)
-{
- if ((tier_conf->mode == TIER_MODE_WM) &&
- (tier_conf->watermark_last == TIER_WM_HI))
- return _gf_false;
-
- else
- return ((current_time.tv_sec % freq) == 0) ? _gf_true : _gf_false;
-}
-
-static gf_boolean_t
-tier_check_compact(gf_tier_conf_t *tier_conf, gfdb_time_t current_time,
- int freq_compact)
-{
- if (!(tier_conf->compact_active || tier_conf->compact_mode_switched))
- return _gf_false;
-
- return ((current_time.tv_sec % freq_compact) == 0) ? _gf_true : _gf_false;
-}
-
-void
-clear_bricklist(struct list_head *brick_list)
-{
- tier_brick_list_t *local_brick = NULL;
- tier_brick_list_t *temp = NULL;
-
- if (list_empty(brick_list)) {
- return;
- }
-
- list_for_each_entry_safe(local_brick, temp, brick_list, list)
- {
- list_del(&local_brick->list);
- GF_FREE(local_brick->brick_db_path);
- GF_FREE(local_brick);
- }
-}
-
-static void
-set_brick_list_qpath(struct list_head *brick_list, gf_boolean_t is_cold)
-{
- tier_brick_list_t *local_brick = NULL;
- int i = 0;
-
- GF_VALIDATE_OR_GOTO("tier", brick_list, out);
-
- list_for_each_entry(local_brick, brick_list, list)
- {
- /* Construct query file path for this brick
- * i.e
- * /var/run/gluster/xlator_name/
- * {promote/demote}-brickname-indexinbricklist
- * So that no two query files will have same path even
- * bricks have the same name
- * */
- snprintf(local_brick->qfile_path, PATH_MAX, "%s-%s-%d",
- GET_QFILE_PATH(is_cold), local_brick->brick_name, i);
- i++;
- }
-out:
- return;
-}
-
-static int
-tier_prepare_compact(migration_args_t *args, gfdb_time_t current_time)
-{
- xlator_t *this = NULL;
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
- gf_tier_conf_t *tier_conf = NULL;
- gf_boolean_t is_hot_tier = args->is_hot_tier;
- int freq = 0;
- int ret = -1;
- const char *tier_type = is_hot_tier ? "hot" : "cold";
-
- this = args->this;
-
- conf = this->private;
-
- defrag = conf->defrag;
-
- tier_conf = &defrag->tier_conf;
-
- freq = is_hot_tier ? tier_get_freq_compact_hot(tier_conf)
- : tier_get_freq_compact_cold(tier_conf);
-
- defrag->tier_conf.compact_mode_switched =
- is_hot_tier ? defrag->tier_conf.compact_mode_switched_hot
- : defrag->tier_conf.compact_mode_switched_cold;
-
- gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
- "Compact mode %i", defrag->tier_conf.compact_mode_switched);
-
- if (tier_check_compact(tier_conf, current_time, freq)) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Start compaction on %s tier", tier_type);
-
- args->freq_time = freq;
- ret = tier_compact(args);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Compaction failed on "
- "%s tier",
- tier_type);
- goto out;
- }
-
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "End compaction on %s tier", tier_type);
-
- if (is_hot_tier) {
- defrag->tier_conf.compact_mode_switched_hot = _gf_false;
- } else {
- defrag->tier_conf.compact_mode_switched_cold = _gf_false;
- }
- }
-
-out:
- return ret;
-}
-
-static int
-tier_get_wm_interval(tier_mode_t mode, tier_watermark_op_t wm)
-{
- if (mode == TIER_MODE_WM && wm == TIER_WM_HI)
- return WM_INTERVAL_EMERG;
-
- return WM_INTERVAL;
-}
-
-/*
- * Main tiering loop. This is called from the promotion and the
- * demotion threads spawned in tier_start().
- *
- * Every second, wake from sleep to perform tasks.
- * 1. Check trigger to migrate data.
- * 2. Check for state changes (pause, unpause, stop).
- */
-static void *
-tier_run(void *in_args)
-{
- dht_conf_t *conf = NULL;
- gfdb_time_t current_time = {0};
- int freq = 0;
- int ret = 0;
- xlator_t *any = NULL;
- xlator_t *xlator = NULL;
- gf_tier_conf_t *tier_conf = NULL;
- loc_t root_loc = {0};
- int check_watermark = 0;
- gf_defrag_info_t *defrag = NULL;
- xlator_t *this = NULL;
- migration_args_t *args = in_args;
- GF_VALIDATE_OR_GOTO("tier", args, out);
- GF_VALIDATE_OR_GOTO("tier", args->brick_list, out);
-
- this = args->this;
- GF_VALIDATE_OR_GOTO("tier", this, out);
-
- conf = this->private;
- GF_VALIDATE_OR_GOTO("tier", conf, out);
-
- defrag = conf->defrag;
- GF_VALIDATE_OR_GOTO("tier", defrag, out);
-
- if (list_empty(args->brick_list)) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_ERROR,
- "Brick list for tier is empty. Exiting.");
- goto out;
- }
-
- defrag->defrag_status = GF_DEFRAG_STATUS_STARTED;
- tier_conf = &defrag->tier_conf;
-
- dht_build_root_loc(defrag->root_inode, &root_loc);
-
- while (1) {
- /*
- * Check if a graph switch occurred. If so, stop migration
- * thread. It will need to be restarted manually.
- */
- any = THIS->ctx->active->first;
- xlator = xlator_search_by_name(any, this->name);
-
- if (xlator != this) {
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Detected graph switch. Exiting migration "
- "daemon.");
- goto out;
- }
-
- gf_defrag_check_pause_tier(tier_conf);
-
- sleep(1);
-
- if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
- ret = 1;
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "defrag->defrag_status != "
- "GF_DEFRAG_STATUS_STARTED");
- goto out;
- }
-
- if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER ||
- defrag->cmd == GF_DEFRAG_CMD_DETACH_START) {
- ret = 0;
- defrag->defrag_status = GF_DEFRAG_STATUS_COMPLETE;
- gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_LOG_TIER_ERROR,
- "defrag->defrag_cmd == "
- "GF_DEFRAG_CMD_START_DETACH_TIER");
- goto out;
- }
-
- if (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING)
- continue;
-
- /* To have proper synchronization amongst all
- * brick holding nodes, so that promotion and demotions
- * start atomically w.r.t promotion/demotion frequency
- * period, all nodes should have their system time
- * in-sync with each other either manually set or
- * using a NTP server*/
- ret = gettimeofday(&current_time, NULL);
- if (ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, errno,
- DHT_MSG_SYS_CALL_GET_TIME_FAILED,
- "Failed to get current time");
- goto out;
- }
-
- check_watermark++;
-
- /* emergency demotion requires frequent watermark monitoring */
- if (check_watermark >=
- tier_get_wm_interval(tier_conf->mode, tier_conf->watermark_last)) {
- check_watermark = 0;
- if (tier_conf->mode == TIER_MODE_WM) {
- ret = tier_get_fs_stat(this, &root_loc);
- if (ret != 0) {
- continue;
- }
- ret = tier_check_watermark(this);
- if (ret != 0) {
- gf_msg(this->name, GF_LOG_CRITICAL, errno,
- DHT_MSG_LOG_TIER_ERROR, "Failed to get watermark");
- continue;
- }
- }
- }
-
- if (args->is_promotion) {
- freq = tier_get_freq_promote(tier_conf);
-
- if (tier_check_promote(tier_conf, current_time, freq)) {
- args->freq_time = freq;
- ret = tier_promote(args);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Promotion failed");
- }
- }
- } else if (args->is_compaction) {
- tier_prepare_compact(args, current_time);
- } else {
- freq = tier_get_freq_demote(tier_conf);
-
- if (tier_check_demote(current_time, freq)) {
- args->freq_time = freq;
- ret = tier_demote(args);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Demotion failed");
- }
- }
- }
-
- /* Check the statfs immediately after the processing threads
- return */
- check_watermark = WM_INTERVAL;
- }
-
- ret = 0;
-out:
-
- args->return_value = ret;
-
- return NULL;
-}
-
-int
-tier_start(xlator_t *this, gf_defrag_info_t *defrag)
-{
- pthread_t promote_thread;
- pthread_t demote_thread;
- pthread_t hot_compact_thread;
- pthread_t cold_compact_thread;
- int ret = -1;
- struct list_head bricklist_hot = {0};
- struct list_head bricklist_cold = {0};
- migration_args_t promotion_args = {0};
- migration_args_t demotion_args = {0};
- migration_args_t hot_compaction_args = {0};
- migration_args_t cold_compaction_args = {0};
- dht_conf_t *conf = NULL;
-
- INIT_LIST_HEAD((&bricklist_hot));
- INIT_LIST_HEAD((&bricklist_cold));
-
- conf = this->private;
-
- tier_get_bricklist(conf->subvolumes[1], &bricklist_hot);
- set_brick_list_qpath(&bricklist_hot, _gf_false);
-
- demotion_args.this = this;
- demotion_args.brick_list = &bricklist_hot;
- demotion_args.defrag = defrag;
- demotion_args.is_promotion = _gf_false;
- demotion_args.is_compaction = _gf_false;
-
- ret = gf_thread_create(&demote_thread, NULL, &tier_run, &demotion_args,
- "tierdem");
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to start demotion thread.");
- defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
- goto cleanup;
- }
-
- tier_get_bricklist(conf->subvolumes[0], &bricklist_cold);
- set_brick_list_qpath(&bricklist_cold, _gf_true);
-
- promotion_args.this = this;
- promotion_args.brick_list = &bricklist_cold;
- promotion_args.defrag = defrag;
- promotion_args.is_promotion = _gf_true;
-
- ret = gf_thread_create(&promote_thread, NULL, &tier_run, &promotion_args,
- "tierpro");
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to start promotion thread.");
- defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
- goto waitforspawned;
- }
-
- hot_compaction_args.this = this;
- hot_compaction_args.brick_list = &bricklist_hot;
- hot_compaction_args.defrag = defrag;
- hot_compaction_args.is_promotion = _gf_false;
- hot_compaction_args.is_compaction = _gf_true;
- hot_compaction_args.is_hot_tier = _gf_true;
-
- ret = gf_thread_create(&hot_compact_thread, NULL, &tier_run,
- &hot_compaction_args, "tierhcom");
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to start compaction thread.");
- defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
- goto waitforspawnedpromote;
- }
-
- cold_compaction_args.this = this;
- cold_compaction_args.brick_list = &bricklist_cold;
- cold_compaction_args.defrag = defrag;
- cold_compaction_args.is_promotion = _gf_false;
- cold_compaction_args.is_compaction = _gf_true;
- cold_compaction_args.is_hot_tier = _gf_false;
-
- ret = gf_thread_create(&cold_compact_thread, NULL, &tier_run,
- &cold_compaction_args, "tierccom");
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Failed to start compaction thread.");
- defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
- goto waitforspawnedhotcompact;
- }
- pthread_join(cold_compact_thread, NULL);
-
-waitforspawnedhotcompact:
- pthread_join(hot_compact_thread, NULL);
-
-waitforspawnedpromote:
- pthread_join(promote_thread, NULL);
-
-waitforspawned:
- pthread_join(demote_thread, NULL);
-
-cleanup:
- clear_bricklist(&bricklist_cold);
- clear_bricklist(&bricklist_hot);
- return ret;
-}
-
-int32_t
-tier_migration_needed(xlator_t *this)
-{
- gf_defrag_info_t *defrag = NULL;
- dht_conf_t *conf = NULL;
- int ret = 0;
-
- conf = this->private;
-
- GF_VALIDATE_OR_GOTO(this->name, conf, out);
- GF_VALIDATE_OR_GOTO(this->name, conf->defrag, out);
-
- defrag = conf->defrag;
-
- if ((defrag->cmd == GF_DEFRAG_CMD_START_TIER) ||
- (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER))
- ret = 1;
-out:
- return ret;
-}
-
-int32_t
-tier_migration_get_dst(xlator_t *this, dht_local_t *local)
-{
- dht_conf_t *conf = NULL;
- int32_t ret = -1;
- gf_defrag_info_t *defrag = NULL;
-
- GF_VALIDATE_OR_GOTO("tier", this, out);
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
-
- conf = this->private;
-
- defrag = conf->defrag;
-
- if (defrag && defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER) {
- local->rebalance.target_node = conf->subvolumes[0];
-
- } else if (conf->subvolumes[0] == local->cached_subvol)
- local->rebalance.target_node = conf->subvolumes[1];
- else
- local->rebalance.target_node = conf->subvolumes[0];
-
- if (local->rebalance.target_node)
- ret = 0;
-
-out:
- return ret;
-}
-
-xlator_t *
-tier_search(xlator_t *this, dht_layout_t *layout, const char *name)
-{
- xlator_t *subvol = NULL;
- dht_conf_t *conf = NULL;
-
- GF_VALIDATE_OR_GOTO("tier", this, out);
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
-
- conf = this->private;
-
- subvol = TIER_HASHED_SUBVOL;
-
-out:
- return subvol;
-}
-
-static int
-tier_load_externals(xlator_t *this)
-{
- int ret = -1;
- char *libpathfull = (LIBDIR "/libgfdb.so.0");
- get_gfdb_methods_t get_gfdb_methods;
-
- GF_VALIDATE_OR_GOTO("this", this, out);
-
- libhandle = dlopen(libpathfull, RTLD_NOW);
- if (!libhandle) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Error loading libgfdb.so %s\n", dlerror());
- ret = -1;
- goto out;
- }
-
- get_gfdb_methods = dlsym(libhandle, "get_gfdb_methods");
- if (!get_gfdb_methods) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Error loading get_gfdb_methods()");
- ret = -1;
- goto out;
- }
-
- get_gfdb_methods(&gfdb_methods);
-
- ret = 0;
-
-out:
- if (ret && libhandle)
- dlclose(libhandle);
-
- return ret;
-}
-
-static tier_mode_t
-tier_validate_mode(char *mode)
-{
- int ret = -1;
-
- if (strcmp(mode, "test") == 0) {
- ret = TIER_MODE_TEST;
- } else {
- ret = TIER_MODE_WM;
- }
-
- return ret;
-}
-
-static gf_boolean_t
-tier_validate_compact_mode(char *mode)
-{
- gf_boolean_t ret = _gf_false;
-
- gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "tier_validate_compact_mode: mode = %s", mode);
-
- if (!strcmp(mode, "on")) {
- ret = _gf_true;
- } else {
- ret = _gf_false;
- }
-
- gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS,
- "tier_validate_compact_mode: ret = %i", ret);
-
- return ret;
-}
-
-int
-tier_init_methods(xlator_t *this)
-{
- int ret = -1;
- dht_conf_t *conf = NULL;
- dht_methods_t *methods = NULL;
-
- GF_VALIDATE_OR_GOTO("tier", this, err);
-
- conf = this->private;
-
- methods = &(conf->methods);
-
- methods->migration_get_dst_subvol = tier_migration_get_dst;
- methods->migration_other = tier_start;
- methods->migration_needed = tier_migration_needed;
- methods->layout_search = tier_search;
-
- ret = 0;
-err:
- return ret;
-}
-
-static void
-tier_save_vol_name(xlator_t *this)
-{
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
- char *suffix = NULL;
- int name_len = 0;
-
- conf = this->private;
- defrag = conf->defrag;
-
- suffix = strstr(this->name, "-tier-dht");
-
- if (suffix)
- name_len = suffix - this->name;
- else
- name_len = strlen(this->name);
-
- if (name_len > GD_VOLUME_NAME_MAX)
- name_len = GD_VOLUME_NAME_MAX;
-
- strncpy(defrag->tier_conf.volname, this->name, name_len);
- defrag->tier_conf.volname[name_len] = 0;
-}
-
-int
-tier_init(xlator_t *this)
-{
- int ret = -1;
- int freq = 0;
- int maxsize = 0;
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
- char *voldir = NULL;
- char *mode = NULL;
- char *paused = NULL;
- tier_mode_t tier_mode = DEFAULT_TIER_MODE;
- gf_boolean_t compact_mode = _gf_false;
-
- ret = dht_init(this);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "tier_init failed");
- goto out;
- }
-
- conf = this->private;
-
- ret = tier_init_methods(this);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "tier_init_methods failed");
- goto out;
- }
-
- if (conf->subvolume_cnt != 2) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Invalid number of subvolumes %d", conf->subvolume_cnt);
- goto out;
- }
-
- /* if instatiated from client side initialization is complete. */
- if (!conf->defrag) {
- ret = 0;
- goto out;
- }
-
- /* if instatiated from server side, load db libraries */
- ret = tier_load_externals(this);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "Could not load externals. Aborting");
- goto out;
- }
-
- defrag = conf->defrag;
-
- defrag->tier_conf.last_demote_qfile_index = 0;
- defrag->tier_conf.last_promote_qfile_index = 0;
-
- defrag->tier_conf.is_tier = 1;
- defrag->this = this;
-
- ret = dict_get_int32(this->options, "tier-max-promote-file-size", &maxsize);
- if (ret) {
- maxsize = 0;
- }
-
- defrag->tier_conf.tier_max_promote_size = maxsize;
-
- ret = dict_get_int32(this->options, "tier-promote-frequency", &freq);
- if (ret) {
- freq = DEFAULT_PROMOTE_FREQ_SEC;
- }
-
- defrag->tier_conf.tier_promote_frequency = freq;
-
- ret = dict_get_int32(this->options, "tier-demote-frequency", &freq);
- if (ret) {
- freq = DEFAULT_DEMOTE_FREQ_SEC;
- }
-
- defrag->tier_conf.tier_demote_frequency = freq;
-
- ret = dict_get_int32(this->options, "tier-hot-compact-frequency", &freq);
- if (ret) {
- freq = DEFAULT_HOT_COMPACT_FREQ_SEC;
- }
-
- defrag->tier_conf.tier_compact_hot_frequency = freq;
-
- ret = dict_get_int32(this->options, "tier-cold-compact-frequency", &freq);
- if (ret) {
- freq = DEFAULT_COLD_COMPACT_FREQ_SEC;
- }
-
- defrag->tier_conf.tier_compact_cold_frequency = freq;
-
- ret = dict_get_int32(this->options, "watermark-hi", &freq);
- if (ret) {
- freq = DEFAULT_WM_HI;
- }
-
- defrag->tier_conf.watermark_hi = freq;
-
- ret = dict_get_int32(this->options, "watermark-low", &freq);
- if (ret) {
- freq = DEFAULT_WM_LOW;
- }
-
- defrag->tier_conf.watermark_low = freq;
-
- ret = dict_get_int32(this->options, "write-freq-threshold", &freq);
- if (ret) {
- freq = DEFAULT_WRITE_FREQ_SEC;
- }
-
- defrag->write_freq_threshold = freq;
-
- ret = dict_get_int32(this->options, "read-freq-threshold", &freq);
- if (ret) {
- freq = DEFAULT_READ_FREQ_SEC;
- }
-
- defrag->read_freq_threshold = freq;
-
- ret = dict_get_int32(this->options, "tier-max-mb", &freq);
- if (ret) {
- freq = DEFAULT_TIER_MAX_MIGRATE_MB;
- }
-
- defrag->tier_conf.max_migrate_bytes = (uint64_t)freq * 1024 * 1024;
-
- ret = dict_get_int32(this->options, "tier-max-files", &freq);
- if (ret) {
- freq = DEFAULT_TIER_MAX_MIGRATE_FILES;
- }
-
- defrag->tier_conf.max_migrate_files = freq;
-
- ret = dict_get_int32(this->options, "tier-query-limit",
- &(defrag->tier_conf.query_limit));
- if (ret) {
- defrag->tier_conf.query_limit = DEFAULT_TIER_QUERY_LIMIT;
- }
-
- ret = dict_get_str(this->options, "tier-compact", &mode);
-
- if (ret) {
- defrag->tier_conf.compact_active = DEFAULT_COMP_MODE;
- } else {
- compact_mode = tier_validate_compact_mode(mode);
- /* If compaction is now active, we need to inform the bricks on
- the hot and cold tier of this. See dht-common.h for more. */
- defrag->tier_conf.compact_active = compact_mode;
- if (compact_mode) {
- defrag->tier_conf.compact_mode_switched_hot = _gf_true;
- defrag->tier_conf.compact_mode_switched_cold = _gf_true;
- }
- }
-
- ret = dict_get_str(this->options, "tier-mode", &mode);
- if (ret) {
- defrag->tier_conf.mode = DEFAULT_TIER_MODE;
- } else {
- tier_mode = tier_validate_mode(mode);
- defrag->tier_conf.mode = tier_mode;
- }
-
- pthread_mutex_init(&defrag->tier_conf.pause_mutex, 0);
-
- gf_defrag_set_pause_state(&defrag->tier_conf, TIER_RUNNING);
-
- ret = dict_get_str(this->options, "tier-pause", &paused);
-
- if (paused && strcmp(paused, "on") == 0)
- gf_defrag_set_pause_state(&defrag->tier_conf, TIER_REQUEST_PAUSE);
-
- ret = gf_asprintf(&voldir, "%s/%s", DEFAULT_VAR_RUN_DIRECTORY, this->name);
- if (ret < 0)
- goto out;
-
- ret = mkdir_p(voldir, 0777, _gf_true);
- if (ret == -1 && errno != EEXIST) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "tier_init failed");
-
- GF_FREE(voldir);
- goto out;
- }
-
- GF_FREE(voldir);
-
- ret = gf_asprintf(&promotion_qfile, "%s/%s/promote",
- DEFAULT_VAR_RUN_DIRECTORY, this->name);
- if (ret < 0)
- goto out;
-
- ret = gf_asprintf(&demotion_qfile, "%s/%s/demote",
- DEFAULT_VAR_RUN_DIRECTORY, this->name);
- if (ret < 0) {
- GF_FREE(promotion_qfile);
- goto out;
- }
-
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "Promote/demote frequency %d/%d "
- "Write/Read freq thresholds %d/%d",
- defrag->tier_conf.tier_promote_frequency,
- defrag->tier_conf.tier_demote_frequency,
- defrag->write_freq_threshold, defrag->read_freq_threshold);
-
- tier_save_vol_name(this);
-
- ret = 0;
-
-out:
-
- return ret;
-}
-
-int
-tier_cli_pause_done(int op_ret, call_frame_t *sync_frame, void *data)
-{
- gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_TIER_PAUSED,
- "Migrate file paused with op_ret %d", op_ret);
-
- return op_ret;
-}
-
-int
-tier_cli_pause(void *data)
-{
- gf_defrag_info_t *defrag = NULL;
- xlator_t *this = NULL;
- dht_conf_t *conf = NULL;
- int ret = -1;
-
- this = data;
-
- conf = this->private;
- GF_VALIDATE_OR_GOTO(this->name, conf, exit);
-
- defrag = conf->defrag;
- GF_VALIDATE_OR_GOTO(this->name, defrag, exit);
-
- gf_defrag_pause_tier(this, defrag);
-
- ret = 0;
-exit:
- return ret;
-}
-
-int
-tier_reconfigure(xlator_t *this, dict_t *options)
-{
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
- char *mode = NULL;
- int migrate_mb = 0;
- gf_boolean_t req_pause = _gf_false;
- int ret = 0;
- call_frame_t *frame = NULL;
- gf_boolean_t last_compact_setting = _gf_false;
-
- conf = this->private;
-
- if (conf->defrag) {
- defrag = conf->defrag;
- GF_OPTION_RECONF("tier-max-promote-file-size",
- defrag->tier_conf.tier_max_promote_size, options,
- int32, out);
-
- GF_OPTION_RECONF("tier-promote-frequency",
- defrag->tier_conf.tier_promote_frequency, options,
- int32, out);
-
- GF_OPTION_RECONF("tier-demote-frequency",
- defrag->tier_conf.tier_demote_frequency, options,
- int32, out);
-
- GF_OPTION_RECONF("write-freq-threshold", defrag->write_freq_threshold,
- options, int32, out);
-
- GF_OPTION_RECONF("read-freq-threshold", defrag->read_freq_threshold,
- options, int32, out);
-
- GF_OPTION_RECONF("watermark-hi", defrag->tier_conf.watermark_hi,
- options, int32, out);
-
- GF_OPTION_RECONF("watermark-low", defrag->tier_conf.watermark_low,
- options, int32, out);
-
- last_compact_setting = defrag->tier_conf.compact_active;
-
- GF_OPTION_RECONF("tier-compact", defrag->tier_conf.compact_active,
- options, bool, out);
-
- if (last_compact_setting != defrag->tier_conf.compact_active) {
- defrag->tier_conf.compact_mode_switched_hot = _gf_true;
- defrag->tier_conf.compact_mode_switched_cold = _gf_true;
- gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
- "compact mode switched");
- }
-
- GF_OPTION_RECONF("tier-hot-compact-frequency",
- defrag->tier_conf.tier_compact_hot_frequency, options,
- int32, out);
-
- GF_OPTION_RECONF("tier-cold-compact-frequency",
- defrag->tier_conf.tier_compact_cold_frequency, options,
- int32, out);
-
- GF_OPTION_RECONF("tier-mode", mode, options, str, out);
- defrag->tier_conf.mode = tier_validate_mode(mode);
-
- GF_OPTION_RECONF("tier-max-mb", migrate_mb, options, int32, out);
- defrag->tier_conf.max_migrate_bytes = (uint64_t)migrate_mb * 1024 *
- 1024;
-
- GF_OPTION_RECONF("tier-max-files", defrag->tier_conf.max_migrate_files,
- options, int32, out);
-
- GF_OPTION_RECONF("tier-query-limit", defrag->tier_conf.query_limit,
- options, int32, out);
-
- GF_OPTION_RECONF("tier-pause", req_pause, options, bool, out);
-
- if (req_pause == _gf_true) {
- frame = create_frame(this, this->ctx->pool);
- if (!frame)
- goto out;
-
- frame->root->pid = GF_CLIENT_PID_DEFRAG;
-
- ret = synctask_new(this->ctx->env, tier_cli_pause,
- tier_cli_pause_done, frame, this);
-
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "pause tier failed on reconfigure");
- }
- } else {
- ret = gf_defrag_resume_tier(this, defrag);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
- "resume tier failed on reconfigure");
- }
- }
- }
-
-out:
- return dht_reconfigure(this, options);
-}
-
-void
-tier_fini(xlator_t *this)
-{
- if (libhandle)
- dlclose(libhandle);
-
- GF_FREE(demotion_qfile);
- GF_FREE(promotion_qfile);
-
- dht_fini(this);
-}
-
-class_methods_t class_methods = {.init = tier_init,
- .fini = tier_fini,
- .reconfigure = tier_reconfigure,
- .notify = dht_notify};
-
-struct xlator_fops fops = {
-
- .lookup = dht_lookup,
- .create = tier_create,
- .mknod = dht_mknod,
-
- .open = dht_open,
- .statfs = tier_statfs,
- .opendir = dht_opendir,
- .readdir = tier_readdir,
- .readdirp = tier_readdirp,
- .fsyncdir = dht_fsyncdir,
- .symlink = dht_symlink,
- .unlink = tier_unlink,
- .link = tier_link,
- .mkdir = dht_mkdir,
- .rmdir = dht_rmdir,
- .rename = dht_rename,
- .entrylk = dht_entrylk,
- .fentrylk = dht_fentrylk,
-
- /* Inode read operations */
- .stat = dht_stat,
- .fstat = dht_fstat,
- .access = dht_access,
- .readlink = dht_readlink,
- .getxattr = dht_getxattr,
- .fgetxattr = dht_fgetxattr,
- .readv = dht_readv,
- .flush = dht_flush,
- .fsync = dht_fsync,
- .inodelk = dht_inodelk,
- .finodelk = dht_finodelk,
- .lk = dht_lk,
-
- /* Inode write operations */
- .fremovexattr = dht_fremovexattr,
- .removexattr = dht_removexattr,
- .setxattr = dht_setxattr,
- .fsetxattr = dht_fsetxattr,
- .truncate = dht_truncate,
- .ftruncate = dht_ftruncate,
- .writev = dht_writev,
- .xattrop = dht_xattrop,
- .fxattrop = dht_fxattrop,
- .setattr = dht_setattr,
- .fsetattr = dht_fsetattr,
- .fallocate = dht_fallocate,
- .discard = dht_discard,
- .zerofill = dht_zerofill,
-};
-
-struct xlator_cbks cbks = {.release = dht_release, .forget = dht_forget};
diff --git a/xlators/cluster/dht/src/tier.h b/xlators/cluster/dht/src/tier.h
deleted file mode 100644
index f0ffdfcd769..00000000000
--- a/xlators/cluster/dht/src/tier.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _TIER_H_
-#define _TIER_H_
-
-/******************************************************************************/
-/* This is from dht-rebalancer.c as we don't have dht-rebalancer.h */
-#include "dht-common.h"
-#include "xlator.h"
-#include <signal.h>
-#include <fnmatch.h>
-#include <signal.h>
-
-/*
- * Size of timer wheel. We would not promote or demote less
- * frequently than this number.
- */
-#define TIMER_SECS 3600
-
-#include "gfdb_data_store.h"
-#include <ctype.h>
-#include <sys/stat.h>
-
-#define PROMOTION_QFILE "promotequeryfile"
-#define DEMOTION_QFILE "demotequeryfile"
-
-#define TIER_HASHED_SUBVOL conf->subvolumes[0]
-#define TIER_UNHASHED_SUBVOL conf->subvolumes[1]
-
-#define GET_QFILE_PATH(is_promotion) \
- (is_promotion) ? promotion_qfile : demotion_qfile
-
-typedef struct tier_qfile_array {
- int *fd_array;
- ssize_t array_size;
- ssize_t next_index;
- /* Indicate the number of exhuasted FDs*/
- ssize_t exhausted_count;
-} tier_qfile_array_t;
-
-typedef struct _query_cbk_args {
- xlator_t *this;
- gf_defrag_info_t *defrag;
- /* This is write */
- int query_fd;
- int is_promotion;
- int is_compaction;
- /* This is for read */
- tier_qfile_array_t *qfile_array;
-} query_cbk_args_t;
-
-int
-gf_run_tier(xlator_t *this, gf_defrag_info_t *defrag);
-
-typedef struct gfdb_brick_info {
- gfdb_time_t *time_stamp;
- gf_boolean_t _gfdb_promote;
- query_cbk_args_t *_query_cbk_args;
-} gfdb_brick_info_t;
-
-typedef struct brick_list {
- xlator_t *xlator;
- char *brick_db_path;
- char brick_name[NAME_MAX];
- char qfile_path[PATH_MAX];
- struct list_head list;
-} tier_brick_list_t;
-
-typedef struct _dm_thread_args {
- xlator_t *this;
- gf_defrag_info_t *defrag;
- struct list_head *brick_list;
- int freq_time;
- int return_value;
- int is_promotion;
- int is_compaction;
- gf_boolean_t is_hot_tier;
-} migration_args_t;
-
-typedef enum tier_watermark_op_ {
- TIER_WM_NONE = 0,
- TIER_WM_LOW,
- TIER_WM_HI,
- TIER_WM_MID
-} tier_watermark_op_t;
-
-#define DEFAULT_PROMOTE_FREQ_SEC 120
-#define DEFAULT_DEMOTE_FREQ_SEC 120
-#define DEFAULT_HOT_COMPACT_FREQ_SEC 604800
-#define DEFAULT_COLD_COMPACT_FREQ_SEC 604800
-#define DEFAULT_DEMOTE_DEGRADED 1
-#define DEFAULT_WRITE_FREQ_SEC 0
-#define DEFAULT_READ_FREQ_SEC 0
-#define DEFAULT_WM_LOW 75
-#define DEFAULT_WM_HI 90
-#define DEFAULT_TIER_MODE TIER_MODE_TEST
-#define DEFAULT_COMP_MODE _gf_true
-#define DEFAULT_TIER_MAX_MIGRATE_MB 1000
-#define DEFAULT_TIER_MAX_MIGRATE_FILES 5000
-#define DEFAULT_TIER_QUERY_LIMIT 100
-
-#endif
diff --git a/xlators/cluster/dht/src/tier.sym b/xlators/cluster/dht/src/tier.sym
deleted file mode 100644
index 60205d145b6..00000000000
--- a/xlators/cluster/dht/src/tier.sym
+++ /dev/null
@@ -1,9 +0,0 @@
-fops
-cbks
-class_methods
-dht_methods
-tier_methods
-options
-mem_acct_init
-reconfigure
-dumpops
diff --git a/xlators/cluster/dht/src/unittest/dht_layout_mock.c b/xlators/cluster/dht/src/unittest/dht_layout_mock.c
index 49bf18b9fe6..771452963d1 100644
--- a/xlators/cluster/dht/src/unittest/dht_layout_mock.c
+++ b/xlators/cluster/dht/src/unittest/dht_layout_mock.c
@@ -7,10 +7,10 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "glusterfs.h"
-#include "xlator.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
#include "dht-common.h"
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
int
dht_hash_compute(xlator_t *this, int type, const char *name, uint32_t *hash_p)
diff --git a/xlators/cluster/dht/src/unittest/dht_layout_unittest.c b/xlators/cluster/dht/src/unittest/dht_layout_unittest.c
index 72890070835..c94a1d0a2e1 100644
--- a/xlators/cluster/dht/src/unittest/dht_layout_unittest.c
+++ b/xlators/cluster/dht/src/unittest/dht_layout_unittest.c
@@ -9,8 +9,8 @@
*/
#include "dht-common.h"
-#include "logging.h"
-#include "xlator.h"
+#include <glusterfs/logging.h>
+#include <glusterfs/xlator.h>
#include <inttypes.h>
#include <stdarg.h>
diff --git a/xlators/cluster/ec/src/ec-code.c b/xlators/cluster/ec/src/ec-code.c
index b3338248569..03162ae05a9 100644
--- a/xlators/cluster/ec/src/ec-code.c
+++ b/xlators/cluster/ec/src/ec-code.c
@@ -14,7 +14,7 @@
#include <sys/stat.h>
#include <ctype.h>
-#include "syscall.h"
+#include <glusterfs/syscall.h>
#include "ec-mem-types.h"
#include "ec-code.h"
diff --git a/xlators/cluster/ec/src/ec-code.h b/xlators/cluster/ec/src/ec-code.h
index 355209c3944..75fb35d93e3 100644
--- a/xlators/cluster/ec/src/ec-code.h
+++ b/xlators/cluster/ec/src/ec-code.h
@@ -11,8 +11,8 @@
#ifndef __EC_CODE_H__
#define __EC_CODE_H__
-#include "xlator.h"
-#include "list.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/list.h>
#include "ec-types.h"
#include "ec-galois.h"
diff --git a/xlators/cluster/ec/src/ec-combine.c b/xlators/cluster/ec/src/ec-combine.c
index 454c2c72c95..703a30e2485 100644
--- a/xlators/cluster/ec/src/ec-combine.c
+++ b/xlators/cluster/ec/src/ec-combine.c
@@ -11,14 +11,14 @@
#include <fnmatch.h>
#include "libxlator.h"
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
#include "ec-types.h"
#include "ec-helpers.h"
#include "ec-common.h"
#include "ec-combine.h"
#include "ec-messages.h"
-#include "quota-common-utils.h"
+#include <glusterfs/quota-common-utils.h>
#define EC_QUOTA_PREFIX "trusted.glusterfs.quota."
@@ -179,13 +179,14 @@ ec_iatt_combine(ec_fop_data_t *fop, struct iatt *dst, struct iatt *src,
"links: %u-%u, uid: %u-%u, gid: %u-%u, "
"rdev: %" PRIu64 "-%" PRIu64 ", size: %" PRIu64 "-%" PRIu64
", "
- "mode: %o-%o)",
+ "mode: %o-%o), %s",
dst[i].ia_ino, src[i].ia_ino, dst[i].ia_nlink,
src[i].ia_nlink, dst[i].ia_uid, src[i].ia_uid, dst[i].ia_gid,
src[i].ia_gid, dst[i].ia_rdev, src[i].ia_rdev,
dst[i].ia_size, src[i].ia_size,
st_mode_from_ia(dst[i].ia_prot, dst[i].ia_type),
- st_mode_from_ia(src[i].ia_prot, dst[i].ia_type));
+ st_mode_from_ia(src[i].ia_prot, dst[i].ia_type),
+ ec_msg_str(fop));
return 0;
}
@@ -342,9 +343,8 @@ out:
}
static int32_t
-ec_dict_data_concat(const char *fmt, ec_cbk_data_t *cbk, int32_t which,
- char *key, char *new_key, const char *def,
- gf_boolean_t global, ...)
+ec_dict_data_concat(ec_cbk_data_t *cbk, int32_t which, char *key, char *new_key,
+ const char *def, gf_boolean_t global, const char *fmt, ...)
{
ec_t *ec = cbk->fop->xl->private;
data_t *data[ec->nodes];
@@ -356,7 +356,7 @@ ec_dict_data_concat(const char *fmt, ec_cbk_data_t *cbk, int32_t which,
ec_dict_list(data, cbk, which, key, global);
- va_start(args, global);
+ va_start(args, fmt);
err = ec_concat_prepare(cbk->fop->xl, &pre, &sep, &post, fmt, args);
va_end(args);
@@ -485,22 +485,12 @@ ec_dict_data_merge(ec_cbk_data_t *cbk, int32_t which, char *key)
tmp = NULL;
- len = dict_serialized_length(lockinfo);
- if (len < 0) {
- err = len;
-
- goto out;
- }
- ptr = GF_MALLOC(len, gf_common_mt_char);
- if (ptr == NULL) {
- err = -ENOMEM;
-
- goto out;
- }
- err = dict_serialize(lockinfo, ptr);
+ err = dict_allocate_and_serialize(lockinfo, (char **)&ptr,
+ (unsigned int *)&len);
if (err != 0) {
goto out;
}
+
dict = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict;
err = dict_set_dynptr(dict, key, ptr, len);
if (err != 0) {
@@ -739,14 +729,14 @@ ec_dict_data_combine(dict_t *dict, char *key, data_t *value, void *arg)
if ((strcmp(key, GF_XATTR_PATHINFO_KEY) == 0) ||
(strcmp(key, GF_XATTR_USER_PATHINFO_KEY) == 0)) {
- return ec_dict_data_concat("(<EC:%s> { })", data->cbk, data->which, key,
- NULL, NULL, _gf_false,
+ return ec_dict_data_concat(data->cbk, data->which, key, NULL, NULL,
+ _gf_false, _gf_false, "(<EC:%s> { })",
data->cbk->fop->xl->name);
}
if (strncmp(key, GF_XATTR_CLRLK_CMD, SLEN(GF_XATTR_CLRLK_CMD)) == 0) {
- return ec_dict_data_concat("{\n}", data->cbk, data->which, key, NULL,
- NULL, _gf_false);
+ return ec_dict_data_concat(data->cbk, data->which, key, NULL, NULL,
+ _gf_false, "{\n}");
}
if (strncmp(key, GF_XATTR_LOCKINFO_KEY, SLEN(GF_XATTR_LOCKINFO_KEY)) == 0) {
@@ -776,9 +766,9 @@ ec_dict_data_combine(dict_t *dict, char *key, data_t *value, void *arg)
if (XATTR_IS_NODE_UUID(key)) {
if (data->cbk->fop->int32) {
/* List of node uuid is requested */
- return ec_dict_data_concat("{ }", data->cbk, data->which, key,
+ return ec_dict_data_concat(data->cbk, data->which, key,
GF_XATTR_LIST_NODE_UUIDS_KEY, UUID0_STR,
- _gf_true);
+ _gf_true, "{ }");
} else {
return ec_dict_data_uuid(data->cbk, data->which, key);
}
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
index 737f7fda882..b955efd8c2d 100644
--- a/xlators/cluster/ec/src/ec-common.c
+++ b/xlators/cluster/ec/src/ec-common.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "byte-order.h"
-#include "hashfn.h"
+#include <glusterfs/byte-order.h>
+#include <glusterfs/hashfn.h>
#include "ec-mem-types.h"
#include "ec-types.h"
@@ -44,16 +44,16 @@ ec_update_fd_status(fd_t *fd, xlator_t *xl, int idx, int32_t ret_status)
UNLOCK(&fd->lock);
}
-static int
-ec_fd_ctx_need_open(fd_t *fd, xlator_t *this, uintptr_t *need_open)
+static uintptr_t
+ec_fd_ctx_need_open(fd_t *fd, xlator_t *this, uintptr_t mask)
{
int i = 0;
int count = 0;
ec_t *ec = NULL;
ec_fd_t *fd_ctx = NULL;
+ uintptr_t need_open = 0;
ec = this->private;
- *need_open = 0;
fd_ctx = ec_fd_get(fd, this);
if (!fd_ctx)
@@ -63,9 +63,9 @@ ec_fd_ctx_need_open(fd_t *fd, xlator_t *this, uintptr_t *need_open)
{
for (i = 0; i < ec->nodes; i++) {
if ((fd_ctx->fd_status[i] == EC_FD_NOT_OPENED) &&
- (ec->xl_up & (1 << i))) {
+ ((ec->xl_up & (1 << i)) != 0) && ((mask & (1 << i)) != 0)) {
fd_ctx->fd_status[i] = EC_FD_OPENING;
- *need_open |= (1 << i);
+ need_open |= (1 << i);
count++;
}
}
@@ -76,10 +76,11 @@ ec_fd_ctx_need_open(fd_t *fd, xlator_t *this, uintptr_t *need_open)
* then ignore fixing the fd as it has been
* requested from heal operation.
*/
- if (count >= ec->fragments)
- count = 0;
+ if (count >= ec->fragments) {
+ need_open = 0;
+ }
- return count;
+ return need_open;
}
static gf_boolean_t
@@ -96,11 +97,11 @@ ec_is_fd_fixable(fd_t *fd)
}
static void
-ec_fix_open(ec_fop_data_t *fop)
+ec_fix_open(ec_fop_data_t *fop, uintptr_t mask)
{
- int call_count = 0;
uintptr_t need_open = 0;
int ret = 0;
+ int32_t flags = 0;
loc_t loc = {
0,
};
@@ -109,9 +110,10 @@ ec_fix_open(ec_fop_data_t *fop)
goto out;
/* Evaluate how many remote fd's to be opened */
- call_count = ec_fd_ctx_need_open(fop->fd, fop->xl, &need_open);
- if (!call_count)
+ need_open = ec_fd_ctx_need_open(fop->fd, fop->xl, mask);
+ if (need_open == 0) {
goto out;
+ }
loc.inode = inode_ref(fop->fd->inode);
gf_uuid_copy(loc.gfid, fop->fd->inode->gfid);
@@ -120,12 +122,15 @@ ec_fix_open(ec_fop_data_t *fop)
goto out;
}
+ flags = fop->fd->flags & (~(O_TRUNC | O_APPEND | O_CREAT | O_EXCL));
if (IA_IFDIR == fop->fd->inode->ia_type) {
- ec_opendir(fop->frame, fop->xl, need_open, EC_MINIMUM_ONE, NULL, NULL,
+ ec_opendir(fop->frame, fop->xl, need_open,
+ EC_MINIMUM_ONE | EC_FOP_NO_PROPAGATE_ERROR, NULL, NULL,
&fop->loc[0], fop->fd, NULL);
} else {
- ec_open(fop->frame, fop->xl, need_open, EC_MINIMUM_ONE, NULL, NULL,
- &loc, fop->fd->flags, fop->fd, NULL);
+ ec_open(fop->frame, fop->xl, need_open,
+ EC_MINIMUM_ONE | EC_FOP_NO_PROPAGATE_ERROR, NULL, NULL, &loc,
+ flags, fop->fd, NULL);
}
out:
@@ -225,7 +230,7 @@ ec_child_next(ec_t *ec, ec_fop_data_t *fop, uint32_t idx)
int32_t
ec_heal_report(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, uintptr_t mask, uintptr_t good,
- uintptr_t bad, dict_t *xdata)
+ uintptr_t bad, uint32_t pending, dict_t *xdata)
{
if (op_ret < 0) {
gf_msg(this->name, GF_LOG_DEBUG, op_errno, EC_MSG_HEAL_FAIL,
@@ -311,16 +316,19 @@ ec_check_status(ec_fop_data_t *fop)
}
}
- gf_msg(fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS,
- "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, "
- "remaining=%s, good=%s, bad=%s)",
- gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes,
- ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
- ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
- ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes),
- ec_bin(str4, sizeof(str4), fop->good, ec->nodes),
- ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good),
- ec->nodes));
+ gf_msg(
+ fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS,
+ "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, "
+ "remaining=%s, good=%s, bad=%s,"
+ "(Least significant bit represents first client/brick of subvol), %s)",
+ gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes,
+ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
+ ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes),
+ ec_bin(str4, sizeof(str4), fop->good, ec->nodes),
+ ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good),
+ ec->nodes),
+ ec_msg_str(fop));
if (fop->use_fd) {
if (fop->fd != NULL) {
ec_fheal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL,
@@ -494,12 +502,16 @@ ec_resume(ec_fop_data_t *fop, int32_t error)
}
void
-ec_resume_parent(ec_fop_data_t *fop, int32_t error)
+ec_resume_parent(ec_fop_data_t *fop)
{
ec_fop_data_t *parent;
+ int32_t error = 0;
parent = fop->parent;
if (parent != NULL) {
+ if ((fop->fop_flags & EC_FOP_NO_PROPAGATE_ERROR) == 0) {
+ error = fop->error;
+ }
ec_trace("RESUME_PARENT", fop, "error=%u", error);
fop->parent = NULL;
ec_resume(parent, error);
@@ -592,6 +604,8 @@ ec_internal_op(ec_fop_data_t *fop)
return _gf_true;
if (fop->id == GF_FOP_FXATTROP)
return _gf_true;
+ if (fop->id == GF_FOP_OPEN)
+ return _gf_true;
return _gf_false;
}
@@ -602,10 +616,10 @@ ec_msg_str(ec_fop_data_t *fop)
loc_t *loc2 = NULL;
char gfid1[64] = {0};
char gfid2[64] = {0};
+ ec_fop_data_t *parent = fop->parent;
if (fop->errstr)
return fop->errstr;
-
if (!fop->use_fd) {
loc1 = &fop->loc[0];
loc2 = &fop->loc[1];
@@ -613,24 +627,46 @@ ec_msg_str(ec_fop_data_t *fop)
if (fop->id == GF_FOP_RENAME) {
gf_asprintf(&fop->errstr,
"FOP : '%s' failed on '%s' and '%s' with gfids "
- "%s and %s respectively",
+ "%s and %s respectively. Parent FOP: %s",
ec_fop_name(fop->id), loc1->path, loc2->path,
uuid_utoa_r(loc1->gfid, gfid1),
- uuid_utoa_r(loc2->gfid, gfid2));
+ uuid_utoa_r(loc2->gfid, gfid2),
+ parent ? ec_fop_name(parent->id) : "No Parent");
} else {
- gf_asprintf(&fop->errstr, "FOP : '%s' failed on '%s' with gfid %s",
- ec_fop_name(fop->id), loc1->path,
- uuid_utoa_r(loc1->gfid, gfid1));
+ gf_asprintf(
+ &fop->errstr,
+ "FOP : '%s' failed on '%s' with gfid %s. Parent FOP: %s",
+ ec_fop_name(fop->id), loc1->path,
+ uuid_utoa_r(loc1->gfid, gfid1),
+ parent ? ec_fop_name(parent->id) : "No Parent");
}
} else {
- gf_asprintf(&fop->errstr, "FOP : '%s' failed on gfid %s",
- ec_fop_name(fop->id),
- uuid_utoa_r(fop->fd->inode->gfid, gfid1));
+ gf_asprintf(
+ &fop->errstr, "FOP : '%s' failed on gfid %s. Parent FOP: %s",
+ ec_fop_name(fop->id), uuid_utoa_r(fop->fd->inode->gfid, gfid1),
+ parent ? ec_fop_name(parent->id) : "No Parent");
}
return fop->errstr;
}
-int32_t
+static void
+ec_log_insufficient_vol(ec_fop_data_t *fop, int32_t have, uint32_t need,
+ int32_t loglevel)
+{
+ ec_t *ec = fop->xl->private;
+ char str1[32], str2[32], str3[32];
+
+ gf_msg(ec->xl->name, loglevel, 0, EC_MSG_CHILDS_INSUFFICIENT,
+ "Insufficient available children for this request: "
+ "Have : %d, Need : %u : Child UP : %s "
+ "Mask: %s, Healing : %s : %s ",
+ have, need, ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
+ ec_bin(str3, sizeof(str3), fop->healing, ec->nodes),
+ ec_msg_str(fop));
+}
+
+static int32_t
ec_child_select(ec_fop_data_t *fop)
{
ec_t *ec = fop->xl->private;
@@ -644,6 +680,9 @@ ec_child_select(ec_fop_data_t *fop)
* unlock should go on all subvols where lock is performed*/
if (fop->parent && !ec_internal_op(fop)) {
fop->mask &= (fop->parent->mask & ~fop->parent->healing);
+ if (ec_is_data_fop(fop->id)) {
+ fop->healing |= fop->parent->healing;
+ }
}
if ((fop->mask & ~ec->xl_up) != 0) {
@@ -684,15 +723,18 @@ ec_child_select(ec_fop_data_t *fop)
ec_trace("SELECT", fop, "");
if ((num < fop->minimum) && (num < ec->fragments)) {
- gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_CHILDS_INSUFFICIENT,
- "Insufficient available children "
- "for this request (have %d, need "
- "%d). %s",
- num, fop->minimum, ec_msg_str(fop));
+ ec_log_insufficient_vol(fop, num, fop->minimum, GF_LOG_ERROR);
return 0;
}
- ec_sleep(fop);
+ if (!fop->parent && fop->lock_count &&
+ (fop->locks[0].update[EC_DATA_TXN] ||
+ fop->locks[0].update[EC_METADATA_TXN])) {
+ if (ec->quorum_count && (num < ec->quorum_count)) {
+ ec_log_insufficient_vol(fop, num, ec->quorum_count, GF_LOG_ERROR);
+ return 0;
+ }
+ }
return 1;
}
@@ -772,6 +814,8 @@ ec_dispatch_one(ec_fop_data_t *fop)
ec_dispatch_start(fop);
if (ec_child_select(fop)) {
+ ec_sleep(fop);
+
fop->expected = 1;
fop->first = ec_select_first_by_read_policy(fop->xl->private, fop);
@@ -806,6 +850,8 @@ ec_dispatch_inc(ec_fop_data_t *fop)
ec_dispatch_start(fop);
if (ec_child_select(fop)) {
+ ec_sleep(fop);
+
fop->expected = gf_bits_count(fop->remaining);
fop->first = 0;
@@ -819,6 +865,8 @@ ec_dispatch_all(ec_fop_data_t *fop)
ec_dispatch_start(fop);
if (ec_child_select(fop)) {
+ ec_sleep(fop);
+
fop->expected = gf_bits_count(fop->remaining);
fop->first = 0;
@@ -837,6 +885,8 @@ ec_dispatch_min(ec_fop_data_t *fop)
ec_dispatch_start(fop);
if (ec_child_select(fop)) {
+ ec_sleep(fop);
+
fop->expected = count = ec->fragments;
fop->first = ec_select_first_by_read_policy(fop->xl->private, fop);
idx = fop->first - 1;
@@ -851,6 +901,23 @@ ec_dispatch_min(ec_fop_data_t *fop)
}
}
+void
+ec_succeed_all(ec_fop_data_t *fop)
+{
+ ec_dispatch_start(fop);
+
+ if (ec_child_select(fop)) {
+ fop->expected = gf_bits_count(fop->remaining);
+ fop->first = 0;
+
+ /* Simulate a successful execution on all bricks */
+ ec_trace("SUCCEED", fop, "");
+
+ fop->good = fop->remaining;
+ fop->remaining = 0;
+ }
+}
+
ec_lock_t *
ec_lock_allocate(ec_fop_data_t *fop, loc_t *loc)
{
@@ -1372,27 +1439,28 @@ ec_get_size_version(ec_lock_link_t *link)
!ec_is_data_fop(fop->id))
link->optimistic_changelog = _gf_true;
+ memset(&loc, 0, sizeof(loc));
+
+ LOCK(&lock->loc.inode->lock);
+
set_dirty = ec_set_dirty_flag(link, ctx, dirty);
/* If ec metadata has already been retrieved, do not try again. */
- if (ctx->have_info && (!set_dirty)) {
+ if (ctx->have_info) {
if (ec_is_data_fop(fop->id)) {
fop->healing |= lock->healing;
}
- return;
+ if (!set_dirty)
+ goto unlock;
}
/* Determine if there's something we need to retrieve for the current
* operation. */
if (!set_dirty && !lock->query && (lock->loc.inode->ia_type != IA_IFREG) &&
(lock->loc.inode->ia_type != IA_INVAL)) {
- return;
+ goto unlock;
}
- memset(&loc, 0, sizeof(loc));
-
- LOCK(&lock->loc.inode->lock);
-
changed_flags = ec_set_xattrop_flags_and_params(lock, link, dirty);
if (link->waiting_flags) {
/* This fop needs to wait until all its flags are cleared which
@@ -1403,6 +1471,7 @@ ec_get_size_version(ec_lock_link_t *link)
GF_ASSERT(!changed_flags);
}
+unlock:
UNLOCK(&lock->loc.inode->lock);
if (!changed_flags)
@@ -1814,6 +1883,10 @@ ec_lock_acquired(ec_lock_link_t *link)
LOCK(&lock->loc.inode->lock);
lock->acquired = _gf_true;
+ if (lock->contention) {
+ lock->release = _gf_true;
+ lock->contention = _gf_false;
+ }
ec_lock_update_fd(lock, fop);
ec_lock_wake_shared(lock, &list);
@@ -1824,7 +1897,8 @@ ec_lock_acquired(ec_lock_link_t *link)
if (fop->use_fd &&
(link->update[EC_DATA_TXN] || link->update[EC_METADATA_TXN])) {
- ec_fix_open(fop);
+ /* Try to reopen closed fd's only if lock has succeeded. */
+ ec_fix_open(fop, lock->mask);
}
ec_lock_resume_shared(&list);
@@ -1838,15 +1912,20 @@ ec_locked(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
ec_lock_link_t *link = NULL;
ec_lock_t *lock = NULL;
+ link = fop->data;
+ lock = link->lock;
if (op_ret >= 0) {
- link = fop->data;
- lock = link->lock;
lock->mask = lock->good_mask = fop->good;
lock->healing = 0;
ec_lock_acquired(link);
ec_lock(fop->parent);
} else {
+ LOCK(&lock->loc.inode->lock);
+ {
+ lock->contention = _gf_false;
+ }
+ UNLOCK(&lock->loc.inode->lock);
gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_PREOP_LOCK_FAILED,
"Failed to complete preop lock");
}
@@ -2177,7 +2256,7 @@ ec_unlocked(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
if (op_ret < 0) {
gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_UNLOCK_FAILED,
- "entry/inode unlocking failed (%s)", ec_fop_name(link->fop->id));
+ "entry/inode unlocking failed :(%s)", ec_msg_str(link->fop));
} else {
ec_trace("UNLOCKED", link->fop, "lock=%p", link->lock);
}
@@ -2214,6 +2293,23 @@ ec_unlock_lock(ec_lock_link_t *link)
}
}
+void
+ec_inode_bad_inc(inode_t *inode, xlator_t *xl)
+{
+ ec_inode_t *ctx = NULL;
+
+ LOCK(&inode->lock);
+ {
+ ctx = __ec_inode_get(inode, xl);
+ if (ctx == NULL) {
+ goto unlock;
+ }
+ ctx->bad_version++;
+ }
+unlock:
+ UNLOCK(&inode->lock);
+}
+
int32_t
ec_update_size_version_done(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xattr,
@@ -2229,6 +2325,12 @@ ec_update_size_version_done(call_frame_t *frame, void *cookie, xlator_t *this,
ctx = lock->ctx;
if (op_ret < 0) {
+ if (link->lock->fd == NULL) {
+ ec_inode_bad_inc(link->lock->loc.inode, this);
+ } else {
+ ec_inode_bad_inc(link->lock->fd->inode, this);
+ }
+
gf_msg(fop->xl->name, fop_log_level(fop->id, op_errno), op_errno,
EC_MSG_SIZE_VERS_UPDATE_FAIL,
"Failed to update version and size. %s", ec_msg_str(fop));
@@ -2371,37 +2473,47 @@ ec_update_info(ec_lock_link_t *link)
uint64_t dirty[2] = {0, 0};
uint64_t size;
ec_t *ec = NULL;
+ uintptr_t mask;
lock = link->lock;
ctx = lock->ctx;
ec = link->fop->xl->private;
/* pre_version[*] will be 0 if have_version is false */
- version[0] = ctx->post_version[0] - ctx->pre_version[0];
- version[1] = ctx->post_version[1] - ctx->pre_version[1];
+ version[EC_DATA_TXN] = ctx->post_version[EC_DATA_TXN] -
+ ctx->pre_version[EC_DATA_TXN];
+ version[EC_METADATA_TXN] = ctx->post_version[EC_METADATA_TXN] -
+ ctx->pre_version[EC_METADATA_TXN];
size = ctx->post_size - ctx->pre_size;
/* If we set the dirty flag for update fop, we have to unset it.
* If fop has failed on some bricks, leave the dirty as marked. */
+
if (lock->unlock_now) {
+ if (version[EC_DATA_TXN]) {
+ /*A data fop will have difference in post and pre version
+ *and for data fop we send writes on healing bricks also */
+ mask = lock->good_mask | lock->healing;
+ } else {
+ mask = lock->good_mask;
+ }
/* Ensure that nodes are up while doing final
* metadata update.*/
- if (!(ec->node_mask & ~lock->good_mask) &&
- !(ec->node_mask & ~ec->xl_up)) {
- if (ctx->dirty[0] != 0) {
- dirty[0] = -1;
+ if (!(ec->node_mask & ~(mask)) && !(ec->node_mask & ~ec->xl_up)) {
+ if (ctx->dirty[EC_DATA_TXN] != 0) {
+ dirty[EC_DATA_TXN] = -1;
}
- if (ctx->dirty[1] != 0) {
- dirty[1] = -1;
+ if (ctx->dirty[EC_METADATA_TXN] != 0) {
+ dirty[EC_METADATA_TXN] = -1;
}
/*If everything is fine and we already
*have version xattr set on entry, there
*is no need to update version again*/
- if (ctx->pre_version[0]) {
- version[0] = 0;
+ if (ctx->pre_version[EC_DATA_TXN]) {
+ version[EC_DATA_TXN] = 0;
}
- if (ctx->pre_version[1]) {
- version[1] = 0;
+ if (ctx->pre_version[EC_METADATA_TXN]) {
+ version[EC_METADATA_TXN] = 0;
}
} else {
link->optimistic_changelog = _gf_false;
@@ -2410,8 +2522,8 @@ ec_update_info(ec_lock_link_t *link)
memset(ctx->dirty, 0, sizeof(ctx->dirty));
}
- if ((version[0] != 0) || (version[1] != 0) || (dirty[0] != 0) ||
- (dirty[1] != 0)) {
+ if ((version[EC_DATA_TXN] != 0) || (version[EC_METADATA_TXN] != 0) ||
+ (dirty[EC_DATA_TXN] != 0) || (dirty[EC_METADATA_TXN] != 0)) {
ec_update_size_version(link, version, size, dirty);
return _gf_true;
}
@@ -2453,13 +2565,20 @@ ec_lock_release(ec_t *ec, inode_t *inode)
goto done;
}
lock = ctx->inode_lock;
- if ((lock == NULL) || !lock->acquired || lock->release) {
+ if ((lock == NULL) || lock->release) {
goto done;
}
gf_msg_debug(ec->xl->name, 0, "Releasing inode %p due to lock contention",
inode);
+ if (!lock->acquired) {
+ /* This happens if some bricks already got the lock while inodelk is in
+ * progress. Set release to true after lock is acquired*/
+ lock->contention = _gf_true;
+ goto done;
+ }
+
/* The lock is not marked to be released, so the frozen list should be
* empty. */
GF_ASSERT(list_empty(&lock->frozen));
@@ -2911,3 +3030,13 @@ ec_manager(ec_fop_data_t *fop, int32_t error)
__ec_manager(fop, error);
}
+
+gf_boolean_t
+__ec_is_last_fop(ec_t *ec)
+{
+ if ((list_empty(&ec->pending_fops)) &&
+ (GF_ATOMIC_GET(ec->async_fop_count) == 0)) {
+ return _gf_true;
+ }
+ return _gf_false;
+}
diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
index bf41c0086f8..51493612ac6 100644
--- a/xlators/cluster/ec/src/ec-common.h
+++ b/xlators/cluster/ec/src/ec-common.h
@@ -11,8 +11,7 @@
#ifndef __EC_COMMON_H__
#define __EC_COMMON_H__
-#include "xlator.h"
-
+#include "glusterfs/compat-errno.h" // for ENODATA on BSD
#include "ec-data.h"
typedef enum { EC_DATA_TXN, EC_METADATA_TXN } ec_txn_t;
@@ -26,6 +25,30 @@ typedef enum { EC_DATA_TXN, EC_METADATA_TXN } ec_txn_t;
#define EC_FLAG_LOCK_SHARED 0x0001
+#define QUORUM_CBK(fn, fop, frame, cookie, this, op_ret, op_errno, params...) \
+ do { \
+ ec_t *__ec = fop->xl->private; \
+ int32_t __op_ret = 0; \
+ int32_t __op_errno = 0; \
+ int32_t __success_count = gf_bits_count(fop->good); \
+ \
+ __op_ret = op_ret; \
+ __op_errno = op_errno; \
+ if (!fop->parent && frame && \
+ (GF_CLIENT_PID_SELF_HEALD != frame->root->pid) && \
+ __ec->quorum_count && (__success_count < __ec->quorum_count) && \
+ op_ret >= 0) { \
+ __op_ret = -1; \
+ __op_errno = EIO; \
+ gf_msg(__ec->xl->name, GF_LOG_ERROR, 0, \
+ EC_MSG_CHILDS_INSUFFICIENT, \
+ "Insufficient available children for this request " \
+ "(have %d, need %d). %s", \
+ __success_count, __ec->quorum_count, ec_msg_str(fop)); \
+ } \
+ fn(frame, cookie, this, __op_ret, __op_errno, params); \
+ } while (0)
+
enum _ec_xattrop_flags {
EC_FLAG_XATTROP,
EC_FLAG_DATA_DIRTY,
@@ -54,9 +77,12 @@ enum _ec_xattrop_flags {
#define EC_SELFHEAL_BIT 62
-#define EC_MINIMUM_ONE -1
-#define EC_MINIMUM_MIN -2
-#define EC_MINIMUM_ALL -3
+#define EC_MINIMUM_ONE (1 << 6)
+#define EC_MINIMUM_MIN (2 << 6)
+#define EC_MINIMUM_ALL (3 << 6)
+#define EC_FOP_NO_PROPAGATE_ERROR (1 << 8)
+#define EC_FOP_MINIMUM(_flags) ((_flags)&255)
+#define EC_FOP_FLAGS(_flags) ((_flags) & ~255)
#define EC_UPDATE_DATA 1
#define EC_UPDATE_META 2
@@ -163,11 +189,14 @@ void
ec_dispatch_one(ec_fop_data_t *fop);
void
+ec_succeed_all(ec_fop_data_t *fop);
+
+void
ec_sleep(ec_fop_data_t *fop);
void
ec_resume(ec_fop_data_t *fop, int32_t error);
void
-ec_resume_parent(ec_fop_data_t *fop, int32_t error);
+ec_resume_parent(ec_fop_data_t *fop);
void
ec_manager(ec_fop_data_t *fop, int32_t error);
@@ -190,4 +219,16 @@ ec_lock_unlocked(call_frame_t *frame, void *cookie, xlator_t *this,
void
ec_update_fd_status(fd_t *fd, xlator_t *xl, int child_index,
int32_t ret_status);
+gf_boolean_t
+ec_is_entry_healing(ec_fop_data_t *fop);
+void
+ec_set_entry_healing(ec_fop_data_t *fop);
+void
+ec_reset_entry_healing(ec_fop_data_t *fop);
+char *
+ec_msg_str(ec_fop_data_t *fop);
+gf_boolean_t
+__ec_is_last_fop(ec_t *ec);
+void
+ec_lock_update_good(ec_lock_t *lock, ec_fop_data_t *fop);
#endif /* __EC_COMMON_H__ */
diff --git a/xlators/cluster/ec/src/ec-data.c b/xlators/cluster/ec/src/ec-data.c
index fae8843a679..06388833546 100644
--- a/xlators/cluster/ec/src/ec-data.c
+++ b/xlators/cluster/ec/src/ec-data.c
@@ -8,7 +8,6 @@
cases as published by the Free Software Foundation.
*/
-#include "ec-mem-types.h"
#include "ec-helpers.h"
#include "ec-common.h"
#include "ec-data.h"
@@ -98,7 +97,7 @@ ec_cbk_data_destroy(ec_cbk_data_t *cbk)
ec_fop_data_t *
ec_fop_data_allocate(call_frame_t *frame, xlator_t *this, int32_t id,
- uint32_t flags, uintptr_t target, int32_t minimum,
+ uint32_t flags, uintptr_t target, uint32_t fop_flags,
ec_wind_f wind, ec_handler_f handler, ec_cbk_t cbks,
void *data)
{
@@ -151,7 +150,8 @@ ec_fop_data_allocate(call_frame_t *frame, xlator_t *this, int32_t id,
fop->refs = 1;
fop->flags = flags;
- fop->minimum = minimum;
+ fop->minimum = EC_FOP_MINIMUM(fop_flags);
+ fop->fop_flags = EC_FOP_FLAGS(fop_flags);
fop->mask = target;
fop->wind = wind;
@@ -201,11 +201,13 @@ ec_handle_last_pending_fop_completion(ec_fop_data_t *fop, gf_boolean_t *notify)
{
ec_t *ec = fop->xl->private;
+ *notify = _gf_false;
+
if (!list_empty(&fop->pending_list)) {
LOCK(&ec->lock);
{
list_del_init(&fop->pending_list);
- *notify = list_empty(&ec->pending_fops);
+ *notify = __ec_is_last_fop(ec);
}
UNLOCK(&ec->lock);
}
@@ -271,7 +273,7 @@ ec_fop_data_release(ec_fop_data_t *fop)
loc_wipe(&fop->loc[1]);
GF_FREE(fop->errstr);
- ec_resume_parent(fop, fop->error);
+ ec_resume_parent(fop);
ec_fop_cleanup(fop);
diff --git a/xlators/cluster/ec/src/ec-data.h b/xlators/cluster/ec/src/ec-data.h
index 112536d554c..c8a74ffe1ed 100644
--- a/xlators/cluster/ec/src/ec-data.h
+++ b/xlators/cluster/ec/src/ec-data.h
@@ -18,7 +18,7 @@ ec_cbk_data_allocate(call_frame_t *frame, xlator_t *this, ec_fop_data_t *fop,
int32_t id, int32_t idx, int32_t op_ret, int32_t op_errno);
ec_fop_data_t *
ec_fop_data_allocate(call_frame_t *frame, xlator_t *this, int32_t id,
- uint32_t flags, uintptr_t target, int32_t minimum,
+ uint32_t flags, uintptr_t target, uint32_t fop_flags,
ec_wind_f wind, ec_handler_f handler, ec_cbk_t cbks,
void *data);
void
diff --git a/xlators/cluster/ec/src/ec-dir-read.c b/xlators/cluster/ec/src/ec-dir-read.c
index ec4cefb1e78..f71dcfac293 100644
--- a/xlators/cluster/ec/src/ec-dir-read.c
+++ b/xlators/cluster/ec/src/ec-dir-read.c
@@ -8,15 +8,11 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
-
#include "ec.h"
#include "ec-messages.h"
#include "ec-helpers.h"
#include "ec-common.h"
#include "ec-combine.h"
-#include "ec-method.h"
#include "ec-fops.h"
/****************************************************************
@@ -127,13 +123,15 @@ ec_manager_opendir(ec_fop_data_t *fop, int32_t state)
return EC_STATE_REPORT;
}
- err = ec_loc_from_loc(fop->xl, &ctx->loc, &fop->loc[0]);
- if (err != 0) {
- UNLOCK(&fop->fd->lock);
+ if (!ctx->loc.inode) {
+ err = ec_loc_from_loc(fop->xl, &ctx->loc, &fop->loc[0]);
+ if (err != 0) {
+ UNLOCK(&fop->fd->lock);
- fop->error = -err;
+ fop->error = -err;
- return EC_STATE_REPORT;
+ return EC_STATE_REPORT;
+ }
}
UNLOCK(&fop->fd->lock);
@@ -219,7 +217,7 @@ ec_manager_opendir(ec_fop_data_t *fop, int32_t state)
void
ec_opendir(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_opendir_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_opendir_cbk_t func, void *data, loc_t *loc,
fd_t *fd, dict_t *xdata)
{
ec_cbk_t callback = {.opendir = func};
@@ -233,7 +231,7 @@ ec_opendir(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_OPENDIR, EC_FLAG_LOCK_SHARED,
- target, minimum, ec_wind_opendir,
+ target, fop_flags, ec_wind_opendir,
ec_manager_opendir, callback, data);
if (fop == NULL) {
goto out;
@@ -388,9 +386,16 @@ ec_manager_readdir(ec_fop_data_t *fop, int32_t state)
/* Return error if opendir has not been successfully called on
* any subvolume. */
ctx = ec_fd_get(fop->fd, fop->xl);
- if ((ctx == NULL) || (ctx->open == 0)) {
- fop->error = EINVAL;
+ if (ctx == NULL) {
+ fop->error = ENOMEM;
+ } else if (ctx->open == 0) {
+ fop->error = EBADFD;
+ }
+ if (fop->error) {
+ gf_msg(fop->xl->name, GF_LOG_ERROR, fop->error,
+ EC_MSG_INVALID_REQUEST, "EC is not winding readdir: %s",
+ ec_msg_str(fop));
return EC_STATE_REPORT;
}
@@ -515,7 +520,7 @@ ec_manager_readdir(ec_fop_data_t *fop, int32_t state)
void
ec_readdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_readdir_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_readdir_cbk_t func, void *data, fd_t *fd,
size_t size, off_t offset, dict_t *xdata)
{
ec_cbk_t callback = {.readdir = func};
@@ -529,7 +534,7 @@ ec_readdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_READDIR, EC_FLAG_LOCK_SHARED,
- target, minimum, ec_wind_readdir,
+ target, fop_flags, ec_wind_readdir,
ec_manager_readdir, callback, data);
if (fop == NULL) {
goto out;
@@ -585,7 +590,7 @@ ec_wind_readdirp(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
void
ec_readdirp(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_readdirp_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_readdirp_cbk_t func, void *data, fd_t *fd,
size_t size, off_t offset, dict_t *xdata)
{
ec_cbk_t callback = {.readdirp = func};
@@ -599,7 +604,7 @@ ec_readdirp(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(
- frame, this, GF_FOP_READDIRP, EC_FLAG_LOCK_SHARED, target, minimum,
+ frame, this, GF_FOP_READDIRP, EC_FLAG_LOCK_SHARED, target, fop_flags,
ec_wind_readdirp, ec_manager_readdir, callback, data);
if (fop == NULL) {
goto out;
diff --git a/xlators/cluster/ec/src/ec-dir-write.c b/xlators/cluster/ec/src/ec-dir-write.c
index f5c38e80dd7..53d27d895c3 100644
--- a/xlators/cluster/ec/src/ec-dir-write.c
+++ b/xlators/cluster/ec/src/ec-dir-write.c
@@ -8,9 +8,6 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
-
#include "ec.h"
#include "ec-messages.h"
#include "ec-helpers.h"
@@ -218,10 +215,10 @@ ec_manager_create(ec_fop_data_t *fop, int32_t state)
GF_ASSERT(cbk != NULL);
if (fop->cbks.create != NULL) {
- fop->cbks.create(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, fop->fd, fop->loc[0].inode,
- &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
- cbk->xdata);
+ QUORUM_CBK(fop->cbks.create, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, fop->fd,
+ fop->loc[0].inode, &cbk->iatt[0], &cbk->iatt[1],
+ &cbk->iatt[2], cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -262,7 +259,7 @@ ec_manager_create(ec_fop_data_t *fop, int32_t state)
void
ec_create(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_create_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_create_cbk_t func, void *data, loc_t *loc,
int32_t flags, mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
ec_cbk_t callback = {.create = func};
@@ -275,7 +272,7 @@ ec_create(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_CREATE, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_CREATE, 0, target, fop_flags,
ec_wind_create, ec_manager_create, callback,
data);
if (fop == NULL) {
@@ -390,9 +387,10 @@ ec_manager_link(ec_fop_data_t *fop, int32_t state)
GF_ASSERT(cbk != NULL);
if (fop->cbks.link != NULL) {
- fop->cbks.link(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, fop->loc[0].inode, &cbk->iatt[0],
- &cbk->iatt[1], &cbk->iatt[2], cbk->xdata);
+ QUORUM_CBK(fop->cbks.link, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, fop->loc[0].inode,
+ &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
+ cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -432,9 +430,9 @@ ec_manager_link(ec_fop_data_t *fop, int32_t state)
}
void
-ec_link(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_link_cbk_t func, void *data, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
+ec_link(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_link_cbk_t func, void *data, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
{
ec_cbk_t callback = {.link = func};
ec_fop_data_t *fop = NULL;
@@ -446,7 +444,7 @@ ec_link(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_LINK, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_LINK, 0, target, fop_flags,
ec_wind_link, ec_manager_link, callback, data);
if (fop == NULL) {
goto out;
@@ -569,9 +567,10 @@ ec_manager_mkdir(ec_fop_data_t *fop, int32_t state)
GF_ASSERT(cbk != NULL);
if (fop->cbks.mkdir != NULL) {
- fop->cbks.mkdir(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, fop->loc[0].inode, &cbk->iatt[0],
- &cbk->iatt[1], &cbk->iatt[2], cbk->xdata);
+ QUORUM_CBK(fop->cbks.mkdir, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, fop->loc[0].inode,
+ &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
+ cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -613,9 +612,9 @@ ec_manager_mkdir(ec_fop_data_t *fop, int32_t state)
}
void
-ec_mkdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_mkdir_cbk_t func, void *data, loc_t *loc, mode_t mode,
- mode_t umask, dict_t *xdata)
+ec_mkdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_mkdir_cbk_t func, void *data, loc_t *loc,
+ mode_t mode, mode_t umask, dict_t *xdata)
{
ec_cbk_t callback = {.mkdir = func};
ec_fop_data_t *fop = NULL;
@@ -627,7 +626,7 @@ ec_mkdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_MKDIR, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_MKDIR, 0, target, fop_flags,
ec_wind_mkdir, ec_manager_mkdir, callback, data);
if (fop == NULL) {
goto out;
@@ -773,9 +772,10 @@ ec_manager_mknod(ec_fop_data_t *fop, int32_t state)
GF_ASSERT(cbk != NULL);
if (fop->cbks.mknod != NULL) {
- fop->cbks.mknod(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, fop->loc[0].inode, &cbk->iatt[0],
- &cbk->iatt[1], &cbk->iatt[2], cbk->xdata);
+ QUORUM_CBK(fop->cbks.mknod, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, fop->loc[0].inode,
+ &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
+ cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -815,9 +815,9 @@ ec_manager_mknod(ec_fop_data_t *fop, int32_t state)
}
void
-ec_mknod(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_mknod_cbk_t func, void *data, loc_t *loc, mode_t mode, dev_t rdev,
- mode_t umask, dict_t *xdata)
+ec_mknod(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_mknod_cbk_t func, void *data, loc_t *loc,
+ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
{
ec_cbk_t callback = {.mknod = func};
ec_fop_data_t *fop = NULL;
@@ -829,7 +829,7 @@ ec_mknod(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_MKNOD, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_MKNOD, 0, target, fop_flags,
ec_wind_mknod, ec_manager_mknod, callback, data);
if (fop == NULL) {
goto out;
@@ -931,10 +931,10 @@ ec_manager_rename(ec_fop_data_t *fop, int32_t state)
GF_ASSERT(cbk != NULL);
if (fop->cbks.rename != NULL) {
- fop->cbks.rename(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
- &cbk->iatt[2], &cbk->iatt[3], &cbk->iatt[4],
- cbk->xdata);
+ QUORUM_CBK(fop->cbks.rename, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
+ &cbk->iatt[1], &cbk->iatt[2], &cbk->iatt[3],
+ &cbk->iatt[4], cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -975,7 +975,7 @@ ec_manager_rename(ec_fop_data_t *fop, int32_t state)
void
ec_rename(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_rename_cbk_t func, void *data, loc_t *oldloc,
+ uint32_t fop_flags, fop_rename_cbk_t func, void *data, loc_t *oldloc,
loc_t *newloc, dict_t *xdata)
{
ec_cbk_t callback = {.rename = func};
@@ -988,7 +988,7 @@ ec_rename(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_RENAME, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_RENAME, 0, target, fop_flags,
ec_wind_rename, ec_manager_rename, callback,
data);
if (fop == NULL) {
@@ -1083,9 +1083,9 @@ ec_manager_rmdir(ec_fop_data_t *fop, int32_t state)
GF_ASSERT(cbk != NULL);
if (fop->cbks.rmdir != NULL) {
- fop->cbks.rmdir(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
- cbk->xdata);
+ QUORUM_CBK(fop->cbks.rmdir, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
+ &cbk->iatt[1], cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -1125,9 +1125,9 @@ ec_manager_rmdir(ec_fop_data_t *fop, int32_t state)
}
void
-ec_rmdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_rmdir_cbk_t func, void *data, loc_t *loc, int xflags,
- dict_t *xdata)
+ec_rmdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_rmdir_cbk_t func, void *data, loc_t *loc,
+ int xflags, dict_t *xdata)
{
ec_cbk_t callback = {.rmdir = func};
ec_fop_data_t *fop = NULL;
@@ -1139,7 +1139,7 @@ ec_rmdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_RMDIR, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_RMDIR, 0, target, fop_flags,
ec_wind_rmdir, ec_manager_rmdir, callback, data);
if (fop == NULL) {
goto out;
@@ -1237,10 +1237,10 @@ ec_manager_symlink(ec_fop_data_t *fop, int32_t state)
GF_ASSERT(cbk != NULL);
if (fop->cbks.symlink != NULL) {
- fop->cbks.symlink(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, fop->loc[0].inode,
- &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
- cbk->xdata);
+ QUORUM_CBK(fop->cbks.symlink, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, fop->loc[0].inode,
+ &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
+ cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -1281,7 +1281,7 @@ ec_manager_symlink(ec_fop_data_t *fop, int32_t state)
void
ec_symlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_symlink_cbk_t func, void *data,
+ uint32_t fop_flags, fop_symlink_cbk_t func, void *data,
const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata)
{
ec_cbk_t callback = {.symlink = func};
@@ -1294,9 +1294,9 @@ ec_symlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_SYMLINK, 0, target, minimum,
- ec_wind_symlink, ec_manager_symlink, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_SYMLINK, 0, target,
+ fop_flags, ec_wind_symlink, ec_manager_symlink,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -1392,9 +1392,9 @@ ec_manager_unlink(ec_fop_data_t *fop, int32_t state)
GF_ASSERT(cbk != NULL);
if (fop->cbks.unlink != NULL) {
- fop->cbks.unlink(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
- cbk->xdata);
+ QUORUM_CBK(fop->cbks.unlink, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
+ &cbk->iatt[1], cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -1435,7 +1435,7 @@ ec_manager_unlink(ec_fop_data_t *fop, int32_t state)
void
ec_unlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_unlink_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_unlink_cbk_t func, void *data, loc_t *loc,
int xflags, dict_t *xdata)
{
ec_cbk_t callback = {.unlink = func};
@@ -1448,7 +1448,7 @@ ec_unlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_UNLINK, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_UNLINK, 0, target, fop_flags,
ec_wind_unlink, ec_manager_unlink, callback,
data);
if (fop == NULL) {
diff --git a/xlators/cluster/ec/src/ec-fops.h b/xlators/cluster/ec/src/ec-fops.h
index 2858d829c73..07edf8a7fec 100644
--- a/xlators/cluster/ec/src/ec-fops.h
+++ b/xlators/cluster/ec/src/ec-fops.h
@@ -11,240 +11,244 @@
#ifndef __EC_FOPS_H__
#define __EC_FOPS_H__
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#include "ec-types.h"
#include "ec-common.h"
void
ec_access(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_access_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_access_cbk_t func, void *data, loc_t *loc,
int32_t mask, dict_t *xdata);
void
ec_create(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_create_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_create_cbk_t func, void *data, loc_t *loc,
int32_t flags, mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata);
void
ec_entrylk(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_entrylk_cbk_t func, void *data,
+ uint32_t fop_flags, fop_entrylk_cbk_t func, void *data,
const char *volume, loc_t *loc, const char *basename,
entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
void
ec_fentrylk(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fentrylk_cbk_t func, void *data,
+ uint32_t fop_flags, fop_fentrylk_cbk_t func, void *data,
const char *volume, fd_t *fd, const char *basename, entrylk_cmd cmd,
entrylk_type type, dict_t *xdata);
void
-ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_flush_cbk_t func, void *data, fd_t *fd, dict_t *xdata);
+ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_flush_cbk_t func, void *data, fd_t *fd,
+ dict_t *xdata);
void
-ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_fsync_cbk_t func, void *data, fd_t *fd, int32_t datasync,
- dict_t *xdata);
+ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fsync_cbk_t func, void *data, fd_t *fd,
+ int32_t datasync, dict_t *xdata);
void
ec_fsyncdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fsyncdir_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fsyncdir_cbk_t func, void *data, fd_t *fd,
int32_t datasync, dict_t *xdata);
void
ec_getxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_getxattr_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_getxattr_cbk_t func, void *data, loc_t *loc,
const char *name, dict_t *xdata);
void
ec_fgetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fgetxattr_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fgetxattr_cbk_t func, void *data, fd_t *fd,
const char *name, dict_t *xdata);
void
-ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_heal_cbk_t func, void *data, loc_t *loc, int32_t partial,
- dict_t *xdata);
+ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_heal_cbk_t func, void *data, loc_t *loc,
+ int32_t partial, dict_t *xdata);
void
-ec_fheal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_fheal_cbk_t func, void *data, fd_t *fd, int32_t partial,
- dict_t *xdata);
+ec_fheal(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fheal_cbk_t func, void *data, fd_t *fd,
+ int32_t partial, dict_t *xdata);
void
ec_inodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner,
- uintptr_t target, int32_t minimum, fop_inodelk_cbk_t func,
+ uintptr_t target, uint32_t fop_flags, fop_inodelk_cbk_t func,
void *data, const char *volume, loc_t *loc, int32_t cmd,
struct gf_flock *flock, dict_t *xdata);
void
ec_finodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner,
- uintptr_t target, int32_t minimum, fop_finodelk_cbk_t func,
+ uintptr_t target, uint32_t fop_flags, fop_finodelk_cbk_t func,
void *data, const char *volume, fd_t *fd, int32_t cmd,
struct gf_flock *flock, dict_t *xdata);
void
-ec_link(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_link_cbk_t func, void *data, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata);
+ec_link(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_link_cbk_t func, void *data, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata);
void
-ec_lk(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+ec_lk(call_frame_t *frame, xlator_t *this, uintptr_t target, uint32_t fop_flags,
fop_lk_cbk_t func, void *data, fd_t *fd, int32_t cmd,
struct gf_flock *flock, dict_t *xdata);
void
ec_lookup(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_lookup_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_lookup_cbk_t func, void *data, loc_t *loc,
dict_t *xdata);
void
-ec_mkdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_mkdir_cbk_t func, void *data, loc_t *loc, mode_t mode,
- mode_t umask, dict_t *xdata);
+ec_mkdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_mkdir_cbk_t func, void *data, loc_t *loc,
+ mode_t mode, mode_t umask, dict_t *xdata);
void
-ec_mknod(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_mknod_cbk_t func, void *data, loc_t *loc, mode_t mode, dev_t rdev,
- mode_t umask, dict_t *xdata);
+ec_mknod(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_mknod_cbk_t func, void *data, loc_t *loc,
+ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata);
void
-ec_open(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_open_cbk_t func, void *data, loc_t *loc, int32_t flags, fd_t *fd,
- dict_t *xdata);
+ec_open(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_open_cbk_t func, void *data, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata);
void
ec_opendir(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_opendir_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_opendir_cbk_t func, void *data, loc_t *loc,
fd_t *fd, dict_t *xdata);
void
ec_readdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_readdir_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_readdir_cbk_t func, void *data, fd_t *fd,
size_t size, off_t offset, dict_t *xdata);
void
ec_readdirp(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_readdirp_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_readdirp_cbk_t func, void *data, fd_t *fd,
size_t size, off_t offset, dict_t *xdata);
void
ec_readlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_readlink_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_readlink_cbk_t func, void *data, loc_t *loc,
size_t size, dict_t *xdata);
void
-ec_readv(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_readv_cbk_t func, void *data, fd_t *fd, size_t size, off_t offset,
- uint32_t flags, dict_t *xdata);
+ec_readv(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_readv_cbk_t func, void *data, fd_t *fd,
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata);
void
ec_removexattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_removexattr_cbk_t func, void *data,
+ uint32_t fop_flags, fop_removexattr_cbk_t func, void *data,
loc_t *loc, const char *name, dict_t *xdata);
void
ec_fremovexattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fremovexattr_cbk_t func, void *data,
+ uint32_t fop_flags, fop_fremovexattr_cbk_t func, void *data,
fd_t *fd, const char *name, dict_t *xdata);
void
ec_rename(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_rename_cbk_t func, void *data, loc_t *oldloc,
+ uint32_t fop_flags, fop_rename_cbk_t func, void *data, loc_t *oldloc,
loc_t *newloc, dict_t *xdata);
void
-ec_rmdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_rmdir_cbk_t func, void *data, loc_t *loc, int xflags,
- dict_t *xdata);
+ec_rmdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_rmdir_cbk_t func, void *data, loc_t *loc,
+ int xflags, dict_t *xdata);
void
ec_setattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_setattr_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_setattr_cbk_t func, void *data, loc_t *loc,
struct iatt *stbuf, int32_t valid, dict_t *xdata);
void
ec_fsetattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fsetattr_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fsetattr_cbk_t func, void *data, fd_t *fd,
struct iatt *stbuf, int32_t valid, dict_t *xdata);
void
ec_setxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_setxattr_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_setxattr_cbk_t func, void *data, loc_t *loc,
dict_t *dict, int32_t flags, dict_t *xdata);
void
ec_fsetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fsetxattr_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fsetxattr_cbk_t func, void *data, fd_t *fd,
dict_t *dict, int32_t flags, dict_t *xdata);
void
-ec_stat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_stat_cbk_t func, void *data, loc_t *loc, dict_t *xdata);
+ec_stat(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_stat_cbk_t func, void *data, loc_t *loc,
+ dict_t *xdata);
void
-ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_fstat_cbk_t func, void *data, fd_t *fd, dict_t *xdata);
+ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fstat_cbk_t func, void *data, fd_t *fd,
+ dict_t *xdata);
void
ec_statfs(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_statfs_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_statfs_cbk_t func, void *data, loc_t *loc,
dict_t *xdata);
void
ec_symlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_symlink_cbk_t func, void *data,
+ uint32_t fop_flags, fop_symlink_cbk_t func, void *data,
const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata);
void
ec_fallocate(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fallocate_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fallocate_cbk_t func, void *data, fd_t *fd,
int32_t mode, off_t offset, size_t len, dict_t *xdata);
void
ec_discard(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_discard_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_discard_cbk_t func, void *data, fd_t *fd,
off_t offset, size_t len, dict_t *xdata);
void
ec_truncate(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_truncate_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_truncate_cbk_t func, void *data, loc_t *loc,
off_t offset, dict_t *xdata);
void
ec_ftruncate(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_ftruncate_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_ftruncate_cbk_t func, void *data, fd_t *fd,
off_t offset, dict_t *xdata);
void
ec_unlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_unlink_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_unlink_cbk_t func, void *data, loc_t *loc,
int xflags, dict_t *xdata);
void
ec_writev(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_writev_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_writev_cbk_t func, void *data, fd_t *fd,
struct iovec *vector, int32_t count, off_t offset, uint32_t flags,
struct iobref *iobref, dict_t *xdata);
void
ec_xattrop(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_xattrop_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_xattrop_cbk_t func, void *data, loc_t *loc,
gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata);
void
ec_fxattrop(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fxattrop_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fxattrop_cbk_t func, void *data, fd_t *fd,
gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata);
void
-ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_seek_cbk_t func, void *data, fd_t *fd, off_t offset,
- gf_seek_what_t what, dict_t *xdata);
+ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_seek_cbk_t func, void *data, fd_t *fd,
+ off_t offset, gf_seek_what_t what, dict_t *xdata);
void
-ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_ipc_cbk_t func, void *data, int32_t op, dict_t *xdata);
+ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_ipc_cbk_t func, void *data, int32_t op,
+ dict_t *xdata);
#endif /* __EC_FOPS_H__ */
diff --git a/xlators/cluster/ec/src/ec-galois.c b/xlators/cluster/ec/src/ec-galois.c
index 8cb4dc2e4e3..6e4990c71f5 100644
--- a/xlators/cluster/ec/src/ec-galois.c
+++ b/xlators/cluster/ec/src/ec-galois.c
@@ -10,9 +10,6 @@
#include <string.h>
-#include "mem-pool.h"
-#include "list.h"
-
#include "ec-mem-types.h"
#include "ec-gf8.h"
#include "ec-helpers.h"
diff --git a/xlators/cluster/ec/src/ec-generic.c b/xlators/cluster/ec/src/ec-generic.c
index 2fd10526494..884deb93669 100644
--- a/xlators/cluster/ec/src/ec-generic.c
+++ b/xlators/cluster/ec/src/ec-generic.c
@@ -8,16 +8,13 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
#include "ec.h"
#include "ec-messages.h"
#include "ec-helpers.h"
#include "ec-common.h"
#include "ec-combine.h"
-#include "ec-method.h"
#include "ec-fops.h"
/* FOP: flush */
@@ -150,9 +147,41 @@ ec_manager_flush(ec_fop_data_t *fop, int32_t state)
}
}
+static int32_t
+ec_validate_fd(fd_t *fd, xlator_t *xl)
+{
+ uint64_t iversion = 0;
+ uint64_t fversion = 0;
+ ec_inode_t *inode_ctx = NULL;
+ ec_fd_t *fd_ctx = NULL;
+
+ LOCK(&fd->lock);
+ {
+ fd_ctx = __ec_fd_get(fd, xl);
+ if (fd_ctx) {
+ fversion = fd_ctx->bad_version;
+ }
+ }
+ UNLOCK(&fd->lock);
+
+ LOCK(&fd->inode->lock);
+ {
+ inode_ctx = __ec_inode_get(fd->inode, xl);
+ if (inode_ctx) {
+ iversion = inode_ctx->bad_version;
+ }
+ }
+ UNLOCK(&fd->inode->lock);
+ if (fversion < iversion) {
+ return EBADF;
+ }
+ return 0;
+}
+
void
-ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_flush_cbk_t func, void *data, fd_t *fd, dict_t *xdata)
+ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_flush_cbk_t func, void *data, fd_t *fd,
+ dict_t *xdata)
{
ec_cbk_t callback = {.flush = func};
ec_fop_data_t *fop = NULL;
@@ -164,7 +193,17 @@ ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_FLUSH, 0, target, minimum,
+ if (fd) {
+ error = ec_validate_fd(fd, this);
+ if (error) {
+ gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD,
+ "Failing %s on %s", gf_fop_list[GF_FOP_FLUSH],
+ fd->inode ? uuid_utoa(fd->inode->gfid) : "");
+ goto out;
+ }
+ }
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FLUSH, 0, target, fop_flags,
ec_wind_flush, ec_manager_flush, callback, data);
if (fop == NULL) {
goto out;
@@ -366,9 +405,9 @@ ec_manager_fsync(ec_fop_data_t *fop, int32_t state)
}
void
-ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_fsync_cbk_t func, void *data, fd_t *fd, int32_t datasync,
- dict_t *xdata)
+ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fsync_cbk_t func, void *data, fd_t *fd,
+ int32_t datasync, dict_t *xdata)
{
ec_cbk_t callback = {.fsync = func};
ec_fop_data_t *fop = NULL;
@@ -380,7 +419,17 @@ ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNC, 0, target, minimum,
+ if (fd) {
+ error = ec_validate_fd(fd, this);
+ if (error) {
+ gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD,
+ "Failing %s on %s", gf_fop_list[GF_FOP_FSYNC],
+ fd->inode ? uuid_utoa(fd->inode->gfid) : "");
+ goto out;
+ }
+ }
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNC, 0, target, fop_flags,
ec_wind_fsync, ec_manager_fsync, callback, data);
if (fop == NULL) {
goto out;
@@ -553,7 +602,7 @@ ec_manager_fsyncdir(ec_fop_data_t *fop, int32_t state)
void
ec_fsyncdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fsyncdir_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fsyncdir_cbk_t func, void *data, fd_t *fd,
int32_t datasync, dict_t *xdata)
{
ec_cbk_t callback = {.fsyncdir = func};
@@ -566,9 +615,9 @@ ec_fsyncdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNCDIR, 0, target, minimum,
- ec_wind_fsyncdir, ec_manager_fsyncdir, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNCDIR, 0, target,
+ fop_flags, ec_wind_fsyncdir, ec_manager_fsyncdir,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -848,7 +897,7 @@ ec_manager_lookup(ec_fop_data_t *fop, int32_t state)
void
ec_lookup(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_lookup_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_lookup_cbk_t func, void *data, loc_t *loc,
dict_t *xdata)
{
ec_cbk_t callback = {.lookup = func};
@@ -862,7 +911,7 @@ ec_lookup(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_LOOKUP, EC_FLAG_LOCK_SHARED,
- target, minimum, ec_wind_lookup,
+ target, fop_flags, ec_wind_lookup,
ec_manager_lookup, callback, data);
if (fop == NULL) {
goto out;
@@ -1033,7 +1082,7 @@ ec_manager_statfs(ec_fop_data_t *fop, int32_t state)
void
ec_statfs(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_statfs_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_statfs_cbk_t func, void *data, loc_t *loc,
dict_t *xdata)
{
ec_cbk_t callback = {.statfs = func};
@@ -1047,7 +1096,7 @@ ec_statfs(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_STATFS, EC_FLAG_LOCK_SHARED,
- target, minimum, ec_wind_statfs,
+ target, fop_flags, ec_wind_statfs,
ec_manager_statfs, callback, data);
if (fop == NULL) {
goto out;
@@ -1270,7 +1319,7 @@ ec_manager_xattrop(ec_fop_data_t *fop, int32_t state)
void
ec_xattrop(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_xattrop_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_xattrop_cbk_t func, void *data, loc_t *loc,
gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
ec_cbk_t callback = {.xattrop = func};
@@ -1283,9 +1332,9 @@ ec_xattrop(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_XATTROP, 0, target, minimum,
- ec_wind_xattrop, ec_manager_xattrop, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_XATTROP, 0, target,
+ fop_flags, ec_wind_xattrop, ec_manager_xattrop,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -1343,7 +1392,7 @@ ec_wind_fxattrop(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
void
ec_fxattrop(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fxattrop_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fxattrop_cbk_t func, void *data, fd_t *fd,
gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
ec_cbk_t callback = {.fxattrop = func};
@@ -1356,9 +1405,9 @@ ec_fxattrop(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_FXATTROP, 0, target, minimum,
- ec_wind_fxattrop, ec_manager_xattrop, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FXATTROP, 0, target,
+ fop_flags, ec_wind_fxattrop, ec_manager_xattrop,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -1507,8 +1556,9 @@ ec_manager_ipc(ec_fop_data_t *fop, int32_t state)
}
void
-ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_ipc_cbk_t func, void *data, int32_t op, dict_t *xdata)
+ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_ipc_cbk_t func, void *data, int32_t op,
+ dict_t *xdata)
{
ec_cbk_t callback = {.ipc = func};
ec_fop_data_t *fop = NULL;
@@ -1520,7 +1570,7 @@ ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_IPC, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_IPC, 0, target, fop_flags,
ec_wind_ipc, ec_manager_ipc, callback, data);
if (fop == NULL) {
goto out;
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
index 9eb5b856932..7d991f04aac 100644
--- a/xlators/cluster/ec/src/ec-heal.c
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -8,16 +8,14 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
-#include "compat-errno.h"
-#include "byte-order.h"
-#include "syncop.h"
-#include "syncop-utils.h"
-#include "cluster-syncop.h"
+#include <glusterfs/defaults.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/syncop-utils.h>
+#include <glusterfs/cluster-syncop.h>
#include "ec.h"
-#include "ec-mem-types.h"
#include "ec-types.h"
#include "ec-messages.h"
#include "ec-helpers.h"
@@ -72,6 +70,7 @@ struct ec_name_data {
char *name;
inode_t *parent;
default_args_cbk_t *replies;
+ uint32_t heal_pending;
};
static char *ec_ignore_xattrs[] = {GF_SELINUX_XATTR_KEY, QUOTA_SIZE_KEY, NULL};
@@ -103,6 +102,48 @@ ec_sh_key_match(dict_t *dict, char *key, data_t *val, void *mdata)
}
/* FOP: heal */
+void
+ec_set_entry_healing(ec_fop_data_t *fop)
+{
+ ec_inode_t *ctx = NULL;
+ loc_t *loc = NULL;
+
+ if (!fop)
+ return;
+
+ loc = &fop->loc[0];
+ LOCK(&loc->inode->lock);
+ {
+ ctx = __ec_inode_get(loc->inode, fop->xl);
+ if (ctx) {
+ ctx->heal_count += 1;
+ }
+ }
+ UNLOCK(&loc->inode->lock);
+}
+
+void
+ec_reset_entry_healing(ec_fop_data_t *fop)
+{
+ ec_inode_t *ctx = NULL;
+ loc_t *loc = NULL;
+ int32_t heal_count = 0;
+ if (!fop)
+ return;
+
+ loc = &fop->loc[0];
+ LOCK(&loc->inode->lock);
+ {
+ ctx = __ec_inode_get(loc->inode, fop->xl);
+ if (ctx) {
+ ctx->heal_count += -1;
+ heal_count = ctx->heal_count;
+ }
+ }
+ UNLOCK(&loc->inode->lock);
+ GF_ASSERT(heal_count >= 0);
+}
+
uintptr_t
ec_heal_check(ec_fop_data_t *fop, uintptr_t *pgood)
{
@@ -325,16 +366,16 @@ ec_heal_data_block(ec_heal_t *heal)
/* FOP: fheal */
void
-ec_fheal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_fheal_cbk_t func, void *data, fd_t *fd, int32_t partial,
- dict_t *xdata)
+ec_fheal(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fheal_cbk_t func, void *data, fd_t *fd,
+ int32_t partial, dict_t *xdata)
{
ec_fd_t *ctx = ec_fd_get(fd, this);
if (ctx != NULL) {
gf_msg_trace("ec", 0, "FHEAL ctx: flags=%X, open=%" PRIXPTR, ctx->flags,
ctx->open);
- ec_heal(frame, this, target, minimum, func, data, &ctx->loc, partial,
+ ec_heal(frame, this, target, fop_flags, func, data, &ctx->loc, partial,
xdata);
}
}
@@ -954,6 +995,7 @@ ec_set_new_entry_dirty(ec_t *ec, loc_t *loc, struct iatt *ia,
ret = -ENOTCONN;
goto out;
}
+
out:
if (xattr)
dict_unref(xattr);
@@ -977,6 +1019,7 @@ ec_delete_stale_name(dict_t *gfid_db, char *key, data_t *d, void *data)
int estale_count = 0;
int i = 0;
call_frame_t *frame = name_data->frame;
+ uuid_t gfid;
ec = name_data->frame->this->private;
EC_REPLIES_ALLOC(replies, ec->nodes);
@@ -985,12 +1028,16 @@ ec_delete_stale_name(dict_t *gfid_db, char *key, data_t *d, void *data)
goto out;
}
+ loc.parent = inode_ref(name_data->parent);
loc.inode = inode_new(name_data->parent->table);
if (!loc.inode) {
ret = -ENOMEM;
goto out;
}
- gf_uuid_parse(key, loc.gfid);
+
+ gf_uuid_parse(key, gfid);
+ gf_uuid_copy(loc.pargfid, name_data->parent->gfid);
+ loc.name = name_data->name;
output = alloca0(ec->nodes);
ret = cluster_lookup(ec->xl_list, name_data->participants, ec->nodes,
replies, output, name_data->frame, ec->xl, &loc, NULL);
@@ -1003,6 +1050,11 @@ ec_delete_stale_name(dict_t *gfid_db, char *key, data_t *d, void *data)
estale_count++;
else
name_data->participants[i] = 0;
+ } else if (gf_uuid_compare(gfid, replies[i].stat.ia_gfid)) {
+ estale_count++;
+ gf_msg_debug(ec->xl->name, 0, "%s/%s: different gfid as %s",
+ uuid_utoa(name_data->parent->gfid), name_data->name,
+ key);
}
}
@@ -1122,6 +1174,7 @@ ec_create_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name,
dict_t *xdata = NULL;
char *linkname = NULL;
ec_config_t config;
+
/* There should be just one gfid key */
EC_REPLIES_ALLOC(replies, ec->nodes);
if (gfid_db->count != 1) {
@@ -1366,6 +1419,11 @@ __ec_heal_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name,
ret = ec_create_name(frame, ec, parent, name, replies, gfid_db, enoent,
participants);
+ if (ret >= 0) {
+ /* If ec_create_name() succeeded we return 1 to indicate that a new
+ * file has been created and it will need to be healed. */
+ ret = 1;
+ }
out:
cluster_replies_wipe(replies, ec->nodes);
loc_wipe(&loc);
@@ -1443,18 +1501,22 @@ ec_name_heal_handler(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
ret = ec_heal_name(name_data->frame, ec, parent->inode, entry->d_name,
name_on);
- if (ret < 0)
+ if (ret < 0) {
memset(name_on, 0, ec->nodes);
+ } else {
+ name_data->heal_pending += ret;
+ }
for (i = 0; i < ec->nodes; i++)
if (name_data->participants[i] && !name_on[i])
name_data->failed_on[i] = 1;
+
return 0;
}
int
ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
- unsigned char *participants)
+ unsigned char *participants, uint32_t *pending)
{
int i = 0;
int j = 0;
@@ -1467,7 +1529,7 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
name_data.frame = frame;
name_data.participants = participants;
name_data.failed_on = alloca0(ec->nodes);
- ;
+ name_data.heal_pending = 0;
for (i = 0; i < ec->nodes; i++) {
if (!participants[i])
@@ -1486,6 +1548,8 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
break;
}
}
+ *pending += name_data.heal_pending;
+
loc_wipe(&loc);
return ret;
}
@@ -1493,7 +1557,7 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
int
__ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode,
unsigned char *heal_on, unsigned char *sources,
- unsigned char *healed_sinks)
+ unsigned char *healed_sinks, uint32_t *pending)
{
unsigned char *locked_on = NULL;
unsigned char *output = NULL;
@@ -1538,7 +1602,7 @@ unlock:
if (sources[i] || healed_sinks[i])
participants[i] = 1;
}
- ret = ec_heal_names(frame, ec, inode, participants);
+ ret = ec_heal_names(frame, ec, inode, participants, pending);
if (EC_COUNT(participants, ec->nodes) <= ec->fragments)
goto out;
@@ -1559,7 +1623,8 @@ out:
int
ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode,
- unsigned char *sources, unsigned char *healed_sinks)
+ unsigned char *sources, unsigned char *healed_sinks,
+ uint32_t *pending)
{
unsigned char *locked_on = NULL;
unsigned char *up_subvols = NULL;
@@ -1590,7 +1655,7 @@ ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode,
goto unlock;
}
ret = __ec_heal_entry(frame, ec, inode, locked_on, sources,
- healed_sinks);
+ healed_sinks, pending);
}
unlock:
cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame,
@@ -1909,16 +1974,16 @@ ec_manager_heal_block(ec_fop_data_t *fop, int32_t state)
case EC_STATE_REPORT:
if (fop->cbks.heal) {
- fop->cbks.heal(fop->req_frame, fop, fop->xl, 0, 0,
+ fop->cbks.heal(fop->req_frame, fop->data, fop->xl, 0, 0,
(heal->good | heal->bad), heal->good, heal->bad,
- NULL);
+ 0, NULL);
}
return EC_STATE_END;
case -EC_STATE_REPORT:
if (fop->cbks.heal) {
- fop->cbks.heal(fop->req_frame, fop, fop->xl, -1, fop->error, 0,
- 0, 0, NULL);
+ fop->cbks.heal(fop->req_frame, fop->data, fop->xl, -1,
+ fop->error, 0, 0, 0, 0, NULL);
}
return EC_STATE_END;
@@ -1933,7 +1998,7 @@ ec_manager_heal_block(ec_fop_data_t *fop, int32_t state)
/*Takes lock */
void
ec_heal_block(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_heal_cbk_t func, ec_heal_t *heal)
+ uint32_t fop_flags, fop_heal_cbk_t func, ec_heal_t *heal)
{
ec_cbk_t callback = {.heal = func};
ec_fop_data_t *fop = NULL;
@@ -1944,7 +2009,7 @@ ec_heal_block(call_frame_t *frame, xlator_t *this, uintptr_t target,
VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, fop_flags,
NULL, ec_manager_heal_block, callback, heal);
if (fop == NULL)
goto out;
@@ -1955,19 +2020,21 @@ out:
if (fop != NULL) {
ec_manager(fop, error);
} else {
- func(frame, NULL, this, -1, error, 0, 0, 0, NULL);
+ func(frame, heal, this, -1, error, 0, 0, 0, 0, NULL);
}
}
int32_t
ec_heal_block_done(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, uintptr_t mask,
- uintptr_t good, uintptr_t bad, dict_t *xdata)
+ uintptr_t good, uintptr_t bad, uint32_t pending,
+ dict_t *xdata)
{
- ec_fop_data_t *fop = cookie;
- ec_heal_t *heal = fop->data;
+ ec_heal_t *heal = cookie;
- fop->heal = NULL;
+ if (heal->fop) {
+ heal->fop->heal = NULL;
+ }
heal->fop = NULL;
heal->error = op_ret < 0 ? op_errno : 0;
syncbarrier_wake(heal->data);
@@ -2259,9 +2326,10 @@ ec_restore_time_and_adjust_versions(call_frame_t *frame, ec_t *ec, fd_t *fd,
loc.inode = inode_ref(fd->inode);
gf_uuid_copy(loc.gfid, fd->inode->gfid);
- ret = cluster_setattr(ec->xl_list, healed_sinks, ec->nodes, replies,
- output, frame, ec->xl, &loc, &source_buf,
- GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME, NULL);
+ ret = cluster_setattr(
+ ec->xl_list, healed_sinks, ec->nodes, replies, output, frame,
+ ec->xl, &loc, &source_buf,
+ GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME | GF_SET_ATTR_CTIME, NULL);
EC_INTERSECT(healed_sinks, healed_sinks, output, ec->nodes);
if (EC_COUNT(healed_sinks, ec->nodes) == 0) {
ret = -ENOTCONN;
@@ -2429,6 +2497,58 @@ out:
return ret;
}
+int
+ec_heal_purge_stale_index(call_frame_t *frame, ec_t *ec, inode_t *inode)
+{
+ int i = 0;
+ int ret = 0;
+ dict_t **xattr = NULL;
+ loc_t loc = {0};
+ uint64_t dirty_xattr[EC_VERSION_SIZE] = {0};
+ unsigned char *on = NULL;
+ default_args_cbk_t *replies = NULL;
+ dict_t *dict = NULL;
+
+ /* Allocate the required memory */
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ on = alloca0(ec->nodes);
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ xattr = GF_CALLOC(ec->nodes, sizeof(*xattr), gf_common_mt_pointer);
+ if (!xattr) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ dict = dict_new();
+ if (!dict) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ for (i = 0; i < ec->nodes; i++) {
+ xattr[i] = dict;
+ on[i] = 1;
+ }
+ ret = dict_set_static_bin(dict, EC_XATTR_DIRTY, dirty_xattr,
+ (sizeof(*dirty_xattr) * EC_VERSION_SIZE));
+ if (ret < 0) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ PARALLEL_FOP_ONLIST(ec->xl_list, on, ec->nodes, replies, frame,
+ ec_wind_xattrop_parallel, &loc, GF_XATTROP_ADD_ARRAY64,
+ xattr, NULL);
+out:
+ if (dict) {
+ dict_unref(dict);
+ }
+ if (xattr) {
+ GF_FREE(xattr);
+ }
+ cluster_replies_wipe(replies, ec->nodes);
+ loc_wipe(&loc);
+ return ret;
+}
+
void
ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
{
@@ -2446,6 +2566,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
intptr_t mbad = 0;
intptr_t good = 0;
intptr_t bad = 0;
+ uint32_t pending = 0;
ec_fop_data_t *fop = data;
gf_boolean_t blocking = _gf_false;
ec_heal_need_t need_heal = EC_HEAL_NONEED;
@@ -2481,7 +2602,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
if (loc->name && strlen(loc->name)) {
ret = ec_heal_name(frame, ec, loc->parent, (char *)loc->name,
participants);
- if (ret == 0) {
+ if (ret >= 0) {
gf_msg_debug(this->name, 0,
"%s: name heal "
"successful on %" PRIXPTR,
@@ -2499,32 +2620,34 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
/* Mount triggers heal only when it detects that it must need heal, shd
* triggers heals periodically which need not be thorough*/
- ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false,
- !ec->shd.iamshd, &need_heal);
-
- if (need_heal == EC_HEAL_NONEED) {
- gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
- "Heal is not required for : %s ", uuid_utoa(loc->gfid));
- goto out;
+ if (ec->shd.iamshd && (ret <= 0)) {
+ ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, _gf_false,
+ &need_heal);
+
+ if (need_heal == EC_HEAL_PURGE_INDEX) {
+ gf_msg(ec->xl->name, GF_LOG_INFO, 0, EC_MSG_HEAL_FAIL,
+ "Index entry needs to be purged for: %s ",
+ uuid_utoa(loc->gfid));
+ /* We need to send zero-xattrop so that stale index entry could be
+ * removed. We need not take lock on this entry to do so as
+ * xattrop on a brick is atomic. */
+ ec_heal_purge_stale_index(frame, ec, loc->inode);
+ goto out;
+ } else if (need_heal == EC_HEAL_NONEED) {
+ gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
+ "Heal is not required for : %s ", uuid_utoa(loc->gfid));
+ goto out;
+ }
}
- msources = alloca0(ec->nodes);
- mhealed_sinks = alloca0(ec->nodes);
- ret = ec_heal_metadata(frame, ec, loc->inode, msources, mhealed_sinks);
- if (ret == 0) {
- mgood = ec_char_array_to_mask(msources, ec->nodes);
- mbad = ec_char_array_to_mask(mhealed_sinks, ec->nodes);
- } else {
- op_ret = -1;
- op_errno = -ret;
- }
sources = alloca0(ec->nodes);
healed_sinks = alloca0(ec->nodes);
if (IA_ISREG(loc->inode->ia_type)) {
ret = ec_heal_data(frame, ec, blocking, loc->inode, sources,
healed_sinks);
} else if (IA_ISDIR(loc->inode->ia_type) && !partial) {
- ret = ec_heal_entry(frame, ec, loc->inode, sources, healed_sinks);
+ ret = ec_heal_entry(frame, ec, loc->inode, sources, healed_sinks,
+ &pending);
} else {
ret = 0;
memcpy(sources, participants, ec->nodes);
@@ -2538,15 +2661,27 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
op_ret = -1;
op_errno = -ret;
}
+ msources = alloca0(ec->nodes);
+ mhealed_sinks = alloca0(ec->nodes);
+ ret = ec_heal_metadata(frame, ec, loc->inode, msources, mhealed_sinks);
+ if (ret == 0) {
+ mgood = ec_char_array_to_mask(msources, ec->nodes);
+ mbad = ec_char_array_to_mask(mhealed_sinks, ec->nodes);
+ } else {
+ op_ret = -1;
+ op_errno = -ret;
+ }
out:
+ ec_reset_entry_healing(fop);
if (fop->cbks.heal) {
- fop->cbks.heal(fop->req_frame, fop, fop->xl, op_ret, op_errno,
+ fop->cbks.heal(fop->req_frame, fop->data, fop->xl, op_ret, op_errno,
ec_char_array_to_mask(participants, ec->nodes),
- mgood & good, mbad & bad, NULL);
+ mgood & good, mbad & bad, pending, NULL);
}
if (frame)
STACK_DESTROY(frame->root);
+
return;
}
@@ -2593,8 +2728,8 @@ void
ec_heal_fail(ec_t *ec, ec_fop_data_t *fop)
{
if (fop->cbks.heal) {
- fop->cbks.heal(fop->req_frame, NULL, ec->xl, -1, fop->error, 0, 0, 0,
- NULL);
+ fop->cbks.heal(fop->req_frame, fop->data, ec->xl, -1, fop->error, 0, 0,
+ 0, 0, NULL);
}
ec_fop_data_release(fop);
}
@@ -2603,13 +2738,31 @@ void
ec_launch_heal(ec_t *ec, ec_fop_data_t *fop)
{
int ret = 0;
+ call_frame_t *frame = NULL;
+
+ frame = create_frame(ec->xl, ec->xl->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ ec_owner_set(frame, frame->root);
+ /*Do heal as root*/
+ frame->root->uid = 0;
+ frame->root->gid = 0;
+ /*Mark the fops as internal*/
+ frame->root->pid = GF_CLIENT_PID_SELF_HEALD;
ret = synctask_new(ec->xl->ctx->env, ec_synctask_heal_wrap, ec_heal_done,
- NULL, fop);
+ frame, fop);
+out:
if (ret < 0) {
ec_fop_set_error(fop, ENOMEM);
ec_heal_fail(ec, fop);
}
+
+ if (frame)
+ STACK_DESTROY(frame->root);
}
void
@@ -2650,11 +2803,33 @@ ec_handle_healers_done(ec_fop_data_t *fop)
ec_launch_heal(ec, heal_fop);
}
+gf_boolean_t
+ec_is_entry_healing(ec_fop_data_t *fop)
+{
+ ec_inode_t *ctx = NULL;
+ int32_t heal_count = 0;
+ loc_t *loc = NULL;
+
+ loc = &fop->loc[0];
+
+ LOCK(&loc->inode->lock);
+ {
+ ctx = __ec_inode_get(loc->inode, fop->xl);
+ if (ctx) {
+ heal_count = ctx->heal_count;
+ }
+ }
+ UNLOCK(&loc->inode->lock);
+ GF_ASSERT(heal_count >= 0);
+ return heal_count;
+}
+
void
ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop)
{
gf_boolean_t can_heal = _gf_true;
ec_t *ec = this->private;
+ ec_fop_data_t *fop_rel = NULL;
if (fop->req_frame == NULL) {
LOCK(&ec->lock);
@@ -2662,8 +2837,13 @@ ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop)
if ((ec->background_heals > 0) &&
(ec->heal_wait_qlen + ec->background_heals) >
(ec->heal_waiters + ec->healers)) {
- list_add_tail(&fop->healer, &ec->heal_waiting);
- ec->heal_waiters++;
+ if (!ec_is_entry_healing(fop)) {
+ list_add_tail(&fop->healer, &ec->heal_waiting);
+ ec->heal_waiters++;
+ ec_set_entry_healing(fop);
+ } else {
+ fop_rel = fop;
+ }
fop = __ec_dequeue_heals(ec);
} else {
can_heal = _gf_false;
@@ -2673,8 +2853,12 @@ ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop)
}
if (can_heal) {
- if (fop)
+ if (fop) {
+ if (fop->req_frame != NULL) {
+ ec_set_entry_healing(fop);
+ }
ec_launch_heal(ec, fop);
+ }
} else {
gf_msg_debug(this->name, 0,
"Max number of heals are "
@@ -2682,12 +2866,15 @@ ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop)
ec_fop_set_error(fop, EBUSY);
ec_heal_fail(ec, fop);
}
+ if (fop_rel) {
+ ec_heal_done(0, NULL, fop_rel);
+ }
}
void
-ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_heal_cbk_t func, void *data, loc_t *loc, int32_t partial,
- dict_t *xdata)
+ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_heal_cbk_t func, void *data, loc_t *loc,
+ int32_t partial, dict_t *xdata)
{
ec_cbk_t callback = {.heal = func};
ec_fop_data_t *fop = NULL;
@@ -2703,7 +2890,7 @@ ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
if (frame && frame->local)
goto fail;
- fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, fop_flags,
NULL, NULL, callback, data);
err = ENOMEM;
@@ -2729,15 +2916,27 @@ fail:
if (fop)
ec_fop_data_release(fop);
if (func)
- func(frame, NULL, this, -1, err, 0, 0, 0, NULL);
+ func(frame, data, this, -1, err, 0, 0, 0, 0, NULL);
}
int
ec_replace_heal_done(int ret, call_frame_t *heal, void *opaque)
{
ec_t *ec = opaque;
+ gf_boolean_t last_fop = _gf_false;
+ if (GF_ATOMIC_DEC(ec->async_fop_count) == 0) {
+ LOCK(&ec->lock);
+ {
+ last_fop = __ec_is_last_fop(ec);
+ }
+ UNLOCK(&ec->lock);
+ }
gf_msg_debug(ec->xl->name, 0, "getxattr on bricks is done ret %d", ret);
+
+ if (last_fop)
+ ec_pending_fops_completed(ec);
+
return 0;
}
@@ -2777,6 +2976,10 @@ ec_replace_brick_heal_wrap(void *opaque)
itable = ec->xl->itable;
else
goto out;
+
+ if (xlator_is_cleanup_starting(ec->xl))
+ goto out;
+
ret = ec_replace_heal(ec, itable->root);
out:
return ret;
@@ -2787,14 +2990,15 @@ ec_launch_replace_heal(ec_t *ec)
{
int ret = -1;
- if (!ec)
- return ret;
ret = synctask_new(ec->xl->ctx->env, ec_replace_brick_heal_wrap,
ec_replace_heal_done, NULL, ec);
+
if (ret < 0) {
gf_msg_debug(ec->xl->name, 0, "Heal failed for replace brick ret = %d",
ret);
+ ec_replace_heal_done(-1, NULL, ec);
}
+
return ret;
}
@@ -2826,7 +3030,7 @@ out:
static int32_t
_need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
gf_boolean_t self_locked, int32_t lock_count,
- ec_heal_need_t *need_heal)
+ ec_heal_need_t *need_heal, uint64_t *versions)
{
int i = 0;
int source_count = 0;
@@ -2836,11 +3040,18 @@ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
*need_heal = EC_HEAL_NONEED;
if (self_locked || lock_count == 0) {
for (i = 0; i < ec->nodes; i++) {
- if (dirty[i]) {
+ if (dirty[i] || (versions[i] != versions[0])) {
*need_heal = EC_HEAL_MUST;
goto out;
}
}
+ /* If lock count is 0, all dirty flags are 0 and all the
+ * versions are macthing then why are we here. It looks
+ * like something went wrong while removing the index entries
+ * after completing a successful heal or fop. In this case
+ * we need to remove this index entry to avoid triggering heal
+ * in a loop and causing lookups again and again*/
+ *need_heal = EC_HEAL_PURGE_INDEX;
} else {
for (i = 0; i < ec->nodes; i++) {
/* Since each lock can only increment the dirty
@@ -2852,6 +3063,9 @@ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
*need_heal = EC_HEAL_MUST;
goto out;
}
+ if (dirty[i] != dirty[0] || (versions[i] != versions[0])) {
+ *need_heal = EC_HEAL_MAYBE;
+ }
}
}
} else {
@@ -2872,7 +3086,6 @@ ec_need_metadata_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
unsigned char *healed_sinks = NULL;
uint64_t *meta_versions = NULL;
int ret = 0;
- int i = 0;
sources = alloca0(ec->nodes);
healed_sinks = alloca0(ec->nodes);
@@ -2885,15 +3098,7 @@ ec_need_metadata_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
}
ret = _need_heal_calculate(ec, dirty, sources, self_locked, lock_count,
- need_heal);
- if (ret == ec->nodes && *need_heal == EC_HEAL_NONEED) {
- for (i = 1; i < ec->nodes; i++) {
- if (meta_versions[i] != meta_versions[0]) {
- *need_heal = EC_HEAL_MUST;
- goto out;
- }
- }
- }
+ need_heal, meta_versions);
out:
return ret;
}
@@ -2929,7 +3134,7 @@ ec_need_data_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
}
ret = _need_heal_calculate(ec, dirty, sources, self_locked, lock_count,
- need_heal);
+ need_heal, data_versions);
out:
return ret;
}
@@ -2957,7 +3162,7 @@ ec_need_entry_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
}
ret = _need_heal_calculate(ec, dirty, sources, self_locked, lock_count,
- need_heal);
+ need_heal, data_versions);
out:
return ret;
}
@@ -3055,10 +3260,6 @@ ec_heal_inspect(call_frame_t *frame, ec_t *ec, inode_t *inode,
need_heal:
ret = ec_need_heal(ec, inode, replies, lock_count, self_locked, thorough,
need_heal);
-
- if (!self_locked && *need_heal == EC_HEAL_MUST) {
- *need_heal = EC_HEAL_MAYBE;
- }
out:
cluster_replies_wipe(replies, ec->nodes);
loc_wipe(&loc);
@@ -3144,7 +3345,7 @@ ec_get_heal_info(xlator_t *this, loc_t *entry_loc, dict_t **dict_rsp)
ret = ec_heal_inspect(frame, ec, loc.inode, up_subvols, _gf_false,
_gf_false, &need_heal);
- if (ret == ec->nodes && need_heal == EC_HEAL_NONEED) {
+ if (ret == ec->nodes && need_heal != EC_HEAL_MAYBE) {
goto set_heal;
}
need_heal = EC_HEAL_NONEED;
diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
index 130790c66ac..5c1586bc9c5 100644
--- a/xlators/cluster/ec/src/ec-heald.c
+++ b/xlators/cluster/ec/src/ec-heald.c
@@ -8,15 +8,14 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
-#include "compat-errno.h"
+#include <glusterfs/defaults.h>
+#include <glusterfs/compat-errno.h>
#include "ec.h"
#include "ec-messages.h"
#include "ec-heald.h"
#include "ec-mem-types.h"
-#include "syncop.h"
-#include "syncop-utils.h"
+#include <glusterfs/syncop.h>
+#include <glusterfs/syncop-utils.h>
#include "protocol-common.h"
#define NTH_INDEX_HEALER(this, n) \
@@ -63,7 +62,7 @@ __ec_shd_healer_wait(struct subvol_healer *healer)
ec = healer->this->private;
disabled_loop:
- wait_till.tv_sec = time(NULL) + 60;
+ wait_till.tv_sec = gf_time() + ec->shd.timeout;
while (!healer->rerun) {
ret = pthread_cond_timedwait(&healer->cond, &healer->mutex, &wait_till);
@@ -71,6 +70,11 @@ disabled_loop:
break;
}
+ if (ec->shutdown) {
+ healer->running = _gf_false;
+ return -1;
+ }
+
ret = healer->rerun;
healer->rerun = 0;
@@ -152,19 +156,78 @@ ec_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name)
return ret;
}
+static gf_boolean_t
+ec_is_heal_completed(char *status)
+{
+ char *bad_pos = NULL;
+ char *zero_pos = NULL;
+
+ if (!status) {
+ return _gf_false;
+ }
+
+ /*Logic:
+ * Status will be of the form Good: <binary>, Bad: <binary>
+ * If heal completes, if we do strchr for '0' it should be present after
+ * 'Bad:' i.e. strRchr for ':'
+ * */
+
+ zero_pos = strchr(status, '0');
+ bad_pos = strrchr(status, ':');
+ if (!zero_pos || !bad_pos) {
+ /*malformed status*/
+ return _gf_false;
+ }
+
+ if (zero_pos > bad_pos) {
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
+
int
ec_shd_selfheal(struct subvol_healer *healer, int child, loc_t *loc,
gf_boolean_t full)
{
+ dict_t *xdata = NULL;
+ dict_t *dict = NULL;
+ uint32_t count;
int32_t ret;
+ char *heal_status = NULL;
+ ec_t *ec = healer->this->private;
+
+ GF_ATOMIC_INC(ec->stats.shd.attempted);
+ ret = syncop_getxattr(healer->this, loc, &dict, EC_XATTR_HEAL, NULL,
+ &xdata);
+ if (ret == 0) {
+ if (dict && (dict_get_str(dict, EC_XATTR_HEAL, &heal_status) == 0)) {
+ if (ec_is_heal_completed(heal_status)) {
+ GF_ATOMIC_INC(ec->stats.shd.completed);
+ }
+ }
+ }
- ret = syncop_getxattr(healer->this, loc, NULL, EC_XATTR_HEAL, NULL, NULL);
- if (!full && (ret >= 0) && (loc->inode->ia_type == IA_IFDIR)) {
+ if (!full && (loc->inode->ia_type == IA_IFDIR)) {
/* If we have just healed a directory, it's possible that
- * other index entries have appeared to be healed. We put a
- * mark so that we can check it later and restart a scan
- * without delay. */
- healer->rerun = _gf_true;
+ * other index entries have appeared to be healed. */
+ if ((xdata != NULL) &&
+ (dict_get_uint32(xdata, EC_XATTR_HEAL_NEW, &count) == 0) &&
+ (count > 0)) {
+ /* Force a rerun of the index healer. */
+ gf_msg_debug(healer->this->name, 0, "%d more entries to heal",
+ count);
+
+ healer->rerun = _gf_true;
+ }
+ }
+
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
+
+ if (dict) {
+ dict_unref(dict);
}
return ret;
@@ -241,9 +304,11 @@ ec_shd_index_sweep(struct subvol_healer *healer)
goto out;
}
+ _mask_cancellation();
ret = syncop_mt_dir_scan(NULL, subvol, &loc, GF_CLIENT_PID_SELF_HEALD,
healer, ec_shd_index_heal, xdata,
ec->shd.max_threads, ec->shd.wait_qlength);
+ _unmask_cancellation();
out:
if (xdata)
dict_unref(xdata);
@@ -263,6 +328,11 @@ ec_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
int ret = 0;
ec = this->private;
+
+ if (this->cleanup_starting) {
+ return -ENOTCONN;
+ }
+
if (ec->xl_up_count <= ec->fragments) {
return -ENOTCONN;
}
@@ -305,11 +375,15 @@ ec_shd_full_sweep(struct subvol_healer *healer, inode_t *inode)
{
ec_t *ec = NULL;
loc_t loc = {0};
+ int ret = -1;
ec = healer->this->private;
loc.inode = inode;
- return syncop_ftw(ec->xl_list[healer->subvol], &loc,
- GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);
+ _mask_cancellation();
+ ret = syncop_ftw(ec->xl_list[healer->subvol], &loc,
+ GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);
+ _unmask_cancellation();
+ return ret;
}
void *
@@ -317,13 +391,16 @@ ec_shd_index_healer(void *data)
{
struct subvol_healer *healer = NULL;
xlator_t *this = NULL;
+ int run = 0;
healer = data;
THIS = this = healer->this;
ec_t *ec = this->private;
for (;;) {
- ec_shd_healer_wait(healer);
+ run = ec_shd_healer_wait(healer);
+ if (run == -1)
+ break;
if (ec->xl_up_count > ec->fragments) {
gf_msg_debug(this->name, 0, "starting index sweep on subvol %s",
@@ -352,16 +429,12 @@ ec_shd_full_healer(void *data)
rootloc.inode = this->itable->root;
for (;;) {
- pthread_mutex_lock(&healer->mutex);
- {
- run = __ec_shd_healer_wait(healer);
- if (!run)
- healer->running = _gf_false;
- }
- pthread_mutex_unlock(&healer->mutex);
-
- if (!run)
+ run = ec_shd_healer_wait(healer);
+ if (run < 0) {
break;
+ } else if (run == 0) {
+ continue;
+ }
if (ec->xl_up_count > ec->fragments) {
gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_FULL_SWEEP_START,
@@ -429,6 +502,9 @@ unlock:
int
ec_shd_full_healer_spawn(xlator_t *this, int subvol)
{
+ if (xlator_is_cleanup_starting(this))
+ return -1;
+
return ec_shd_healer_spawn(this, NTH_FULL_HEALER(this, subvol),
ec_shd_full_healer);
}
@@ -436,6 +512,9 @@ ec_shd_full_healer_spawn(xlator_t *this, int subvol)
int
ec_shd_index_healer_spawn(xlator_t *this, int subvol)
{
+ if (xlator_is_cleanup_starting(this))
+ return -1;
+
return ec_shd_healer_spawn(this, NTH_INDEX_HEALER(this, subvol),
ec_shd_index_healer);
}
@@ -562,3 +641,41 @@ out:
dict_del(output, this->name);
return ret;
}
+
+void
+ec_destroy_healer_object(xlator_t *this, struct subvol_healer *healer)
+{
+ if (!healer)
+ return;
+
+ pthread_cond_destroy(&healer->cond);
+ pthread_mutex_destroy(&healer->mutex);
+}
+
+void
+ec_selfheal_daemon_fini(xlator_t *this)
+{
+ struct subvol_healer *healer = NULL;
+ ec_self_heald_t *shd = NULL;
+ ec_t *priv = NULL;
+ int i = 0;
+
+ priv = this->private;
+ if (!priv)
+ return;
+
+ shd = &priv->shd;
+ if (!shd->iamshd)
+ return;
+
+ for (i = 0; i < priv->nodes; i++) {
+ healer = &shd->index_healers[i];
+ ec_destroy_healer_object(this, healer);
+
+ healer = &shd->full_healers[i];
+ ec_destroy_healer_object(this, healer);
+ }
+
+ GF_FREE(shd->index_healers);
+ GF_FREE(shd->full_healers);
+}
diff --git a/xlators/cluster/ec/src/ec-heald.h b/xlators/cluster/ec/src/ec-heald.h
index 4d141d767e5..6c7da4edc10 100644
--- a/xlators/cluster/ec/src/ec-heald.h
+++ b/xlators/cluster/ec/src/ec-heald.h
@@ -11,9 +11,9 @@
#ifndef __EC_HEALD_H__
#define __EC_HEALD_H__
-#include "xlator.h"
-
-#include "ec-types.h"
+#include "ec-types.h" // for ec_t
+#include "glusterfs/dict.h" // for dict_t
+#include "glusterfs/globals.h" // for xlator_t
int
ec_xl_op(xlator_t *this, dict_t *input, dict_t *output);
@@ -24,4 +24,7 @@ ec_selfheal_daemon_init(xlator_t *this);
void
ec_shd_index_healer_wake(ec_t *ec);
+void
+ec_selfheal_daemon_fini(xlator_t *this);
+
#endif /* __EC_HEALD_H__ */
diff --git a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c
index dec39b9d2aa..48f54475e01 100644
--- a/xlators/cluster/ec/src/ec-helpers.c
+++ b/xlators/cluster/ec/src/ec-helpers.c
@@ -10,7 +10,7 @@
#include <libgen.h>
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
#include "ec.h"
#include "ec-mem-types.h"
@@ -476,7 +476,7 @@ out:
int32_t
ec_loc_setup_path(xlator_t *xl, loc_t *loc)
{
- uuid_t root = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+ static uuid_t root = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
char *name;
int32_t ret = -EINVAL;
@@ -717,6 +717,7 @@ __ec_inode_get(inode_t *inode, xlator_t *xl)
memset(ctx, 0, sizeof(*ctx));
INIT_LIST_HEAD(&ctx->heal);
INIT_LIST_HEAD(&ctx->stripe_cache.lru);
+ ctx->heal_count = 0;
value = (uint64_t)(uintptr_t)ctx;
if (__inode_ctx_set(inode, xl, &value) != 0) {
GF_FREE(ctx);
@@ -752,6 +753,7 @@ __ec_fd_get(fd_t *fd, xlator_t *xl)
{
int i = 0;
ec_fd_t *ctx = NULL;
+ ec_inode_t *ictx = NULL;
uint64_t value = 0;
ec_t *ec = xl->private;
@@ -774,6 +776,12 @@ __ec_fd_get(fd_t *fd, xlator_t *xl)
GF_FREE(ctx);
return NULL;
}
+ /* Only refering bad-version so no need for lock
+ * */
+ ictx = __ec_inode_get(fd->inode, xl);
+ if (ictx) {
+ ctx->bad_version = ictx->bad_version;
+ }
}
} else {
ctx = (ec_fd_t *)(uintptr_t)value;
diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c
index c02d5401439..dad5f4d7018 100644
--- a/xlators/cluster/ec/src/ec-inode-read.c
+++ b/xlators/cluster/ec/src/ec-inode-read.c
@@ -8,9 +8,6 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
-
#include "ec.h"
#include "ec-messages.h"
#include "ec-helpers.h"
@@ -135,7 +132,7 @@ ec_manager_access(ec_fop_data_t *fop, int32_t state)
void
ec_access(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_access_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_access_cbk_t func, void *data, loc_t *loc,
int32_t mask, dict_t *xdata)
{
ec_cbk_t callback = {.access = func};
@@ -149,7 +146,7 @@ ec_access(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_ACCESS, EC_FLAG_LOCK_SHARED,
- target, minimum, ec_wind_access,
+ target, fop_flags, ec_wind_access,
ec_manager_access, callback, data);
if (fop == NULL) {
goto out;
@@ -393,15 +390,34 @@ ec_manager_getxattr(ec_fop_data_t *fop, int32_t state)
int32_t
ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl,
int32_t op_ret, int32_t op_errno, uintptr_t mask,
- uintptr_t good, uintptr_t bad, dict_t *xdata)
+ uintptr_t good, uintptr_t bad, uint32_t pending,
+ dict_t *xdata)
{
- ec_fop_data_t *fop = cookie;
- fop_getxattr_cbk_t func = fop->data;
+ fop_getxattr_cbk_t func = cookie;
ec_t *ec = xl->private;
dict_t *dict = NULL;
char *str;
char bin1[65], bin2[65];
+ /* We try to return the 'pending' information in xdata, but if this cannot
+ * be set, we will ignore it silently. We prefer to report the success or
+ * failure of the heal itself. */
+ if (xdata == NULL) {
+ xdata = dict_new();
+ } else {
+ dict_ref(xdata);
+ }
+ if (xdata != NULL) {
+ if (dict_set_uint32(xdata, EC_XATTR_HEAL_NEW, pending) != 0) {
+ /* dict_set_uint32() is marked as 'warn_unused_result' and gcc
+ * enforces to check the result in this case. However we don't
+ * really care if it succeeded or not. We'll just do the same.
+ *
+ * This empty 'if' avoids the warning, and it will be removed by
+ * the optimizer. */
+ }
+ }
+
if (op_ret >= 0) {
dict = dict_new();
if (dict == NULL) {
@@ -435,18 +451,21 @@ ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl,
}
out:
- func(frame, NULL, xl, op_ret, op_errno, dict, NULL);
+ func(frame, NULL, xl, op_ret, op_errno, dict, xdata);
if (dict != NULL) {
dict_unref(dict);
}
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
return 0;
}
void
ec_getxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_getxattr_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_getxattr_cbk_t func, void *data, loc_t *loc,
const char *name, dict_t *xdata)
{
ec_cbk_t callback = {.getxattr = func};
@@ -468,7 +487,7 @@ ec_getxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
}
fop = ec_fop_data_allocate(
- frame, this, GF_FOP_GETXATTR, EC_FLAG_LOCK_SHARED, target, minimum,
+ frame, this, GF_FOP_GETXATTR, EC_FLAG_LOCK_SHARED, target, fop_flags,
ec_wind_getxattr, ec_manager_getxattr, callback, data);
if (fop == NULL) {
goto out;
@@ -588,7 +607,7 @@ ec_wind_fgetxattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
void
ec_fgetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fgetxattr_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fgetxattr_cbk_t func, void *data, fd_t *fd,
const char *name, dict_t *xdata)
{
ec_cbk_t callback = {.fgetxattr = func};
@@ -602,7 +621,7 @@ ec_fgetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(
- frame, this, GF_FOP_FGETXATTR, EC_FLAG_LOCK_SHARED, target, minimum,
+ frame, this, GF_FOP_FGETXATTR, EC_FLAG_LOCK_SHARED, target, fop_flags,
ec_wind_fgetxattr, ec_manager_getxattr, callback, data);
if (fop == NULL) {
goto out;
@@ -774,13 +793,15 @@ ec_manager_open(ec_fop_data_t *fop, int32_t state)
return EC_STATE_REPORT;
}
- err = ec_loc_from_loc(fop->xl, &ctx->loc, &fop->loc[0]);
- if (err != 0) {
- UNLOCK(&fop->fd->lock);
+ if (!ctx->loc.inode) {
+ err = ec_loc_from_loc(fop->xl, &ctx->loc, &fop->loc[0]);
+ if (err != 0) {
+ UNLOCK(&fop->fd->lock);
- fop->error = -err;
+ fop->error = -err;
- return EC_STATE_REPORT;
+ return EC_STATE_REPORT;
+ }
}
ctx->flags = fop->int32;
@@ -869,9 +890,9 @@ ec_manager_open(ec_fop_data_t *fop, int32_t state)
}
void
-ec_open(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_open_cbk_t func, void *data, loc_t *loc, int32_t flags, fd_t *fd,
- dict_t *xdata)
+ec_open(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_open_cbk_t func, void *data, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata)
{
ec_cbk_t callback = {.open = func};
ec_fop_data_t *fop = NULL;
@@ -884,7 +905,7 @@ ec_open(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_OPEN, EC_FLAG_LOCK_SHARED,
- target, minimum, ec_wind_open, ec_manager_open,
+ target, fop_flags, ec_wind_open, ec_manager_open,
callback, data);
if (fop == NULL) {
goto out;
@@ -1071,7 +1092,7 @@ ec_manager_readlink(ec_fop_data_t *fop, int32_t state)
void
ec_readlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_readlink_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_readlink_cbk_t func, void *data, loc_t *loc,
size_t size, dict_t *xdata)
{
ec_cbk_t callback = {.readlink = func};
@@ -1085,7 +1106,7 @@ ec_readlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(
- frame, this, GF_FOP_READLINK, EC_FLAG_LOCK_SHARED, target, minimum,
+ frame, this, GF_FOP_READLINK, EC_FLAG_LOCK_SHARED, target, fop_flags,
ec_wind_readlink, ec_manager_readlink, callback, data);
if (fop == NULL) {
goto out;
@@ -1331,6 +1352,7 @@ int32_t
ec_manager_readv(ec_fop_data_t *fop, int32_t state)
{
ec_cbk_data_t *cbk;
+ ec_t *ec = fop->xl->private;
switch (state) {
case EC_STATE_INIT:
@@ -1350,6 +1372,9 @@ ec_manager_readv(ec_fop_data_t *fop, int32_t state)
return EC_STATE_DISPATCH;
case EC_STATE_DISPATCH:
+ if (ec->read_mask) {
+ fop->mask &= ec->read_mask;
+ }
ec_dispatch_min(fop);
return EC_STATE_PREPARE_ANSWER;
@@ -1417,9 +1442,9 @@ ec_manager_readv(ec_fop_data_t *fop, int32_t state)
}
void
-ec_readv(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_readv_cbk_t func, void *data, fd_t *fd, size_t size, off_t offset,
- uint32_t flags, dict_t *xdata)
+ec_readv(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_readv_cbk_t func, void *data, fd_t *fd,
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
ec_cbk_t callback = {.readv = func};
ec_fop_data_t *fop = NULL;
@@ -1432,8 +1457,8 @@ ec_readv(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_READ, EC_FLAG_LOCK_SHARED,
- target, minimum, ec_wind_readv, ec_manager_readv,
- callback, data);
+ target, fop_flags, ec_wind_readv,
+ ec_manager_readv, callback, data);
if (fop == NULL) {
goto out;
}
@@ -1637,9 +1662,9 @@ ec_manager_seek(ec_fop_data_t *fop, int32_t state)
}
void
-ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_seek_cbk_t func, void *data, fd_t *fd, off_t offset,
- gf_seek_what_t what, dict_t *xdata)
+ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_seek_cbk_t func, void *data, fd_t *fd,
+ off_t offset, gf_seek_what_t what, dict_t *xdata)
{
ec_cbk_t callback = {.seek = func};
ec_fop_data_t *fop = NULL;
@@ -1652,7 +1677,7 @@ ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_SEEK, EC_FLAG_LOCK_SHARED,
- target, minimum, ec_wind_seek, ec_manager_seek,
+ target, fop_flags, ec_wind_seek, ec_manager_seek,
callback, data);
if (fop == NULL) {
goto out;
@@ -1855,8 +1880,9 @@ ec_manager_stat(ec_fop_data_t *fop, int32_t state)
}
void
-ec_stat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_stat_cbk_t func, void *data, loc_t *loc, dict_t *xdata)
+ec_stat(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_stat_cbk_t func, void *data, loc_t *loc,
+ dict_t *xdata)
{
ec_cbk_t callback = {.stat = func};
ec_fop_data_t *fop = NULL;
@@ -1869,7 +1895,7 @@ ec_stat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_STAT, EC_FLAG_LOCK_SHARED,
- target, minimum, ec_wind_stat, ec_manager_stat,
+ target, fop_flags, ec_wind_stat, ec_manager_stat,
callback, data);
if (fop == NULL) {
goto out;
@@ -1965,8 +1991,9 @@ ec_wind_fstat(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
}
void
-ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
- fop_fstat_cbk_t func, void *data, fd_t *fd, dict_t *xdata)
+ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fstat_cbk_t func, void *data, fd_t *fd,
+ dict_t *xdata)
{
ec_cbk_t callback = {.fstat = func};
ec_fop_data_t *fop = NULL;
@@ -1979,8 +2006,8 @@ ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_FSTAT, EC_FLAG_LOCK_SHARED,
- target, minimum, ec_wind_fstat, ec_manager_stat,
- callback, data);
+ target, fop_flags, ec_wind_fstat,
+ ec_manager_stat, callback, data);
if (fop == NULL) {
goto out;
}
diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c
index b915a992a00..9b5fe2a7fdc 100644
--- a/xlators/cluster/ec/src/ec-inode-write.c
+++ b/xlators/cluster/ec/src/ec-inode-write.c
@@ -8,10 +8,6 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
-
-#include "ec.h"
#include "ec-messages.h"
#include "ec-helpers.h"
#include "ec-common.h"
@@ -89,6 +85,8 @@ ec_update_write(ec_fop_data_t *fop, uintptr_t mask, off_t offset, uint64_t size)
goto out;
}
+ if (fop->locks[0].lock)
+ ec_lock_update_good(fop->locks[0].lock, fop);
vector.iov_base = iobuf->ptr;
vector.iov_len = size;
memset(vector.iov_base, 0, vector.iov_len);
@@ -183,26 +181,26 @@ ec_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
switch (fop->id) {
case GF_FOP_SETXATTR:
if (fop->cbks.setxattr) {
- fop->cbks.setxattr(frame, cookie, this, op_ret, op_errno,
- xdata);
+ QUORUM_CBK(fop->cbks.setxattr, fop, frame, cookie, this, op_ret,
+ op_errno, xdata);
}
break;
case GF_FOP_REMOVEXATTR:
if (fop->cbks.removexattr) {
- fop->cbks.removexattr(frame, cookie, this, op_ret, op_errno,
- xdata);
+ QUORUM_CBK(fop->cbks.removexattr, fop, frame, cookie, this,
+ op_ret, op_errno, xdata);
}
break;
case GF_FOP_FSETXATTR:
if (fop->cbks.fsetxattr) {
- fop->cbks.fsetxattr(frame, cookie, this, op_ret, op_errno,
- xdata);
+ QUORUM_CBK(fop->cbks.fsetxattr, fop, frame, cookie, this,
+ op_ret, op_errno, xdata);
}
break;
case GF_FOP_FREMOVEXATTR:
if (fop->cbks.fremovexattr) {
- fop->cbks.fremovexattr(frame, cookie, this, op_ret, op_errno,
- xdata);
+ QUORUM_CBK(fop->cbks.fremovexattr, fop, frame, cookie, this,
+ op_ret, op_errno, xdata);
}
break;
}
@@ -281,7 +279,7 @@ ec_manager_xattr(ec_fop_data_t *fop, int32_t state)
void
ec_removexattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_removexattr_cbk_t func, void *data,
+ uint32_t fop_flags, fop_removexattr_cbk_t func, void *data,
loc_t *loc, const char *name, dict_t *xdata)
{
ec_cbk_t callback = {.removexattr = func};
@@ -295,7 +293,7 @@ ec_removexattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_REMOVEXATTR, 0, target,
- minimum, ec_wind_removexattr, ec_manager_xattr,
+ fop_flags, ec_wind_removexattr, ec_manager_xattr,
callback, data);
if (fop == NULL) {
goto out;
@@ -361,7 +359,7 @@ ec_wind_fremovexattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
void
ec_fremovexattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fremovexattr_cbk_t func, void *data,
+ uint32_t fop_flags, fop_fremovexattr_cbk_t func, void *data,
fd_t *fd, const char *name, dict_t *xdata)
{
ec_cbk_t callback = {.fremovexattr = func};
@@ -375,8 +373,8 @@ ec_fremovexattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_FREMOVEXATTR, 0, target,
- minimum, ec_wind_fremovexattr, ec_manager_xattr,
- callback, data);
+ fop_flags, ec_wind_fremovexattr,
+ ec_manager_xattr, callback, data);
if (fop == NULL) {
goto out;
}
@@ -492,16 +490,15 @@ ec_manager_setattr(ec_fop_data_t *fop, int32_t state)
if (fop->id == GF_FOP_SETATTR) {
if (fop->cbks.setattr != NULL) {
- fop->cbks.setattr(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, &cbk->iatt[0],
- &cbk->iatt[1], cbk->xdata);
+ QUORUM_CBK(fop->cbks.setattr, fop, fop->req_frame, fop,
+ fop->xl, cbk->op_ret, cbk->op_errno,
+ &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
}
} else {
if (fop->cbks.fsetattr != NULL) {
- fop->cbks.fsetattr(fop->req_frame, fop, fop->xl,
- cbk->op_ret, cbk->op_errno,
- &cbk->iatt[0], &cbk->iatt[1],
- cbk->xdata);
+ QUORUM_CBK(fop->cbks.fsetattr, fop, fop->req_frame, fop,
+ fop->xl, cbk->op_ret, cbk->op_errno,
+ &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
}
}
@@ -550,7 +547,7 @@ ec_manager_setattr(ec_fop_data_t *fop, int32_t state)
void
ec_setattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_setattr_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_setattr_cbk_t func, void *data, loc_t *loc,
struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
ec_cbk_t callback = {.setattr = func};
@@ -563,9 +560,9 @@ ec_setattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_SETATTR, 0, target, minimum,
- ec_wind_setattr, ec_manager_setattr, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_SETATTR, 0, target,
+ fop_flags, ec_wind_setattr, ec_manager_setattr,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -627,7 +624,7 @@ ec_wind_fsetattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
void
ec_fsetattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fsetattr_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fsetattr_cbk_t func, void *data, fd_t *fd,
struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
ec_cbk_t callback = {.fsetattr = func};
@@ -640,9 +637,9 @@ ec_fsetattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_FSETATTR, 0, target, minimum,
- ec_wind_fsetattr, ec_manager_setattr, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FSETATTR, 0, target,
+ fop_flags, ec_wind_fsetattr, ec_manager_setattr,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -707,7 +704,7 @@ ec_wind_setxattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
void
ec_setxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_setxattr_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_setxattr_cbk_t func, void *data, loc_t *loc,
dict_t *dict, int32_t flags, dict_t *xdata)
{
ec_cbk_t callback = {.setxattr = func};
@@ -720,9 +717,9 @@ ec_setxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_SETXATTR, 0, target, minimum,
- ec_wind_setxattr, ec_manager_xattr, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_SETXATTR, 0, target,
+ fop_flags, ec_wind_setxattr, ec_manager_xattr,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -825,7 +822,7 @@ ec_wind_fsetxattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
void
ec_fsetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fsetxattr_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fsetxattr_cbk_t func, void *data, fd_t *fd,
dict_t *dict, int32_t flags, dict_t *xdata)
{
ec_cbk_t callback = {.fsetxattr = func};
@@ -839,7 +836,7 @@ ec_fsetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_FSETXATTR, 0, target,
- minimum, ec_wind_fsetxattr, ec_manager_xattr,
+ fop_flags, ec_wind_fsetxattr, ec_manager_xattr,
callback, data);
if (fop == NULL) {
goto out;
@@ -992,9 +989,9 @@ ec_manager_fallocate(ec_fop_data_t *fop, int32_t state)
GF_ASSERT(cbk != NULL);
if (fop->cbks.fallocate != NULL) {
- fop->cbks.fallocate(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
- cbk->xdata);
+ QUORUM_CBK(fop->cbks.fallocate, fop, fop->req_frame, fop,
+ fop->xl, cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
+ &cbk->iatt[1], cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -1035,7 +1032,7 @@ ec_manager_fallocate(ec_fop_data_t *fop, int32_t state)
void
ec_fallocate(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fallocate_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_fallocate_cbk_t func, void *data, fd_t *fd,
int32_t mode, off_t offset, size_t len, dict_t *xdata)
{
ec_cbk_t callback = {.fallocate = func};
@@ -1049,8 +1046,8 @@ ec_fallocate(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_FALLOCATE, 0, target,
- minimum, ec_wind_fallocate, ec_manager_fallocate,
- callback, data);
+ fop_flags, ec_wind_fallocate,
+ ec_manager_fallocate, callback, data);
if (fop == NULL) {
goto out;
}
@@ -1209,8 +1206,8 @@ ec_manager_discard(ec_fop_data_t *fop, int32_t state)
ec_dispatch_all(fop);
return EC_STATE_DELAYED_START;
} else {
- /*Assume discard to have succeeded on mask*/
- fop->good = fop->mask;
+ /* Assume discard to have succeeded on all bricks */
+ ec_succeed_all(fop);
}
/* Fall through */
@@ -1245,9 +1242,9 @@ ec_manager_discard(ec_fop_data_t *fop, int32_t state)
GF_ASSERT(cbk != NULL);
if (fop->cbks.discard != NULL) {
- fop->cbks.discard(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
- cbk->xdata);
+ QUORUM_CBK(fop->cbks.discard, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
+ &cbk->iatt[1], cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -1289,7 +1286,7 @@ ec_manager_discard(ec_fop_data_t *fop, int32_t state)
void
ec_discard(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_discard_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_discard_cbk_t func, void *data, fd_t *fd,
off_t offset, size_t len, dict_t *xdata)
{
ec_cbk_t callback = {.discard = func};
@@ -1302,9 +1299,9 @@ ec_discard(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_DISCARD, 0, target, minimum,
- ec_wind_discard, ec_manager_discard, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_DISCARD, 0, target,
+ fop_flags, ec_wind_discard, ec_manager_discard,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -1405,6 +1402,7 @@ int32_t
ec_manager_truncate(ec_fop_data_t *fop, int32_t state)
{
ec_cbk_data_t *cbk;
+ off_t offset_down;
switch (state) {
case EC_STATE_INIT:
@@ -1416,16 +1414,19 @@ ec_manager_truncate(ec_fop_data_t *fop, int32_t state)
/* Fall through */
case EC_STATE_LOCK:
+ offset_down = fop->user_size;
+ ec_adjust_offset_down(fop->xl->private, &offset_down, _gf_true);
+
if (fop->id == GF_FOP_TRUNCATE) {
ec_lock_prepare_inode(
fop, &fop->loc[0],
EC_UPDATE_DATA | EC_UPDATE_META | EC_QUERY_INFO,
- fop->offset, EC_RANGE_FULL);
+ offset_down, EC_RANGE_FULL);
} else {
ec_lock_prepare_fd(
fop, fop->fd,
EC_UPDATE_DATA | EC_UPDATE_META | EC_QUERY_INFO,
- fop->offset, EC_RANGE_FULL);
+ offset_down, EC_RANGE_FULL);
}
ec_lock(fop);
@@ -1471,17 +1472,15 @@ ec_manager_truncate(ec_fop_data_t *fop, int32_t state)
if (fop->id == GF_FOP_TRUNCATE) {
if (fop->cbks.truncate != NULL) {
- fop->cbks.truncate(fop->req_frame, fop, fop->xl,
- cbk->op_ret, cbk->op_errno,
- &cbk->iatt[0], &cbk->iatt[1],
- cbk->xdata);
+ QUORUM_CBK(fop->cbks.truncate, fop, fop->req_frame, fop,
+ fop->xl, cbk->op_ret, cbk->op_errno,
+ &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
}
} else {
if (fop->cbks.ftruncate != NULL) {
- fop->cbks.ftruncate(fop->req_frame, fop, fop->xl,
- cbk->op_ret, cbk->op_errno,
- &cbk->iatt[0], &cbk->iatt[1],
- cbk->xdata);
+ QUORUM_CBK(fop->cbks.ftruncate, fop, fop->req_frame, fop,
+ fop->xl, cbk->op_ret, cbk->op_errno,
+ &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
}
}
@@ -1530,7 +1529,7 @@ ec_manager_truncate(ec_fop_data_t *fop, int32_t state)
void
ec_truncate(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_truncate_cbk_t func, void *data, loc_t *loc,
+ uint32_t fop_flags, fop_truncate_cbk_t func, void *data, loc_t *loc,
off_t offset, dict_t *xdata)
{
ec_cbk_t callback = {.truncate = func};
@@ -1543,9 +1542,9 @@ ec_truncate(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_TRUNCATE, 0, target, minimum,
- ec_wind_truncate, ec_manager_truncate, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_TRUNCATE, 0, target,
+ fop_flags, ec_wind_truncate, ec_manager_truncate,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -1604,7 +1603,7 @@ ec_wind_ftruncate(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
void
ec_ftruncate(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_ftruncate_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_ftruncate_cbk_t func, void *data, fd_t *fd,
off_t offset, dict_t *xdata)
{
ec_cbk_t callback = {.ftruncate = func};
@@ -1618,8 +1617,8 @@ ec_ftruncate(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
fop = ec_fop_data_allocate(frame, this, GF_FOP_FTRUNCATE, 0, target,
- minimum, ec_wind_ftruncate, ec_manager_truncate,
- callback, data);
+ fop_flags, ec_wind_ftruncate,
+ ec_manager_truncate, callback, data);
if (fop == NULL) {
goto out;
}
@@ -1973,6 +1972,23 @@ ec_get_and_merge_stripe(ec_t *ec, ec_fop_data_t *fop, ec_stripe_part_t which)
return found;
}
+static uintptr_t
+ec_get_lock_good_mask(inode_t *inode, xlator_t *xl)
+{
+ ec_lock_t *lock = NULL;
+ ec_inode_t *ictx = NULL;
+ LOCK(&inode->lock);
+ {
+ ictx = __ec_inode_get(inode, xl);
+ if (ictx)
+ lock = ictx->inode_lock;
+ }
+ UNLOCK(&inode->lock);
+ if (lock)
+ return lock->good_mask;
+ return 0;
+}
+
void
ec_writev_start(ec_fop_data_t *fop)
{
@@ -2009,20 +2025,29 @@ ec_writev_start(ec_fop_data_t *fop)
if (err != 0) {
goto failed_fd;
}
+ tail = fop->size - fop->user_size - fop->head;
if (fop->head > 0) {
- found_stripe = ec_get_and_merge_stripe(ec, fop, EC_STRIPE_HEAD);
- if (!found_stripe) {
- if (ec_make_internal_fop_xdata(&xdata)) {
- err = -ENOMEM;
- goto failed_xdata;
+ if (current > fop->offset) {
+ found_stripe = ec_get_and_merge_stripe(ec, fop, EC_STRIPE_HEAD);
+ if (!found_stripe) {
+ if (ec_make_internal_fop_xdata(&xdata)) {
+ err = -ENOMEM;
+ goto failed_xdata;
+ }
+ ec_readv(fop->frame, fop->xl,
+ ec_get_lock_good_mask(fop->fd->inode, fop->xl),
+ EC_MINIMUM_MIN, ec_writev_merge_head, NULL, fd,
+ ec->stripe_size, fop->offset, 0, xdata);
+ }
+ } else {
+ memset(fop->vector[0].iov_base, 0, fop->head);
+ memset(fop->vector[0].iov_base + fop->size - tail, 0, tail);
+ if (ec->stripe_cache && (fop->size <= ec->stripe_size)) {
+ ec_add_stripe_in_cache(ec, fop);
}
- ec_readv(fop->frame, fop->xl, -1, EC_MINIMUM_MIN,
- ec_writev_merge_head, NULL, fd, ec->stripe_size,
- fop->offset, 0, xdata);
}
}
- tail = fop->size - fop->user_size - fop->head;
if ((tail > 0) && ((fop->head == 0) || (fop->size > ec->stripe_size))) {
/* Current locking scheme will make sure the 'current' below will
* never decrease while the fop is in progress, so the checks will
@@ -2035,8 +2060,10 @@ ec_writev_start(ec_fop_data_t *fop)
err = -ENOMEM;
goto failed_xdata;
}
- ec_readv(fop->frame, fop->xl, -1, EC_MINIMUM_MIN,
- ec_writev_merge_tail, NULL, fd, ec->stripe_size,
+ ec_readv(fop->frame, fop->xl,
+ ec_get_lock_good_mask(fop->fd->inode, fop->xl),
+ EC_MINIMUM_MIN, ec_writev_merge_tail, NULL, fd,
+ ec->stripe_size,
fop->offset + fop->size - ec->stripe_size, 0, xdata);
}
} else {
@@ -2211,9 +2238,9 @@ ec_manager_writev(ec_fop_data_t *fop, int32_t state)
GF_ASSERT(cbk != NULL);
if (fop->cbks.writev != NULL) {
- fop->cbks.writev(fop->req_frame, fop, fop->xl, cbk->op_ret,
- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
- cbk->xdata);
+ QUORUM_CBK(fop->cbks.writev, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
+ &cbk->iatt[1], cbk->xdata);
}
return EC_STATE_LOCK_REUSE;
@@ -2262,7 +2289,7 @@ ec_manager_writev(ec_fop_data_t *fop, int32_t state)
void
ec_writev(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_writev_cbk_t func, void *data, fd_t *fd,
+ uint32_t fop_flags, fop_writev_cbk_t func, void *data, fd_t *fd,
struct iovec *vector, int32_t count, off_t offset, uint32_t flags,
struct iobref *iobref, dict_t *xdata)
{
@@ -2276,7 +2303,7 @@ ec_writev(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_WRITE, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_WRITE, 0, target, fop_flags,
ec_wind_writev, ec_manager_writev, callback,
data);
if (fop == NULL) {
diff --git a/xlators/cluster/ec/src/ec-locks.c b/xlators/cluster/ec/src/ec-locks.c
index 58c9acb6ded..601960d6154 100644
--- a/xlators/cluster/ec/src/ec-locks.c
+++ b/xlators/cluster/ec/src/ec-locks.c
@@ -8,13 +8,9 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
-
#include "ec-helpers.h"
#include "ec-common.h"
#include "ec-combine.h"
-#include "ec-method.h"
#include "ec-fops.h"
#include "ec-messages.h"
@@ -28,9 +24,36 @@ ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask)
ec_t *ec = fop->xl->private;
ec_cbk_data_t *ans = NULL;
ec_cbk_data_t *cbk = NULL;
- uintptr_t locked = 0, notlocked = 0;
+ uintptr_t locked = 0;
+ int32_t good = 0;
+ int32_t eagain = 0;
+ int32_t estale = 0;
int32_t error = -1;
+ /* There are some errors that we'll handle in an special way while trying
+ * to acquire a lock.
+ *
+ * EAGAIN: If it's found during a parallel non-blocking lock request, we
+ * consider that there's contention on the inode, so we consider
+ * the acquisition a failure and try again with a sequential
+ * blocking lock request. This will ensure that we get a lock on
+ * as many bricks as possible (ignoring EAGAIN here would cause
+ * unnecessary triggers of self-healing).
+ *
+ * If it's found during a sequential blocking lock request, it's
+ * considered an error. Lock will only succeed if there are
+ * enough other bricks locked.
+ *
+ * ESTALE: This can appear during parallel or sequential lock request if
+ * the inode has just been unlinked. We consider this error is
+ * not recoverable, but we also don't consider it as fatal. So,
+ * if it happens during parallel lock, we won't attempt a
+ * sequential one unless there are EAGAIN errors on other
+ * bricks (and are enough to form a quorum), but if we reach
+ * quorum counting the ESTALE bricks, we consider the whole
+ * result of the operation is ESTALE instead of EIO.
+ */
+
list_for_each_entry(ans, &fop->cbk_list, list)
{
if (ans->op_ret >= 0) {
@@ -38,24 +61,23 @@ ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask)
error = EIO;
}
locked |= ans->mask;
+ good = ans->count;
cbk = ans;
- } else {
- if (ans->op_errno == EAGAIN) {
- switch (fop->uint32) {
- case EC_LOCK_MODE_NONE:
- case EC_LOCK_MODE_ALL:
- /* Goal is to treat non-blocking lock as failure
- * even if there is a single EAGAIN*/
- notlocked |= ans->mask;
- break;
- }
- }
+ } else if (ans->op_errno == ESTALE) {
+ estale += ans->count;
+ } else if ((ans->op_errno == EAGAIN) &&
+ (fop->uint32 != EC_LOCK_MODE_INC)) {
+ eagain += ans->count;
}
}
if (error == -1) {
- if (gf_bits_count(locked | notlocked) >= ec->fragments) {
- if (notlocked == 0) {
+ /* If we have enough quorum with succeeded and EAGAIN answers, we
+ * ignore for now any ESTALE answer. If there are EAGAIN answers,
+ * we retry with a sequential blocking lock request if needed.
+ * Otherwise we succeed. */
+ if ((good + eagain) >= ec->fragments) {
+ if (eagain == 0) {
if (fop->answer == NULL) {
fop->answer = cbk;
}
@@ -68,21 +90,28 @@ ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask)
case EC_LOCK_MODE_NONE:
error = EAGAIN;
break;
-
case EC_LOCK_MODE_ALL:
fop->uint32 = EC_LOCK_MODE_INC;
break;
-
default:
+ /* This shouldn't happen because eagain cannot be > 0
+ * when fop->uint32 is EC_LOCK_MODE_INC. */
error = EIO;
break;
}
}
} else {
- if (fop->answer && fop->answer->op_ret < 0)
+ /* We have been unable to find enough candidates that will be able
+ * to take the lock. If we have quorum on some answer, we return
+ * it. Otherwise we check if ESTALE answers allow us to reach
+ * quorum. If so, we return ESTALE. */
+ if (fop->answer && fop->answer->op_ret < 0) {
error = fop->answer->op_errno;
- else
+ } else if ((good + eagain + estale) >= ec->fragments) {
+ error = ESTALE;
+ } else {
error = EIO;
+ }
}
}
@@ -275,7 +304,7 @@ ec_manager_entrylk(ec_fop_data_t *fop, int32_t state)
void
ec_entrylk(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_entrylk_cbk_t func, void *data,
+ uint32_t fop_flags, fop_entrylk_cbk_t func, void *data,
const char *volume, loc_t *loc, const char *basename,
entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
@@ -285,13 +314,12 @@ ec_entrylk(call_frame_t *frame, xlator_t *this, uintptr_t target,
gf_msg_trace("ec", 0, "EC(ENTRYLK) %p", frame);
- VALIDATE_OR_GOTO(this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_ENTRYLK, 0, target, minimum,
- ec_wind_entrylk, ec_manager_entrylk, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_ENTRYLK, 0, target,
+ fop_flags, ec_wind_entrylk, ec_manager_entrylk,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -404,7 +432,7 @@ ec_wind_fentrylk(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
void
ec_fentrylk(call_frame_t *frame, xlator_t *this, uintptr_t target,
- int32_t minimum, fop_fentrylk_cbk_t func, void *data,
+ uint32_t fop_flags, fop_fentrylk_cbk_t func, void *data,
const char *volume, fd_t *fd, const char *basename, entrylk_cmd cmd,
entrylk_type type, dict_t *xdata)
{
@@ -417,9 +445,9 @@ ec_fentrylk(call_frame_t *frame, xlator_t *this, uintptr_t target,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_FENTRYLK, 0, target, minimum,
- ec_wind_fentrylk, ec_manager_entrylk, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FENTRYLK, 0, target,
+ fop_flags, ec_wind_fentrylk, ec_manager_entrylk,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -651,7 +679,7 @@ ec_manager_inodelk(ec_fop_data_t *fop, int32_t state)
void
ec_inodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner,
- uintptr_t target, int32_t minimum, fop_inodelk_cbk_t func,
+ uintptr_t target, uint32_t fop_flags, fop_inodelk_cbk_t func,
void *data, const char *volume, loc_t *loc, int32_t cmd,
struct gf_flock *flock, dict_t *xdata)
{
@@ -665,9 +693,9 @@ ec_inodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_INODELK, 0, target, minimum,
- ec_wind_inodelk, ec_manager_inodelk, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_INODELK, 0, target,
+ fop_flags, ec_wind_inodelk, ec_manager_inodelk,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -783,7 +811,7 @@ ec_wind_finodelk(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
void
ec_finodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner,
- uintptr_t target, int32_t minimum, fop_finodelk_cbk_t func,
+ uintptr_t target, uint32_t fop_flags, fop_finodelk_cbk_t func,
void *data, const char *volume, fd_t *fd, int32_t cmd,
struct gf_flock *flock, dict_t *xdata)
{
@@ -797,9 +825,9 @@ ec_finodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_FINODELK, 0, target, minimum,
- ec_wind_finodelk, ec_manager_inodelk, callback,
- data);
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FINODELK, 0, target,
+ fop_flags, ec_wind_finodelk, ec_manager_inodelk,
+ callback, data);
if (fop == NULL) {
goto out;
}
@@ -1033,7 +1061,7 @@ ec_manager_lk(ec_fop_data_t *fop, int32_t state)
}
void
-ec_lk(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
+ec_lk(call_frame_t *frame, xlator_t *this, uintptr_t target, uint32_t fop_flags,
fop_lk_cbk_t func, void *data, fd_t *fd, int32_t cmd,
struct gf_flock *flock, dict_t *xdata)
{
@@ -1046,7 +1074,7 @@ ec_lk(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum,
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- fop = ec_fop_data_allocate(frame, this, GF_FOP_LK, 0, target, minimum,
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_LK, 0, target, fop_flags,
ec_wind_lk, ec_manager_lk, callback, data);
if (fop == NULL) {
goto out;
diff --git a/xlators/cluster/ec/src/ec-mem-types.h b/xlators/cluster/ec/src/ec-mem-types.h
index 7af8fe2ef49..3252c4c1c58 100644
--- a/xlators/cluster/ec/src/ec-mem-types.h
+++ b/xlators/cluster/ec/src/ec-mem-types.h
@@ -11,7 +11,7 @@
#ifndef __EC_MEM_TYPES_H__
#define __EC_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_ec_mem_types_ {
ec_mt_ec_t = gf_common_mt_end + 1,
diff --git a/xlators/cluster/ec/src/ec-messages.h b/xlators/cluster/ec/src/ec-messages.h
index 5f673d69aa4..72e98f11286 100644
--- a/xlators/cluster/ec/src/ec-messages.h
+++ b/xlators/cluster/ec/src/ec-messages.h
@@ -11,7 +11,7 @@
#ifndef _EC_MESSAGES_H_
#define _EC_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
@@ -55,6 +55,7 @@ GLFS_MSGID(EC, EC_MSG_INVALID_CONFIG, EC_MSG_HEAL_FAIL,
EC_MSG_CONFIG_XATTR_INVALID, EC_MSG_EXTENSION, EC_MSG_EXTENSION_NONE,
EC_MSG_EXTENSION_UNKNOWN, EC_MSG_EXTENSION_UNSUPPORTED,
EC_MSG_EXTENSION_FAILED, EC_MSG_NO_GF, EC_MSG_MATRIX_FAILED,
- EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED);
+ EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED,
+ EC_MSG_THREAD_CLEANUP_FAILED, EC_MSG_FD_BAD);
#endif /* !_EC_MESSAGES_H_ */
diff --git a/xlators/cluster/ec/src/ec-method.h b/xlators/cluster/ec/src/ec-method.h
index ca33f4ffdce..f91233b2f88 100644
--- a/xlators/cluster/ec/src/ec-method.h
+++ b/xlators/cluster/ec/src/ec-method.h
@@ -11,8 +11,6 @@
#ifndef __EC_METHOD_H__
#define __EC_METHOD_H__
-#include "xlator.h"
-
#include "ec-types.h"
#include "ec-galois.h"
diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
index 80d9c0d4014..de9b89bb2c9 100644
--- a/xlators/cluster/ec/src/ec-types.h
+++ b/xlators/cluster/ec/src/ec-types.h
@@ -11,10 +11,10 @@
#ifndef __EC_TYPES_H__
#define __EC_TYPES_H__
-#include "xlator.h"
-#include "timer.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/timer.h>
#include "libxlator.h"
-#include "atomic.h"
+#include <glusterfs/atomic.h>
#define EC_GF_MAX_REGS 16
@@ -130,7 +130,12 @@ typedef void (*ec_resume_f)(ec_fop_data_t *, int32_t);
enum _ec_read_policy { EC_ROUND_ROBIN, EC_GFID_HASH, EC_READ_POLICY_MAX };
-enum _ec_heal_need { EC_HEAL_NONEED, EC_HEAL_MAYBE, EC_HEAL_MUST };
+enum _ec_heal_need {
+ EC_HEAL_NONEED,
+ EC_HEAL_MAYBE,
+ EC_HEAL_MUST,
+ EC_HEAL_PURGE_INDEX
+};
enum _ec_stripe_part { EC_STRIPE_HEAD, EC_STRIPE_TAIL };
@@ -150,6 +155,7 @@ struct _ec_fd {
loc_t loc;
uintptr_t open;
int32_t flags;
+ uint64_t bad_version;
ec_fd_status_t fd_status[0];
};
@@ -171,6 +177,7 @@ struct _ec_inode {
gf_boolean_t have_config;
gf_boolean_t have_version;
gf_boolean_t have_size;
+ int32_t heal_count;
ec_config_t config;
uint64_t pre_version[2];
uint64_t post_version[2];
@@ -179,14 +186,15 @@ struct _ec_inode {
uint64_t dirty[2];
struct list_head heal;
ec_stripe_list_t stripe_cache;
+ uint64_t bad_version;
};
typedef int32_t (*fop_heal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t,
int32_t, uintptr_t, uintptr_t, uintptr_t,
- dict_t *);
+ uint32_t, dict_t *);
typedef int32_t (*fop_fheal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t,
int32_t, uintptr_t, uintptr_t, uintptr_t,
- dict_t *);
+ uint32_t, dict_t *);
union _ec_cbk {
fop_access_cbk_t access;
@@ -264,6 +272,7 @@ struct _ec_lock {
uint32_t refs_pending; /* Refs assigned to fops being prepared */
uint32_t waiting_flags; /*Track xattrop/dirty marking*/
gf_boolean_t acquired;
+ gf_boolean_t contention;
gf_boolean_t unlock_now;
gf_boolean_t release;
gf_boolean_t query;
@@ -307,9 +316,9 @@ struct _ec_fop_data {
int32_t id; /* ID of the file operation */
int32_t refs;
int32_t state;
- int32_t minimum; /* Minimum number of successful
- operation required to conclude a
- fop as successful */
+ uint32_t minimum; /* Minimum number of successful
+ operation required to conclude a
+ fop as successful */
int32_t expected;
int32_t winds;
int32_t jobs;
@@ -324,11 +333,12 @@ struct _ec_fop_data {
ec_cbk_data_t *answer; /* accepted answer */
int32_t lock_count;
int32_t locked;
+ gf_lock_t lock;
ec_lock_link_t locks[2];
int32_t first_lock;
- gf_lock_t lock;
- uint32_t flags;
+ uint32_t fop_flags; /* Flags passed by the caller. */
+ uint32_t flags; /* Internal flags. */
uint32_t first;
uintptr_t mask;
uintptr_t healing; /*Dispatch is done but call is successful only
@@ -616,6 +626,11 @@ struct _ec_statistics {
requests. (Basically memory allocation
errors). */
} stripe_cache;
+ struct {
+ gf_atomic_t attempted; /*Number of heals attempted on
+ files/directories*/
+ gf_atomic_t completed; /*Number of heals complted on files/directories*/
+ } shd;
};
struct _ec {
@@ -641,6 +656,8 @@ struct _ec {
uintptr_t xl_notify; /* Bit flag representing
notification for bricks. */
uintptr_t node_mask;
+ uintptr_t read_mask; /*Stores user defined read-mask*/
+ gf_atomic_t async_fop_count; /* Number of on going asynchronous fops. */
xlator_t **xl_list;
gf_lock_t lock;
gf_timer_t *timer;
@@ -650,6 +667,7 @@ struct _ec {
gf_boolean_t optimistic_changelog;
gf_boolean_t parallel_writes;
uint32_t stripe_cache;
+ uint32_t quorum_count;
uint32_t background_heals;
uint32_t heal_wait_qlen;
uint32_t self_heal_window_size; /* max size of read/writes */
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index b56dd9ada3e..7344be4968d 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -8,10 +8,10 @@
cases as published by the Free Software Foundation.
*/
-#include "defaults.h"
-#include "statedump.h"
-#include "compat-errno.h"
-#include "upcall-utils.h"
+#include <glusterfs/defaults.h>
+#include <glusterfs/statedump.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/upcall-utils.h>
#include "ec.h"
#include "ec-messages.h"
@@ -23,7 +23,7 @@
#include "ec-method.h"
#include "ec-code.h"
#include "ec-heald.h"
-#include "events.h"
+#include <glusterfs/events.h>
static char *ec_read_policies[EC_READ_POLICY_MAX + 1] = {
[EC_ROUND_ROBIN] = "round-robin",
@@ -285,6 +285,7 @@ reconfigure(xlator_t *this, dict_t *options)
GF_OPTION_RECONF("parallel-writes", ec->parallel_writes, options, bool,
failed);
GF_OPTION_RECONF("stripe-cache", ec->stripe_cache, options, uint32, failed);
+ GF_OPTION_RECONF("quorum-count", ec->quorum_count, options, uint32, failed);
ret = 0;
if (ec_assign_read_policy(ec, read_policy)) {
ret = -1;
@@ -324,13 +325,18 @@ ec_get_event_from_state(ec_t *ec)
void
ec_up(xlator_t *this, ec_t *ec)
{
+ char str1[32], str2[32];
+
if (ec->timer != NULL) {
gf_timer_call_cancel(this->ctx, ec->timer);
ec->timer = NULL;
}
ec->up = 1;
- gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_UP, "Going UP");
+ gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_UP,
+ "Going UP : Child UP = %s Child Notify = %s",
+ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), ec->xl_notify, ec->nodes));
gf_event(EVENT_EC_MIN_BRICKS_UP, "subvol=%s", this->name);
}
@@ -338,13 +344,18 @@ ec_up(xlator_t *this, ec_t *ec)
void
ec_down(xlator_t *this, ec_t *ec)
{
+ char str1[32], str2[32];
+
if (ec->timer != NULL) {
gf_timer_call_cancel(this->ctx, ec->timer);
ec->timer = NULL;
}
ec->up = 0;
- gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_DOWN, "Going DOWN");
+ gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_DOWN,
+ "Going DOWN : Child UP = %s Child Notify = %s",
+ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), ec->xl_notify, ec->nodes));
gf_event(EVENT_EC_MIN_BRICKS_NOT_UP, "subvol=%s", this->name);
}
@@ -355,6 +366,7 @@ ec_notify_cbk(void *data)
ec_t *ec = data;
glusterfs_event_t event = GF_EVENT_MAXVAL;
gf_boolean_t propagate = _gf_false;
+ gf_boolean_t launch_heal = _gf_false;
LOCK(&ec->lock);
{
@@ -384,6 +396,11 @@ ec_notify_cbk(void *data)
* still bricks DOWN, they will be healed when they
* come up. */
ec_up(ec->xl, ec);
+
+ if (ec->shd.iamshd && !ec->shutdown) {
+ launch_heal = _gf_true;
+ GF_ATOMIC_INC(ec->async_fop_count);
+ }
}
propagate = _gf_true;
@@ -391,13 +408,12 @@ ec_notify_cbk(void *data)
unlock:
UNLOCK(&ec->lock);
+ if (launch_heal) {
+ /* We have just brought the volume UP, so we trigger
+ * a self-heal check on the root directory. */
+ ec_launch_replace_heal(ec);
+ }
if (propagate) {
- if ((event == GF_EVENT_CHILD_UP) && ec->shd.iamshd) {
- /* We have just brought the volume UP, so we trigger
- * a self-heal check on the root directory. */
- ec_launch_replace_heal(ec);
- }
-
default_notify(ec->xl, event, NULL);
}
}
@@ -425,10 +441,55 @@ ec_disable_delays(ec_t *ec)
{
ec->shutdown = _gf_true;
- return list_empty(&ec->pending_fops);
+ return __ec_is_last_fop(ec);
}
void
+ec_cleanup_healer_object(ec_t *ec)
+{
+ struct subvol_healer *healer = NULL;
+ ec_self_heald_t *shd = NULL;
+ void *res = NULL;
+ int i = 0;
+ gf_boolean_t is_join = _gf_false;
+
+ shd = &ec->shd;
+ if (!shd->iamshd)
+ return;
+
+ for (i = 0; i < ec->nodes; i++) {
+ healer = &shd->index_healers[i];
+ pthread_mutex_lock(&healer->mutex);
+ {
+ healer->rerun = 1;
+ if (healer->running) {
+ pthread_cond_signal(&healer->cond);
+ is_join = _gf_true;
+ }
+ }
+ pthread_mutex_unlock(&healer->mutex);
+ if (is_join) {
+ pthread_join(healer->thread, &res);
+ is_join = _gf_false;
+ }
+
+ healer = &shd->full_healers[i];
+ pthread_mutex_lock(&healer->mutex);
+ {
+ healer->rerun = 1;
+ if (healer->running) {
+ pthread_cond_signal(&healer->cond);
+ is_join = _gf_true;
+ }
+ }
+ pthread_mutex_unlock(&healer->mutex);
+ if (is_join) {
+ pthread_join(healer->thread, &res);
+ is_join = _gf_false;
+ }
+ }
+}
+void
ec_pending_fops_completed(ec_t *ec)
{
if (ec->shutdown) {
@@ -441,6 +502,9 @@ ec_set_up_state(ec_t *ec, uintptr_t index_mask, uintptr_t new_state)
{
uintptr_t current_state = 0;
+ if (xlator_is_cleanup_starting(ec->xl))
+ return _gf_false;
+
if ((ec->xl_notify & index_mask) == 0) {
ec->xl_notify |= index_mask;
ec->xl_notify_count++;
@@ -462,6 +526,7 @@ ec_upcall(ec_t *ec, struct gf_upcall *upcall)
struct gf_upcall_cache_invalidation *ci = NULL;
struct gf_upcall_inodelk_contention *lc = NULL;
inode_t *inode;
+ inode_table_t *table;
switch (upcall->event_type) {
case GF_UPCALL_CACHE_INVALIDATION:
@@ -475,8 +540,18 @@ ec_upcall(ec_t *ec, struct gf_upcall *upcall)
/* The lock is not owned by EC, ignore it. */
return _gf_true;
}
- inode = inode_find(((xlator_t *)ec->xl->graph->top)->itable,
- upcall->gfid);
+ table = ((xlator_t *)ec->xl->graph->top)->itable;
+ if (table == NULL) {
+ /* Self-heal daemon doesn't have an inode table on the top
+ * xlator because it doesn't need it. In this case we should
+ * use the inode table managed by EC itself where all inodes
+ * being healed should be present. However self-heal doesn't
+ * use eager-locking and inodelk's are already released as
+ * soon as possible. In this case we can safely ignore these
+ * notifications. */
+ return _gf_false;
+ }
+ inode = inode_find(table, upcall->gfid);
/* If inode is not found, it means that it's already released,
* so we can ignore it. Probably it has been released and
* destroyed while the contention notification was being sent.
@@ -544,6 +619,7 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2)
/* If there aren't pending fops running after we have waken up
* them, we immediately propagate the notification. */
propagate = ec_disable_delays(ec);
+ ec_cleanup_healer_object(ec);
goto unlock;
}
@@ -554,7 +630,10 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2)
if (event == GF_EVENT_CHILD_UP) {
/* We need to trigger a selfheal if a brick changes
* to UP state. */
- needs_shd_check = ec_set_up_state(ec, mask, mask);
+ if (ec_set_up_state(ec, mask, mask) && ec->shd.iamshd &&
+ !ec->shutdown) {
+ needs_shd_check = _gf_true;
+ }
} else if (event == GF_EVENT_CHILD_DOWN) {
ec_set_up_state(ec, mask, 0);
}
@@ -584,17 +663,21 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2)
}
} else {
propagate = _gf_false;
+ needs_shd_check = _gf_false;
+ }
+
+ if (needs_shd_check) {
+ GF_ATOMIC_INC(ec->async_fop_count);
}
}
unlock:
UNLOCK(&ec->lock);
done:
+ if (needs_shd_check) {
+ ec_launch_replace_heal(ec);
+ }
if (propagate) {
- if (needs_shd_check && ec->shd.iamshd) {
- ec_launch_replace_heal(ec);
- }
-
error = default_notify(this, event, data);
}
@@ -627,6 +710,69 @@ ec_statistics_init(ec_t *ec)
GF_ATOMIC_INIT(ec->stats.stripe_cache.evicts, 0);
GF_ATOMIC_INIT(ec->stats.stripe_cache.allocs, 0);
GF_ATOMIC_INIT(ec->stats.stripe_cache.errors, 0);
+ GF_ATOMIC_INIT(ec->stats.shd.attempted, 0);
+ GF_ATOMIC_INIT(ec->stats.shd.completed, 0);
+}
+
+static int
+ec_assign_read_mask(ec_t *ec, char *read_mask_str)
+{
+ char *mask = NULL;
+ char *maskptr = NULL;
+ char *saveptr = NULL;
+ char *id_str = NULL;
+ int id = 0;
+ int ret = 0;
+ uintptr_t read_mask = 0;
+
+ if (!read_mask_str) {
+ ec->read_mask = 0;
+ ret = 0;
+ goto out;
+ }
+
+ mask = gf_strdup(read_mask_str);
+ if (!mask) {
+ ret = -1;
+ goto out;
+ }
+ maskptr = mask;
+
+ for (;;) {
+ id_str = strtok_r(maskptr, ":", &saveptr);
+ if (id_str == NULL)
+ break;
+ if (gf_string2int(id_str, &id)) {
+ gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_XLATOR_INIT_FAIL,
+ "In read-mask \"%s\" id %s is not a valid integer",
+ read_mask_str, id_str);
+ ret = -1;
+ goto out;
+ }
+
+ if ((id < 0) || (id >= ec->nodes)) {
+ gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_XLATOR_INIT_FAIL,
+ "In read-mask \"%s\" id %d is not in range [0 - %d]",
+ read_mask_str, id, ec->nodes - 1);
+ ret = -1;
+ goto out;
+ }
+ read_mask |= (1UL << id);
+ maskptr = NULL;
+ }
+
+ if (gf_bits_count(read_mask) < ec->fragments) {
+ gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_XLATOR_INIT_FAIL,
+ "read-mask \"%s\" should contain at least %d ids", read_mask_str,
+ ec->fragments);
+ ret = -1;
+ goto out;
+ }
+ ec->read_mask = read_mask;
+ ret = 0;
+out:
+ GF_FREE(mask);
+ return ret;
}
int32_t
@@ -636,6 +782,7 @@ init(xlator_t *this)
char *read_policy = NULL;
char *extensions = NULL;
int32_t err;
+ char *read_mask_str = NULL;
if (this->parents == NULL) {
gf_msg(this->name, GF_LOG_WARNING, 0, EC_MSG_NO_PARENTS,
@@ -656,6 +803,7 @@ init(xlator_t *this)
ec->xl = this;
LOCK_INIT(&ec->lock);
+ GF_ATOMIC_INIT(ec->async_fop_count, 0);
INIT_LIST_HEAD(&ec->pending_fops);
INIT_LIST_HEAD(&ec->heal_waiting);
INIT_LIST_HEAD(&ec->healing);
@@ -714,12 +862,18 @@ init(xlator_t *this)
if (ec_assign_read_policy(ec, read_policy))
goto failed;
+ GF_OPTION_INIT("heal-timeout", ec->shd.timeout, int32, failed);
GF_OPTION_INIT("shd-max-threads", ec->shd.max_threads, uint32, failed);
GF_OPTION_INIT("shd-wait-qlength", ec->shd.wait_qlength, uint32, failed);
GF_OPTION_INIT("optimistic-change-log", ec->optimistic_changelog, bool,
failed);
GF_OPTION_INIT("parallel-writes", ec->parallel_writes, bool, failed);
GF_OPTION_INIT("stripe-cache", ec->stripe_cache, uint32, failed);
+ GF_OPTION_INIT("quorum-count", ec->quorum_count, uint32, failed);
+ GF_OPTION_INIT("ec-read-mask", read_mask_str, str, failed);
+
+ if (ec_assign_read_mask(ec, read_mask_str))
+ goto failed;
this->itable = inode_table_new(EC_SHD_INODE_LRU_LIMIT, this);
if (!this->itable)
@@ -759,6 +913,7 @@ failed:
void
fini(xlator_t *this)
{
+ ec_selfheal_daemon_fini(this);
__ec_destroy_private(this);
}
@@ -797,11 +952,12 @@ ec_gf_entrylk(call_frame_t *frame, xlator_t *this, const char *volume,
loc_t *loc, const char *basename, entrylk_cmd cmd,
entrylk_type type, dict_t *xdata)
{
- int32_t minimum = EC_MINIMUM_ALL;
+ uint32_t fop_flags = EC_MINIMUM_ALL;
+
if (cmd == ENTRYLK_UNLOCK)
- minimum = EC_MINIMUM_ONE;
- ec_entrylk(frame, this, -1, minimum, default_entrylk_cbk, NULL, volume, loc,
- basename, cmd, type, xdata);
+ fop_flags = EC_MINIMUM_ONE;
+ ec_entrylk(frame, this, -1, fop_flags, default_entrylk_cbk, NULL, volume,
+ loc, basename, cmd, type, xdata);
return 0;
}
@@ -811,10 +967,11 @@ ec_gf_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume,
fd_t *fd, const char *basename, entrylk_cmd cmd,
entrylk_type type, dict_t *xdata)
{
- int32_t minimum = EC_MINIMUM_ALL;
+ uint32_t fop_flags = EC_MINIMUM_ALL;
+
if (cmd == ENTRYLK_UNLOCK)
- minimum = EC_MINIMUM_ONE;
- ec_fentrylk(frame, this, -1, minimum, default_fentrylk_cbk, NULL, volume,
+ fop_flags = EC_MINIMUM_ONE;
+ ec_fentrylk(frame, this, -1, fop_flags, default_fentrylk_cbk, NULL, volume,
fd, basename, cmd, type, xdata);
return 0;
@@ -905,7 +1062,7 @@ ec_gf_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
{
int error = 0;
ec_t *ec = this->private;
- int32_t minimum = EC_MINIMUM_ONE;
+ int32_t fop_flags = EC_MINIMUM_ONE;
if (name && strcmp(name, EC_XATTR_HEAL) != 0) {
EC_INTERNAL_XATTR_OR_GOTO(name, NULL, error, out);
@@ -920,11 +1077,11 @@ ec_gf_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
if (name && ((fnmatch(GF_XATTR_STIME_PATTERN, name, 0) == 0) ||
XATTR_IS_NODE_UUID(name) || XATTR_IS_NODE_UUID_LIST(name))) {
- minimum = EC_MINIMUM_ALL;
+ fop_flags = EC_MINIMUM_ALL;
}
- ec_getxattr(frame, this, -1, minimum, default_getxattr_cbk, NULL, loc, name,
- xdata);
+ ec_getxattr(frame, this, -1, fop_flags, default_getxattr_cbk, NULL, loc,
+ name, xdata);
return 0;
out:
@@ -954,11 +1111,12 @@ int32_t
ec_gf_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
loc_t *loc, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- int32_t minimum = EC_MINIMUM_ALL;
+ int32_t fop_flags = EC_MINIMUM_ALL;
+
if (flock->l_type == F_UNLCK)
- minimum = EC_MINIMUM_ONE;
+ fop_flags = EC_MINIMUM_ONE;
- ec_inodelk(frame, this, &frame->root->lk_owner, -1, minimum,
+ ec_inodelk(frame, this, &frame->root->lk_owner, -1, fop_flags,
default_inodelk_cbk, NULL, volume, loc, cmd, flock, xdata);
return 0;
@@ -968,10 +1126,11 @@ int32_t
ec_gf_finodelk(call_frame_t *frame, xlator_t *this, const char *volume,
fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- int32_t minimum = EC_MINIMUM_ALL;
+ int32_t fop_flags = EC_MINIMUM_ALL;
+
if (flock->l_type == F_UNLCK)
- minimum = EC_MINIMUM_ONE;
- ec_finodelk(frame, this, &frame->root->lk_owner, -1, minimum,
+ fop_flags = EC_MINIMUM_ONE;
+ ec_finodelk(frame, this, &frame->root->lk_owner, -1, fop_flags,
default_finodelk_cbk, NULL, volume, fd, cmd, flock, xdata);
return 0;
@@ -991,10 +1150,11 @@ int32_t
ec_gf_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
struct gf_flock *flock, dict_t *xdata)
{
- int32_t minimum = EC_MINIMUM_ALL;
+ int32_t fop_flags = EC_MINIMUM_ALL;
+
if (flock->l_type == F_UNLCK)
- minimum = EC_MINIMUM_ONE;
- ec_lk(frame, this, -1, minimum, default_lk_cbk, NULL, fd, cmd, flock,
+ fop_flags = EC_MINIMUM_ONE;
+ ec_lk(frame, this, -1, fop_flags, default_lk_cbk, NULL, fd, cmd, flock,
xdata);
return 0;
@@ -1389,6 +1549,10 @@ ec_dump_private(xlator_t *this)
gf_proc_dump_write("childs_up", "%u", ec->xl_up_count);
gf_proc_dump_write("childs_up_mask", "%s",
ec_bin(tmp, sizeof(tmp), ec->xl_up, ec->nodes));
+ if (ec->read_mask) {
+ gf_proc_dump_write("read-mask", "%s",
+ ec_bin(tmp, sizeof(tmp), ec->read_mask, ec->nodes));
+ }
gf_proc_dump_write("background-heals", "%d", ec->background_heals);
gf_proc_dump_write("heal-wait-qlength", "%d", ec->heal_wait_qlen);
gf_proc_dump_write("self-heal-window-size", "%" PRIu32,
@@ -1397,6 +1561,7 @@ ec_dump_private(xlator_t *this)
gf_proc_dump_write("heal-waiters", "%d", ec->heal_waiters);
gf_proc_dump_write("read-policy", "%s", ec_read_policies[ec->read_policy]);
gf_proc_dump_write("parallel-writes", "%d", ec->parallel_writes);
+ gf_proc_dump_write("quorum-count", "%u", ec->quorum_count);
snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s.stats.stripe_cache",
this->type, this->name);
@@ -1416,6 +1581,10 @@ ec_dump_private(xlator_t *this)
GF_ATOMIC_GET(ec->stats.stripe_cache.allocs));
gf_proc_dump_write("errors", "%" GF_PRI_ATOMIC,
GF_ATOMIC_GET(ec->stats.stripe_cache.errors));
+ gf_proc_dump_write("heals-attempted", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ec->stats.shd.attempted));
+ gf_proc_dump_write("heals-completed", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ec->stats.shd.completed));
return 0;
}
@@ -1666,4 +1835,39 @@ struct volume_options options[] = {
"specially for sequential writes. However, this will also"
"lead to extra memory consumption, maximum "
"(cache size * stripe size) Bytes per open file."},
- {.key = {NULL}}};
+ {
+ .key = {"quorum-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "0",
+ .description =
+ "This option can be used to define how many successes on"
+ "the bricks constitute a success to the application. This"
+ " count should be in the range"
+ "[disperse-data-count, disperse-count] (inclusive)",
+ },
+ {
+ .key = {"ec-read-mask"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = NULL,
+ .description = "This option can be used to choose which bricks can be"
+ " used for reading data/metadata of a file/directory",
+ },
+ {
+ .key = {NULL},
+ },
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1},
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "disperse",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/cluster/ec/src/ec.h b/xlators/cluster/ec/src/ec.h
index 1b210d9adc1..6f6de6d5981 100644
--- a/xlators/cluster/ec/src/ec.h
+++ b/xlators/cluster/ec/src/ec.h
@@ -18,6 +18,7 @@
#define EC_XATTR_SIZE EC_XATTR_PREFIX "size"
#define EC_XATTR_VERSION EC_XATTR_PREFIX "version"
#define EC_XATTR_HEAL EC_XATTR_PREFIX "heal"
+#define EC_XATTR_HEAL_NEW EC_XATTR_PREFIX "heal-new"
#define EC_XATTR_DIRTY EC_XATTR_PREFIX "dirty"
#define EC_STRIPE_CACHE_MAX_SIZE 10
#define EC_VERSION_SIZE 2
diff --git a/xlators/cluster/stripe/src/Makefile.am b/xlators/cluster/stripe/src/Makefile.am
deleted file mode 100644
index 2b594567db1..00000000000
--- a/xlators/cluster/stripe/src/Makefile.am
+++ /dev/null
@@ -1,22 +0,0 @@
-xlator_LTLIBRARIES = stripe.la
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
-
-stripe_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-
-
-stripe_la_SOURCES = stripe.c stripe-helpers.c \
- $(top_builddir)/xlators/lib/src/libxlator.c
-
-stripe_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-noinst_HEADERS = stripe.h stripe-mem-types.h \
- $(top_builddir)/xlators/lib/src/libxlator.h
-
-AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
- -I$(top_srcdir)/xlators/lib/src \
- -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
-
-AM_CFLAGS = -Wall $(GF_CFLAGS)
-
-CLEANFILES =
-
diff --git a/xlators/cluster/stripe/src/stripe-helpers.c b/xlators/cluster/stripe/src/stripe-helpers.c
deleted file mode 100644
index c3743723d1d..00000000000
--- a/xlators/cluster/stripe/src/stripe-helpers.c
+++ /dev/null
@@ -1,658 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include <fnmatch.h>
-
-#include "stripe.h"
-#include "byte-order.h"
-#include "mem-types.h"
-#include "logging.h"
-
-void
-stripe_local_wipe(stripe_local_t *local)
-{
- if (!local)
- goto out;
-
- loc_wipe(&local->loc);
- loc_wipe(&local->loc2);
-
- if (local->fd)
- fd_unref(local->fd);
-
- if (local->inode)
- inode_unref(local->inode);
-
- if (local->xattr)
- dict_unref(local->xattr);
-
- if (local->xdata)
- dict_unref(local->xdata);
-
-out:
- return;
-}
-
-int
-stripe_aggregate(dict_t *this, char *key, data_t *value, void *data)
-{
- dict_t *dst = NULL;
- int64_t *ptr = 0, *size = NULL;
- int32_t ret = -1;
-
- dst = data;
-
- if (strcmp(key, QUOTA_SIZE_KEY) == 0) {
- ret = dict_get_bin(dst, key, (void **)&size);
- if (ret < 0) {
- size = GF_CALLOC(1, sizeof(int64_t), gf_common_mt_char);
- if (size == NULL) {
- gf_log("stripe", GF_LOG_WARNING, "memory allocation failed");
- goto out;
- }
- ret = dict_set_bin(dst, key, size, sizeof(int64_t));
- if (ret < 0) {
- gf_log("stripe", GF_LOG_WARNING,
- "stripe aggregate dict set failed");
- GF_FREE(size);
- goto out;
- }
- }
-
- ptr = data_to_bin(value);
- if (ptr == NULL) {
- gf_log("stripe", GF_LOG_WARNING, "data to bin failed");
- goto out;
- }
-
- *size = hton64(ntoh64(*size) + ntoh64(*ptr));
- } else if (strcmp(key, GF_CONTENT_KEY)) {
- /* No need to aggregate 'CONTENT' data */
- ret = dict_set(dst, key, value);
- if (ret)
- gf_log("stripe", GF_LOG_WARNING, "xattr dict set failed");
- }
-
-out:
- return 0;
-}
-
-void
-stripe_aggregate_xattr(dict_t *dst, dict_t *src)
-{
- if ((dst == NULL) || (src == NULL)) {
- goto out;
- }
-
- dict_foreach(src, stripe_aggregate, dst);
-out:
- return;
-}
-
-int32_t
-stripe_xattr_aggregate(char *buffer, stripe_local_t *local, int32_t *total)
-{
- int32_t i = 0;
- int32_t ret = -1;
- int32_t len = 0;
- char *sbuf = NULL;
- stripe_xattr_sort_t *xattr = NULL;
-
- if (!buffer || !local || !local->xattr_list)
- goto out;
-
- sbuf = buffer;
-
- for (i = 0; i < local->nallocs; i++) {
- xattr = local->xattr_list + i;
- len = xattr->xattr_len - 1; /* length includes \0 */
-
- if (len && xattr && xattr->xattr_value) {
- memcpy(buffer, xattr->xattr_value, len);
- buffer += len;
- *buffer++ = ' ';
- }
- }
-
- *--buffer = '\0';
- if (total)
- *total = buffer - sbuf;
- ret = 0;
-
-out:
- return ret;
-}
-
-int32_t
-stripe_free_xattr_str(stripe_local_t *local)
-{
- int32_t i = 0;
- int32_t ret = -1;
- stripe_xattr_sort_t *xattr = NULL;
-
- if (!local || !local->xattr_list)
- goto out;
-
- for (i = 0; i < local->nallocs; i++) {
- xattr = local->xattr_list + i;
-
- if (xattr && xattr->xattr_value)
- GF_FREE(xattr->xattr_value);
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-int32_t
-stripe_fill_lockinfo_xattr(xlator_t *this, stripe_local_t *local,
- void **xattr_serz)
-{
- int32_t ret = -1, i = 0, len = 0;
- dict_t *tmp1 = NULL, *tmp2 = NULL;
- char *buf = NULL;
- stripe_xattr_sort_t *xattr = NULL;
-
- if (xattr_serz == NULL) {
- goto out;
- }
-
- tmp2 = dict_new();
-
- if (tmp2 == NULL) {
- goto out;
- }
-
- for (i = 0; i < local->nallocs; i++) {
- xattr = local->xattr_list + i;
- len = xattr->xattr_len;
-
- if (len && xattr && xattr->xattr_value) {
- ret = dict_reset(tmp2);
- if (ret < 0) {
- gf_log(this->name, GF_LOG_DEBUG, "dict_reset failed (%s)",
- strerror(-ret));
- }
-
- ret = dict_unserialize(xattr->xattr_value, xattr->xattr_len, &tmp2);
- if (ret < 0) {
- gf_log(this->name, GF_LOG_WARNING,
- "dict_unserialize failed (%s)", strerror(-ret));
- ret = -1;
- goto out;
- }
-
- tmp1 = dict_copy(tmp2, tmp1);
- if (tmp1 == NULL) {
- gf_log(this->name, GF_LOG_WARNING, "dict_copy failed (%s)",
- strerror(-ret));
- ret = -1;
- goto out;
- }
- }
- }
-
- len = dict_serialized_length(tmp1);
- if (len > 0) {
- buf = GF_CALLOC(1, len, gf_common_mt_dict_t);
- if (buf == NULL) {
- ret = -1;
- goto out;
- }
-
- ret = dict_serialize(tmp1, buf);
- if (ret < 0) {
- gf_log(this->name, GF_LOG_WARNING, "dict_serialize failed (%s)",
- strerror(-ret));
- GF_FREE(buf);
- ret = -1;
- goto out;
- }
-
- *xattr_serz = buf;
- }
-
- ret = 0;
-out:
- if (tmp1 != NULL) {
- dict_unref(tmp1);
- }
-
- if (tmp2 != NULL) {
- dict_unref(tmp2);
- }
-
- return ret;
-}
-
-int32_t
-stripe_fill_pathinfo_xattr(xlator_t *this, stripe_local_t *local,
- char **xattr_serz)
-{
- int ret = -1;
- int32_t padding = 0;
- int32_t tlen = 0;
- int len = 0;
- char stripe_size_str[20] = {
- 0,
- };
- char *pathinfo_serz = NULL;
-
- if (!local) {
- gf_log(this->name, GF_LOG_ERROR, "Possible NULL deref");
- goto out;
- }
-
- len = snprintf(stripe_size_str, sizeof(stripe_size_str), "%" PRId64,
- local->fctx ? local->fctx->stripe_size : 0);
- if (len < 0 || len >= sizeof(stripe_size_str))
- goto out;
- /* extra bytes for decorations (brackets and <>'s) */
- padding = strlen(this->name) + SLEN(STRIPE_PATHINFO_HEADER) + len + 7;
- local->xattr_total_len += (padding + 2);
-
- pathinfo_serz = GF_MALLOC(local->xattr_total_len, gf_common_mt_char);
- if (!pathinfo_serz)
- goto out;
-
- /* xlator info */
- (void)sprintf(pathinfo_serz, "(<" STRIPE_PATHINFO_HEADER "%s:[%s]> ",
- this->name, stripe_size_str);
-
- ret = stripe_xattr_aggregate(pathinfo_serz + padding, local, &tlen);
- if (ret) {
- gf_log(this->name, GF_LOG_ERROR, "Cannot aggregate pathinfo list");
- GF_FREE(pathinfo_serz);
- goto out;
- }
-
- *(pathinfo_serz + padding + tlen) = ')';
- *(pathinfo_serz + padding + tlen + 1) = '\0';
-
- *xattr_serz = pathinfo_serz;
-
- ret = 0;
-out:
- return ret;
-}
-
-/**
- * stripe_get_matching_bs - Get the matching block size for the given path.
- */
-int32_t
-stripe_get_matching_bs(const char *path, stripe_private_t *priv)
-{
- struct stripe_options *trav = NULL;
- uint64_t block_size = 0;
-
- GF_VALIDATE_OR_GOTO("stripe", priv, out);
- GF_VALIDATE_OR_GOTO("stripe", path, out);
-
- LOCK(&priv->lock);
- {
- block_size = priv->block_size;
- trav = priv->pattern;
- while (trav) {
- if (!fnmatch(trav->path_pattern, path, FNM_NOESCAPE)) {
- block_size = trav->block_size;
- break;
- }
- trav = trav->next;
- }
- }
- UNLOCK(&priv->lock);
-
-out:
- return block_size;
-}
-
-int32_t
-stripe_ctx_handle(xlator_t *this, call_frame_t *prev, stripe_local_t *local,
- dict_t *dict)
-{
- char key[256] = {
- 0,
- };
- data_t *data = NULL;
- int32_t index = 0;
- stripe_private_t *priv = NULL;
-
- priv = this->private;
-
- if (!local->fctx) {
- local->fctx = GF_CALLOC(1, sizeof(stripe_fd_ctx_t),
- gf_stripe_mt_stripe_fd_ctx_t);
- if (!local->fctx) {
- local->op_errno = ENOMEM;
- local->op_ret = -1;
- goto out;
- }
-
- local->fctx->static_array = 0;
- }
- /* Stripe block size */
- sprintf(key, "trusted.%s.stripe-size", this->name);
- data = dict_get(dict, key);
- if (!data) {
- local->xattr_self_heal_needed = 1;
- gf_log(this->name, GF_LOG_ERROR, "Failed to get stripe-size");
- goto out;
- } else {
- if (!local->fctx->stripe_size) {
- local->fctx->stripe_size = data_to_int64(data);
- }
-
- if (local->fctx->stripe_size != data_to_int64(data)) {
- gf_log(this->name, GF_LOG_WARNING,
- "stripe-size mismatch in blocks");
- local->xattr_self_heal_needed = 1;
- }
- }
-
- /* Stripe count */
- sprintf(key, "trusted.%s.stripe-count", this->name);
- data = dict_get(dict, key);
-
- if (!data) {
- local->xattr_self_heal_needed = 1;
- gf_log(this->name, GF_LOG_ERROR, "Failed to get stripe-count");
- goto out;
- }
- if (!local->fctx->xl_array) {
- local->fctx->stripe_count = data_to_int32(data);
- if (!local->fctx->stripe_count) {
- gf_log(this->name, GF_LOG_ERROR, "error with stripe-count xattr");
- local->op_ret = -1;
- local->op_errno = EIO;
- goto out;
- }
-
- local->fctx->xl_array = GF_CALLOC(local->fctx->stripe_count,
- sizeof(xlator_t *),
- gf_stripe_mt_xlator_t);
-
- if (!local->fctx->xl_array) {
- local->op_errno = ENOMEM;
- local->op_ret = -1;
- goto out;
- }
- }
- if (local->fctx->stripe_count != data_to_int32(data)) {
- gf_log(this->name, GF_LOG_ERROR,
- "error with stripe-count xattr (%d != %d)",
- local->fctx->stripe_count, data_to_int32(data));
- local->op_ret = -1;
- local->op_errno = EIO;
- goto out;
- }
-
- /* index */
- sprintf(key, "trusted.%s.stripe-index", this->name);
- data = dict_get(dict, key);
- if (!data) {
- local->xattr_self_heal_needed = 1;
- gf_log(this->name, GF_LOG_ERROR, "Failed to get stripe-index");
- goto out;
- }
- index = data_to_int32(data);
- if (index > priv->child_count) {
- gf_log(this->name, GF_LOG_ERROR, "error with stripe-index xattr (%d)",
- index);
- local->op_ret = -1;
- local->op_errno = EIO;
- goto out;
- }
- if (local->fctx->xl_array) {
- if (!local->fctx->xl_array[index])
- local->fctx->xl_array[index] = prev->this;
- }
-
- sprintf(key, "trusted.%s.stripe-coalesce", this->name);
- data = dict_get(dict, key);
- if (!data) {
- /*
- * The file was probably created prior to coalesce support.
- * Assume non-coalesce mode for this file to maintain backwards
- * compatibility.
- */
- gf_log(this->name, GF_LOG_DEBUG,
- "missing stripe-coalesce "
- "attr, assume non-coalesce mode");
- local->fctx->stripe_coalesce = 0;
- } else {
- local->fctx->stripe_coalesce = data_to_int32(data);
- }
-
-out:
- return 0;
-}
-
-int32_t
-stripe_xattr_request_build(xlator_t *this, dict_t *dict, uint64_t stripe_size,
- uint32_t stripe_count, uint32_t stripe_index,
- uint32_t stripe_coalesce)
-{
- char key[256] = {
- 0,
- };
- int32_t ret = -1;
-
- sprintf(key, "trusted.%s.stripe-size", this->name);
- ret = dict_set_int64(dict, key, stripe_size);
- if (ret) {
- gf_log(this->name, GF_LOG_WARNING, "failed to set %s in xattr_req dict",
- key);
- goto out;
- }
-
- sprintf(key, "trusted.%s.stripe-count", this->name);
- ret = dict_set_int32(dict, key, stripe_count);
- if (ret) {
- gf_log(this->name, GF_LOG_WARNING, "failed to set %s in xattr_req dict",
- key);
- goto out;
- }
-
- sprintf(key, "trusted.%s.stripe-index", this->name);
- ret = dict_set_int32(dict, key, stripe_index);
- if (ret) {
- gf_log(this->name, GF_LOG_WARNING, "failed to set %s in xattr_req dict",
- key);
- goto out;
- }
-
- sprintf(key, "trusted.%s.stripe-coalesce", this->name);
- ret = dict_set_int32(dict, key, stripe_coalesce);
- if (ret) {
- gf_log(this->name, GF_LOG_WARNING, "failed to set %s in xattr_req_dict",
- key);
- goto out;
- }
-out:
- return ret;
-}
-
-static int
-set_default_block_size(stripe_private_t *priv, char *num)
-{
- int ret = -1;
- GF_VALIDATE_OR_GOTO("stripe", THIS, out);
- GF_VALIDATE_OR_GOTO(THIS->name, priv, out);
- GF_VALIDATE_OR_GOTO(THIS->name, num, out);
-
- if (gf_string2bytesize_uint64(num, &priv->block_size) != 0) {
- gf_log(THIS->name, GF_LOG_ERROR, "invalid number format \"%s\"", num);
- goto out;
- }
-
- ret = 0;
-
-out:
- return ret;
-}
-
-int
-set_stripe_block_size(xlator_t *this, stripe_private_t *priv, char *data)
-{
- int ret = -1;
- char *tmp_str = NULL;
- char *tmp_str1 = NULL;
- char *dup_str = NULL;
- char *stripe_str = NULL;
- char *pattern = NULL;
- char *num = NULL;
- struct stripe_options *temp_stripeopt = NULL;
- struct stripe_options *stripe_opt = NULL;
-
- if (!this || !priv || !data)
- goto out;
-
- /* Get the pattern for striping.
- "option block-size *avi:10MB" etc */
- stripe_str = strtok_r(data, ",", &tmp_str);
- while (stripe_str) {
- dup_str = gf_strdup(stripe_str);
- stripe_opt = GF_CALLOC(1, sizeof(struct stripe_options),
- gf_stripe_mt_stripe_options);
- if (!stripe_opt) {
- goto out;
- }
-
- pattern = strtok_r(dup_str, ":", &tmp_str1);
- num = strtok_r(NULL, ":", &tmp_str1);
- if (!num) {
- num = pattern;
- pattern = "*";
- ret = set_default_block_size(priv, num);
- if (ret)
- goto out;
- }
- if (gf_string2bytesize_uint64(num, &stripe_opt->block_size) != 0) {
- gf_log(this->name, GF_LOG_ERROR, "invalid number format \"%s\"",
- num);
- goto out;
- }
-
- if (stripe_opt->block_size < STRIPE_MIN_BLOCK_SIZE) {
- gf_log(this->name, GF_LOG_ERROR,
- "Invalid Block-size: "
- "%s. Should be at least %llu bytes",
- num, STRIPE_MIN_BLOCK_SIZE);
- goto out;
- }
- if (stripe_opt->block_size % 512) {
- gf_log(this->name, GF_LOG_ERROR,
- "Block-size: %s should"
- " be a multiple of 512 bytes",
- num);
- goto out;
- }
-
- memcpy(stripe_opt->path_pattern, pattern, strlen(pattern));
-
- gf_log(this->name, GF_LOG_DEBUG,
- "block-size : pattern %s : size %" PRId64,
- stripe_opt->path_pattern, stripe_opt->block_size);
-
- if (priv->pattern)
- temp_stripeopt = NULL;
- else
- temp_stripeopt = priv->pattern;
-
- stripe_opt->next = temp_stripeopt;
-
- priv->pattern = stripe_opt;
- stripe_opt = NULL;
-
- GF_FREE(dup_str);
- dup_str = NULL;
-
- stripe_str = strtok_r(NULL, ",", &tmp_str);
- }
-
- ret = 0;
-out:
-
- GF_FREE(dup_str);
-
- GF_FREE(stripe_opt);
-
- return ret;
-}
-
-int32_t
-stripe_iatt_merge(struct iatt *from, struct iatt *to)
-{
- if (to->ia_size < from->ia_size)
- to->ia_size = from->ia_size;
- if (to->ia_mtime < from->ia_mtime)
- to->ia_mtime = from->ia_mtime;
- if (to->ia_ctime < from->ia_ctime)
- to->ia_ctime = from->ia_ctime;
- if (to->ia_atime < from->ia_atime)
- to->ia_atime = from->ia_atime;
- return 0;
-}
-
-off_t
-coalesced_offset(off_t offset, uint64_t stripe_size, int stripe_count)
-{
- size_t line_size = 0;
- uint64_t stripe_num = 0;
- off_t coalesced_offset = 0;
-
- line_size = stripe_size * stripe_count;
- stripe_num = offset / line_size;
-
- coalesced_offset = (stripe_num * stripe_size) + (offset % stripe_size);
-
- return coalesced_offset;
-}
-
-off_t
-uncoalesced_size(off_t size, uint64_t stripe_size, int stripe_count,
- int stripe_index)
-{
- uint64_t nr_full_stripe_chunks = 0, mod = 0;
-
- if (!size)
- return size;
-
- /*
- * Estimate the number of fully written stripes from the
- * local file size. Each stripe_size chunk corresponds to
- * a stripe.
- */
- nr_full_stripe_chunks = (size / stripe_size) * stripe_count;
- mod = size % stripe_size;
-
- if (!mod) {
- /*
- * There is no remainder, thus we could have overestimated
- * the size of the file in terms of chunks. Trim the number
- * of chunks by the following stripe members and leave it
- * up to those nodes to respond with a larger size (if
- * necessary).
- */
- nr_full_stripe_chunks -= stripe_count - (stripe_index + 1);
- size = nr_full_stripe_chunks * stripe_size;
- } else {
- /*
- * There is a remainder and thus we own the last chunk of the
- * file. Add the preceding stripe members of the final stripe
- * along with the remainder to calculate the exact size.
- */
- nr_full_stripe_chunks += stripe_index;
- size = nr_full_stripe_chunks * stripe_size + mod;
- }
-
- return size;
-}
diff --git a/xlators/cluster/stripe/src/stripe-mem-types.h b/xlators/cluster/stripe/src/stripe-mem-types.h
deleted file mode 100644
index dcbef31212b..00000000000
--- a/xlators/cluster/stripe/src/stripe-mem-types.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __STRIPE_MEM_TYPES_H__
-#define __STRIPE_MEM_TYPES_H__
-
-#include "mem-types.h"
-
-enum gf_stripe_mem_types_ {
- gf_stripe_mt_iovec = gf_common_mt_end + 1,
- gf_stripe_mt_stripe_replies,
- gf_stripe_mt_stripe_fd_ctx_t,
- gf_stripe_mt_char,
- gf_stripe_mt_int8_t,
- gf_stripe_mt_int32_t,
- gf_stripe_mt_xlator_t,
- gf_stripe_mt_stripe_private_t,
- gf_stripe_mt_stripe_options,
- gf_stripe_mt_xattr_sort_t,
- gf_stripe_mt_end
-};
-#endif
diff --git a/xlators/cluster/stripe/src/stripe.c b/xlators/cluster/stripe/src/stripe.c
deleted file mode 100644
index 6010c1ed6c3..00000000000
--- a/xlators/cluster/stripe/src/stripe.c
+++ /dev/null
@@ -1,5612 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-/**
- * xlators/cluster/stripe:
- * Stripe translator, stripes the data across its child nodes,
- * as per the options given in the volfile. The striping works
- * fairly simple. It writes files at different offset as per
- * calculation. So, 'ls -l' output at the real posix level will
- * show file size bigger than the actual size. But when one does
- * 'df' or 'du <file>', real size of the file on the server is shown.
- *
- * WARNING:
- * Stripe translator can't regenerate data if a child node gets disconnected.
- * So, no 'self-heal' for stripe. Hence the advice, use stripe only when its
- * very much necessary, or else, use it in combination with AFR, to have a
- * backup copy.
- */
-#include <fnmatch.h>
-#include "stripe.h"
-#include "libxlator.h"
-#include "byte-order.h"
-#include "statedump.h"
-
-struct volume_options options[];
-
-int32_t
-stripe_sh_chown_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preop,
- struct iatt *postop, dict_t *xdata)
-{
- int callcnt = -1;
- stripe_local_t *local = NULL;
-
- if (!this || !frame || !frame->local) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- STRIPE_STACK_DESTROY(frame);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_sh_make_entry_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!frame || !frame->local || !cookie || !this) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- STACK_WIND(frame, stripe_sh_chown_cbk, prev->this,
- prev->this->fops->setattr, &local->loc, &local->stbuf,
- (GF_SET_ATTR_UID | GF_SET_ATTR_GID), NULL);
-
-out:
- return 0;
-}
-
-int32_t
-stripe_entry_self_heal(call_frame_t *frame, xlator_t *this,
- stripe_local_t *local)
-{
- xlator_list_t *trav = NULL;
- call_frame_t *rframe = NULL;
- stripe_local_t *rlocal = NULL;
- stripe_private_t *priv = NULL;
- dict_t *xdata = NULL;
- int ret = 0;
-
- if (!local || !this || !frame) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- if (!(IA_ISREG(local->stbuf.ia_type) || IA_ISDIR(local->stbuf.ia_type)))
- return 0;
-
- priv = this->private;
- trav = this->children;
- rframe = copy_frame(frame);
- if (!rframe) {
- goto out;
- }
- rlocal = mem_get0(this->local_pool);
- if (!rlocal) {
- goto out;
- }
- rframe->local = rlocal;
- rlocal->call_count = priv->child_count;
- loc_copy(&rlocal->loc, &local->loc);
- memcpy(&rlocal->stbuf, &local->stbuf, sizeof(struct iatt));
-
- xdata = dict_new();
- if (!xdata)
- goto out;
-
- ret = dict_set_gfuuid(xdata, "gfid-req", local->stbuf.ia_gfid, true);
- if (ret)
- gf_log(this->name, GF_LOG_WARNING, "%s: failed to set gfid-req",
- local->loc.path);
-
- while (trav) {
- if (IA_ISREG(local->stbuf.ia_type)) {
- STACK_WIND(
- rframe, stripe_sh_make_entry_cbk, trav->xlator,
- trav->xlator->fops->mknod, &local->loc,
- st_mode_from_ia(local->stbuf.ia_prot, local->stbuf.ia_type), 0,
- 0, xdata);
- }
- if (IA_ISDIR(local->stbuf.ia_type)) {
- STACK_WIND(
- rframe, stripe_sh_make_entry_cbk, trav->xlator,
- trav->xlator->fops->mkdir, &local->loc,
- st_mode_from_ia(local->stbuf.ia_prot, local->stbuf.ia_type), 0,
- xdata);
- }
- trav = trav->next;
- }
-
- if (xdata)
- dict_unref(xdata);
- return 0;
-
-out:
- if (rframe)
- STRIPE_STACK_DESTROY(rframe);
-
- return 0;
-}
-
-int32_t
-stripe_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xdata, struct iatt *postparent)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
- int ret = 0;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- if ((op_errno != ENOENT) && (op_errno != ESTALE))
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- if (local->op_errno != ESTALE)
- local->op_errno = op_errno;
- if (((op_errno != ENOENT) && (op_errno != ENOTCONN) &&
- (op_errno != ESTALE)) ||
- (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- if (op_errno == ENOENT)
- local->entry_self_heal_needed = 1;
- }
-
- if (op_ret >= 0) {
- local->op_ret = 0;
- if (IA_ISREG(buf->ia_type)) {
- ret = stripe_ctx_handle(this, prev, local, xdata);
- if (ret)
- gf_log(this->name, GF_LOG_ERROR,
- "Error getting fctx info from"
- " dict");
- }
-
- if (FIRST_CHILD(this) == prev->this) {
- local->stbuf = *buf;
- local->postparent = *postparent;
- local->inode = inode_ref(inode);
- if (xdata)
- local->xdata = dict_ref(xdata);
- if (local->xattr) {
- stripe_aggregate_xattr(local->xdata, local->xattr);
- dict_unref(local->xattr);
- local->xattr = NULL;
- }
- }
-
- if (!local->xdata && !local->xattr) {
- local->xattr = dict_ref(xdata);
- } else if (local->xdata) {
- stripe_aggregate_xattr(local->xdata, xdata);
- } else if (local->xattr) {
- stripe_aggregate_xattr(local->xattr, xdata);
- }
-
- local->stbuf_blocks += buf->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- correct_file_size(buf, local->fctx, prev);
-
- if (local->stbuf_size < buf->ia_size)
- local->stbuf_size = buf->ia_size;
- if (local->postparent_size < postparent->ia_size)
- local->postparent_size = postparent->ia_size;
-
- if (gf_uuid_is_null(local->ia_gfid))
- gf_uuid_copy(local->ia_gfid, buf->ia_gfid);
-
- /* Make sure the gfid on all the nodes are same */
- if (gf_uuid_compare(local->ia_gfid, buf->ia_gfid)) {
- gf_log(this->name, GF_LOG_WARNING,
- "%s: gfid different on subvolume %s", local->loc.path,
- prev->this->name);
- }
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->op_ret == 0 && local->entry_self_heal_needed &&
- !gf_uuid_is_null(local->loc.inode->gfid))
- stripe_entry_self_heal(frame, this, local);
-
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret != -1) {
- local->stbuf.ia_blocks = local->stbuf_blocks;
- local->stbuf.ia_size = local->stbuf_size;
- local->postparent.ia_blocks = local->postparent_blocks;
- local->postparent.ia_size = local->postparent_size;
- inode_ctx_put(local->inode, this, (uint64_t)(long)local->fctx);
- }
-
- STRIPE_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, local->xdata,
- &local->postparent);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = EINVAL;
- int64_t filesize = 0;
- int ret = 0;
- uint64_t tmpctx = 0;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- loc_copy(&local->loc, loc);
-
- inode_ctx_get(local->inode, this, &tmpctx);
- if (tmpctx)
- local->fctx = (stripe_fd_ctx_t *)(long)tmpctx;
-
- /* quick-read friendly changes */
- if (xdata && dict_get(xdata, GF_CONTENT_KEY)) {
- ret = dict_get_int64(xdata, GF_CONTENT_KEY, &filesize);
- if (!ret && (filesize > priv->block_size))
- dict_del(xdata, GF_CONTENT_KEY);
- }
-
- /* get stripe-size xattr on lookup. This would be required for
- * open/read/write/pathinfo calls. Hence we send down the request
- * even when type == IA_INVAL */
-
- /*
- * We aren't guaranteed to have xdata here. We need the format info for
- * the file, so allocate xdata if necessary.
- */
- if (!xdata)
- xdata = dict_new();
- else
- xdata = dict_ref(xdata);
-
- if (xdata &&
- (IA_ISREG(loc->inode->ia_type) || (loc->inode->ia_type == IA_INVAL))) {
- ret = stripe_xattr_request_build(this, xdata, 8, 4, 4, 0);
- if (ret)
- gf_log(this->name, GF_LOG_ERROR,
- "Failed to build"
- " xattr request for %s",
- loc->path);
- }
-
- /* Every time in stripe lookup, all child nodes
- should be looked up */
- local->call_count = priv->child_count;
- while (trav) {
- STACK_WIND(frame, stripe_lookup_cbk, trav->xlator,
- trav->xlator->fops->lookup, loc, xdata);
- trav = trav->next;
- }
-
- dict_unref(xdata);
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- }
-
- if (op_ret == 0) {
- local->op_ret = 0;
-
- if (FIRST_CHILD(this) == prev->this) {
- local->stbuf = *buf;
- }
-
- local->stbuf_blocks += buf->ia_blocks;
-
- correct_file_size(buf, local->fctx, prev);
-
- if (local->stbuf_size < buf->ia_size)
- local->stbuf_size = buf->ia_size;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret != -1) {
- local->stbuf.ia_size = local->stbuf_size;
- local->stbuf.ia_blocks = local->stbuf_blocks;
- }
-
- STRIPE_STACK_UNWIND(stat, frame, local->op_ret, local->op_errno,
- &local->stbuf, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- xlator_list_t *trav = NULL;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->path, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- if (IA_ISREG(loc->inode->ia_type)) {
- inode_ctx_get(loc->inode, this, (uint64_t *)&fctx);
- if (!fctx)
- goto err;
- local->fctx = fctx;
- }
-
- while (trav) {
- STACK_WIND(frame, stripe_stat_cbk, trav->xlator,
- trav->xlator->fops->stat, loc, NULL);
- trav = trav->next;
- }
-
- return 0;
-
-err:
- STRIPE_STACK_UNWIND(stat, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *stbuf,
- dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- int32_t callcnt = 0;
-
- if (!this || !frame || !frame->local) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret && (op_errno != ENOTCONN)) {
- local->op_errno = op_errno;
- }
- if (op_ret == 0) {
- struct statvfs *dict_buf = &local->statvfs_buf;
- dict_buf->f_bsize = stbuf->f_bsize;
- dict_buf->f_frsize = stbuf->f_frsize;
- dict_buf->f_blocks += stbuf->f_blocks;
- dict_buf->f_bfree += stbuf->f_bfree;
- dict_buf->f_bavail += stbuf->f_bavail;
- dict_buf->f_files += stbuf->f_files;
- dict_buf->f_ffree += stbuf->f_ffree;
- dict_buf->f_favail += stbuf->f_favail;
- dict_buf->f_fsid = stbuf->f_fsid;
- dict_buf->f_flag = stbuf->f_flag;
- dict_buf->f_namemax = stbuf->f_namemax;
- local->op_ret = 0;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- STRIPE_STACK_UNWIND(statfs, frame, local->op_ret, local->op_errno,
- &local->statvfs_buf, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
-
- trav = this->children;
- priv = this->private;
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- local->op_errno = ENOTCONN;
- frame->local = local;
-
- local->call_count = priv->child_count;
- while (trav) {
- STACK_WIND(frame, stripe_statfs_cbk, trav->xlator,
- trav->xlator->fops->statfs, loc, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(statfs, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- }
-
- if (op_ret == 0) {
- local->op_ret = 0;
- if (FIRST_CHILD(this) == prev->this) {
- local->pre_buf = *prebuf;
- local->post_buf = *postbuf;
- }
-
- local->prebuf_blocks += prebuf->ia_blocks;
- local->postbuf_blocks += postbuf->ia_blocks;
-
- correct_file_size(prebuf, local->fctx, prev);
- correct_file_size(postbuf, local->fctx, prev);
-
- if (local->prebuf_size < prebuf->ia_size)
- local->prebuf_size = prebuf->ia_size;
-
- if (local->postbuf_size < postbuf->ia_size)
- local->postbuf_size = postbuf->ia_size;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret != -1) {
- local->pre_buf.ia_blocks = local->prebuf_blocks;
- local->pre_buf.ia_size = local->prebuf_size;
- local->post_buf.ia_blocks = local->postbuf_blocks;
- local->post_buf.ia_size = local->postbuf_size;
- }
-
- STRIPE_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno,
- &local->pre_buf, &local->post_buf, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
- dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int32_t op_errno = EINVAL;
- int i, eof_idx;
- off_t dest_offset, tmp_offset;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->path, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- priv = this->private;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- inode_ctx_get(loc->inode, this, (uint64_t *)&fctx);
- if (!fctx) {
- gf_log(this->name, GF_LOG_ERROR, "no stripe context");
- op_errno = EINVAL;
- goto err;
- }
-
- local->fctx = fctx;
- eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count;
-
- for (i = 0; i < fctx->stripe_count; i++) {
- if (!fctx->xl_array[i]) {
- gf_log(this->name, GF_LOG_ERROR, "no xlator at index %d", i);
- op_errno = EINVAL;
- goto err;
- }
-
- if (fctx->stripe_coalesce) {
- /*
- * The node that owns EOF is truncated to the exact
- * coalesced offset. Nodes prior to this index should
- * be rounded up to the size of the complete stripe,
- * while nodes after this index should be rounded down
- * to the size of the previous stripe.
- */
- if (i < eof_idx)
- tmp_offset = gf_roof(offset,
- fctx->stripe_size * fctx->stripe_count);
- else if (i > eof_idx)
- tmp_offset = gf_floor(offset,
- fctx->stripe_size * fctx->stripe_count);
- else
- tmp_offset = offset;
-
- dest_offset = coalesced_offset(tmp_offset, fctx->stripe_size,
- fctx->stripe_count);
- } else {
- dest_offset = offset;
- }
-
- STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i],
- fctx->xl_array[i]->fops->truncate, loc, dest_offset, NULL);
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(truncate, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preop,
- struct iatt *postop, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- }
-
- if (op_ret == 0) {
- local->op_ret = 0;
-
- if (FIRST_CHILD(this) == prev->this) {
- local->pre_buf = *preop;
- local->post_buf = *postop;
- }
-
- local->prebuf_blocks += preop->ia_blocks;
- local->postbuf_blocks += postop->ia_blocks;
-
- correct_file_size(preop, local->fctx, prev);
- correct_file_size(postop, local->fctx, prev);
-
- if (local->prebuf_size < preop->ia_size)
- local->prebuf_size = preop->ia_size;
- if (local->postbuf_size < postop->ia_size)
- local->postbuf_size = postop->ia_size;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret != -1) {
- local->pre_buf.ia_blocks = local->prebuf_blocks;
- local->pre_buf.ia_size = local->prebuf_size;
- local->post_buf.ia_blocks = local->postbuf_blocks;
- local->post_buf.ia_size = local->postbuf_size;
- }
-
- STRIPE_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno,
- &local->pre_buf, &local->post_buf, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid, dict_t *xdata)
-{
- xlator_list_t *trav = NULL;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->path, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- if (!IA_ISDIR(loc->inode->ia_type) && !IA_ISREG(loc->inode->ia_type)) {
- local->call_count = 1;
- STACK_WIND(frame, stripe_setattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, NULL);
- return 0;
- }
-
- if (IA_ISREG(loc->inode->ia_type)) {
- inode_ctx_get(loc->inode, this, (uint64_t *)&fctx);
- if (!fctx)
- goto err;
- local->fctx = fctx;
- }
-
- local->call_count = priv->child_count;
- while (trav) {
- STACK_WIND(frame, stripe_setattr_cbk, trav->xlator,
- trav->xlator->fops->setattr, loc, stbuf, valid, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(setattr, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
- VALIDATE_OR_GOTO(fd->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- while (trav) {
- STACK_WIND(frame, stripe_setattr_cbk, trav->xlator,
- trav->xlator->fops->fsetattr, fd, stbuf, valid, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_stack_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent,
- dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- }
-
- if (op_ret == 0) {
- local->op_ret = 0;
-
- local->stbuf.ia_blocks += buf->ia_blocks;
- local->preparent.ia_blocks += preoldparent->ia_blocks;
- local->postparent.ia_blocks += postoldparent->ia_blocks;
- local->pre_buf.ia_blocks += prenewparent->ia_blocks;
- local->post_buf.ia_blocks += postnewparent->ia_blocks;
-
- correct_file_size(buf, local->fctx, prev);
-
- if (local->stbuf.ia_size < buf->ia_size)
- local->stbuf.ia_size = buf->ia_size;
-
- if (local->preparent.ia_size < preoldparent->ia_size)
- local->preparent.ia_size = preoldparent->ia_size;
-
- if (local->postparent.ia_size < postoldparent->ia_size)
- local->postparent.ia_size = postoldparent->ia_size;
-
- if (local->pre_buf.ia_size < prenewparent->ia_size)
- local->pre_buf.ia_size = prenewparent->ia_size;
-
- if (local->post_buf.ia_size < postnewparent->ia_size)
- local->post_buf.ia_size = postnewparent->ia_size;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- STRIPE_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->preparent,
- &local->postparent, &local->pre_buf,
- &local->post_buf, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_first_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent,
- dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
-
- if (!this || !frame || !frame->local) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- op_errno = EINVAL;
- goto unwind;
- }
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- local = frame->local;
- trav = this->children;
-
- local->stbuf = *buf;
- local->preparent = *preoldparent;
- local->postparent = *postoldparent;
- local->pre_buf = *prenewparent;
- local->post_buf = *postnewparent;
-
- local->op_ret = 0;
- local->call_count--;
-
- trav = trav->next; /* Skip first child */
- while (trav) {
- STACK_WIND(frame, stripe_stack_rename_cbk, trav->xlator,
- trav->xlator->fops->rename, &local->loc, &local->loc2, NULL);
- trav = trav->next;
- }
- return 0;
-
-unwind:
- STRIPE_STACK_UNWIND(rename, frame, -1, op_errno, buf, preoldparent,
- postoldparent, prenewparent, postnewparent, NULL);
- return 0;
-}
-
-int32_t
-stripe_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
-{
- stripe_private_t *priv = NULL;
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(oldloc, err);
- VALIDATE_OR_GOTO(oldloc->path, err);
- VALIDATE_OR_GOTO(oldloc->inode, err);
- VALIDATE_OR_GOTO(newloc, err);
-
- priv = this->private;
- trav = this->children;
-
- /* If any one node is down, don't allow rename */
- if (priv->nodes_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- frame->local = local;
-
- local->op_ret = -1;
- loc_copy(&local->loc, oldloc);
- loc_copy(&local->loc2, newloc);
-
- local->call_count = priv->child_count;
-
- if (IA_ISREG(oldloc->inode->ia_type)) {
- inode_ctx_get(oldloc->inode, this, (uint64_t *)&fctx);
- if (!fctx)
- goto err;
- local->fctx = fctx;
- }
-
- STACK_WIND(frame, stripe_first_rename_cbk, trav->xlator,
- trav->xlator->fops->rename, oldloc, newloc, NULL);
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
- NULL, NULL);
- return 0;
-}
-int32_t
-stripe_first_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned %s", prev->this->name,
- strerror(op_errno));
- goto out;
- }
- local->op_ret = 0;
- local->preparent = *preparent;
- local->postparent = *postparent;
- local->preparent_blocks += preparent->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- STRIPE_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent, xdata);
- return 0;
-out:
- STRIPE_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-stripe_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned %s", prev->this->name,
- strerror(op_errno));
- local->op_errno = op_errno;
- if (op_errno != ENOENT) {
- local->failed = 1;
- local->op_ret = op_ret;
- }
- }
- }
- UNLOCK(&frame->lock);
-
- if (callcnt == 1) {
- if (local->failed) {
- op_errno = local->op_errno;
- goto out;
- }
- STACK_WIND(frame, stripe_first_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag,
- local->xdata);
- }
- return 0;
-out:
- STRIPE_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-stripe_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
- dict_t *xdata)
-{
- xlator_list_t *trav = NULL;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->path, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Don't unlink a file if a node is down */
- if (priv->nodes_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- loc_copy(&local->loc, loc);
- local->xflag = xflag;
-
- if (xdata)
- local->xdata = dict_ref(xdata);
-
- frame->local = local;
- local->call_count = priv->child_count;
- trav = trav->next; /* Skip the first child */
-
- while (trav) {
- STACK_WIND(frame, stripe_unlink_cbk, trav->xlator,
- trav->xlator->fops->unlink, loc, xflag, xdata);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_first_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
-
- if (!this || !frame || !frame->local) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- op_errno = EINVAL;
- goto err;
- }
-
- if (op_ret == -1) {
- goto err;
- }
-
- local = frame->local;
- local->op_ret = 0;
-
- local->call_count--; /* First child successful */
-
- local->preparent = *preparent;
- local->postparent = *postparent;
- local->preparent_size = preparent->ia_size;
- local->postparent_size = postparent->ia_size;
- local->preparent_blocks += preparent->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- STRIPE_STACK_UNWIND(rmdir, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent, xdata);
- return 0;
-err:
- STRIPE_STACK_UNWIND(rmdir, frame, op_ret, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned %s", prev->this->name,
- strerror(op_errno));
- if (op_errno != ENOENT)
- local->failed = 1;
- }
- }
- UNLOCK(&frame->lock);
-
- if (callcnt == 1) {
- if (local->failed)
- goto out;
- STACK_WIND(frame, stripe_first_rmdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir, &local->loc, local->flags,
- NULL);
- }
- return 0;
-out:
- STRIPE_STACK_UNWIND(rmdir, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
- dict_t *xdata)
-{
- xlator_list_t *trav = NULL;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->path, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- /* don't delete a directory if any of the subvolume is down */
- if (priv->nodes_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- loc_copy(&local->loc, loc);
- local->flags = flags;
- local->call_count = priv->child_count;
- trav = trav->next; /* skip the first child */
-
- while (trav) {
- STACK_WIND(frame, stripe_rmdir_cbk, trav->xlator,
- trav->xlator->fops->rmdir, loc, flags, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(rmdir, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_mknod_ifreg_fail_unlink_cbk(call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
-
- if (!this || !frame || !frame->local) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- STRIPE_STACK_UNWIND(mknod, frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, &local->preparent,
- &local->postparent, NULL);
- }
-out:
- return 0;
-}
-
-/**
- */
-int32_t
-stripe_mknod_ifreg_setxattr_cbk(call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- priv = this->private;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->op_ret = -1;
- local->op_errno = op_errno;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->op_ret == -1) {
- local->call_count = priv->child_count;
- while (trav) {
- STACK_WIND(frame, stripe_mknod_ifreg_fail_unlink_cbk,
- trav->xlator, trav->xlator->fops->unlink,
- &local->loc, 0, NULL);
- trav = trav->next;
- }
- return 0;
- }
-
- STRIPE_STACK_UNWIND(mknod, frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, &local->preparent,
- &local->postparent, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_mknod_ifreg_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- call_frame_t *prev = NULL;
- xlator_list_t *trav = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- priv = this->private;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- local->op_errno = op_errno;
- }
- if (op_ret >= 0) {
- local->op_ret = op_ret;
-
- /* Can be used as a mechanism to understand if mknod
- was successful in at least one place */
- if (gf_uuid_is_null(local->ia_gfid))
- gf_uuid_copy(local->ia_gfid, buf->ia_gfid);
-
- if (stripe_ctx_handle(this, prev, local, xdata))
- gf_log(this->name, GF_LOG_ERROR,
- "Error getting fctx info from dict");
-
- local->stbuf_blocks += buf->ia_blocks;
- local->preparent_blocks += preparent->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- correct_file_size(buf, local->fctx, prev);
-
- if (local->stbuf_size < buf->ia_size)
- local->stbuf_size = buf->ia_size;
- if (local->preparent_size < preparent->ia_size)
- local->preparent_size = preparent->ia_size;
- if (local->postparent_size < postparent->ia_size)
- local->postparent_size = postparent->ia_size;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if ((local->op_ret == -1) && !gf_uuid_is_null(local->ia_gfid)) {
- /* ia_gfid set means, at least on one node 'mknod'
- is successful */
- local->call_count = priv->child_count;
- trav = this->children;
- while (trav) {
- STACK_WIND(frame, stripe_mknod_ifreg_fail_unlink_cbk,
- trav->xlator, trav->xlator->fops->unlink,
- &local->loc, 0, NULL);
- trav = trav->next;
- }
- return 0;
- }
-
- if (local->op_ret != -1) {
- local->preparent.ia_blocks = local->preparent_blocks;
- local->preparent.ia_size = local->preparent_size;
- local->postparent.ia_blocks = local->postparent_blocks;
- local->postparent.ia_size = local->postparent_size;
- local->stbuf.ia_size = local->stbuf_size;
- local->stbuf.ia_blocks = local->stbuf_blocks;
- inode_ctx_put(local->inode, this, (uint64_t)(long)local->fctx);
- }
- STRIPE_STACK_UNWIND(mknod, frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, &local->preparent,
- &local->postparent, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_mknod_first_ifreg_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- call_frame_t *prev = NULL;
- xlator_list_t *trav = NULL;
- int i = 1;
- dict_t *dict = NULL;
- int ret = 0;
- int need_unref = 0;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- priv = this->private;
- local = frame->local;
- trav = this->children;
-
- local->call_count--;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->failed = 1;
- local->op_errno = op_errno;
- goto out;
- }
-
- local->op_ret = op_ret;
-
- local->stbuf = *buf;
- local->preparent = *preparent;
- local->postparent = *postparent;
-
- if (gf_uuid_is_null(local->ia_gfid))
- gf_uuid_copy(local->ia_gfid, buf->ia_gfid);
- local->preparent.ia_blocks = local->preparent_blocks;
- local->preparent.ia_size = local->preparent_size;
- local->postparent.ia_blocks = local->postparent_blocks;
- local->postparent.ia_size = local->postparent_size;
- local->stbuf.ia_size = local->stbuf_size;
- local->stbuf.ia_blocks = local->stbuf_blocks;
-
- trav = trav->next;
- while (trav) {
- if (priv->xattr_supported) {
- dict = dict_new();
- if (!dict) {
- gf_log(this->name, GF_LOG_ERROR, "failed to allocate dict %s",
- local->loc.path);
- }
- need_unref = 1;
-
- dict_copy(local->xattr, dict);
-
- ret = stripe_xattr_request_build(this, dict, local->stripe_size,
- priv->child_count, i,
- priv->coalesce);
- if (ret)
- gf_log(this->name, GF_LOG_ERROR,
- "Failed to build xattr request");
-
- } else {
- dict = local->xattr;
- }
-
- STACK_WIND(frame, stripe_mknod_ifreg_cbk, trav->xlator,
- trav->xlator->fops->mknod, &local->loc, local->mode,
- local->rdev, 0, dict);
- trav = trav->next;
- i++;
-
- if (dict && need_unref)
- dict_unref(dict);
- }
-
- return 0;
-
-out:
-
- STRIPE_STACK_UNWIND(mknod, frame, op_ret, op_errno, NULL, NULL, NULL, NULL,
- NULL);
- return 0;
-}
-
-int32_t
-stripe_single_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- STRIPE_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent,
- postparent, xdata);
- return 0;
-}
-
-int
-stripe_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev, mode_t umask, dict_t *xdata)
-{
- stripe_private_t *priv = NULL;
- stripe_local_t *local = NULL;
- int32_t op_errno = EINVAL;
- int32_t i = 0;
- dict_t *dict = NULL;
- int ret = 0;
- int need_unref = 0;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->path, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- priv = this->private;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- if (S_ISREG(mode)) {
- /* NOTE: on older kernels (older than 2.6.9),
- creat() fops is sent as mknod() + open(). Hence handling
- S_IFREG files is necessary */
- if (priv->nodes_down) {
- gf_log(this->name, GF_LOG_WARNING, "Some node down, returning EIO");
- op_errno = EIO;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- local->op_errno = ENOTCONN;
- local->stripe_size = stripe_get_matching_bs(loc->path, priv);
- frame->local = local;
- local->inode = inode_ref(loc->inode);
- loc_copy(&local->loc, loc);
- local->xattr = dict_copy_with_ref(xdata, NULL);
- local->mode = mode;
- local->umask = umask;
- local->rdev = rdev;
-
- /* Every time in stripe lookup, all child nodes should
- be looked up */
- local->call_count = priv->child_count;
-
- if (priv->xattr_supported) {
- dict = dict_new();
- if (!dict) {
- gf_log(this->name, GF_LOG_ERROR, "failed to allocate dict %s",
- loc->path);
- }
- need_unref = 1;
-
- dict_copy(xdata, dict);
-
- ret = stripe_xattr_request_build(this, dict, local->stripe_size,
- priv->child_count, i,
- priv->coalesce);
- if (ret)
- gf_log(this->name, GF_LOG_ERROR,
- "failed to build xattr request");
- } else {
- dict = xdata;
- }
-
- STACK_WIND(frame, stripe_mknod_first_ifreg_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask,
- dict);
-
- if (dict && need_unref)
- dict_unref(dict);
- return 0;
- }
-
- STACK_WIND(frame, stripe_single_mknod_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata);
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL,
- NULL);
- return 0;
-}
-
-int32_t
-stripe_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- }
-
- if (op_ret >= 0) {
- local->op_ret = 0;
-
- local->stbuf_blocks += buf->ia_blocks;
- local->preparent_blocks += preparent->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- if (local->stbuf_size < buf->ia_size)
- local->stbuf_size = buf->ia_size;
- if (local->preparent_size < preparent->ia_size)
- local->preparent_size = preparent->ia_size;
- if (local->postparent_size < postparent->ia_size)
- local->postparent_size = postparent->ia_size;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed != -1) {
- local->preparent.ia_blocks = local->preparent_blocks;
- local->preparent.ia_size = local->preparent_size;
- local->postparent.ia_blocks = local->postparent_blocks;
- local->postparent.ia_size = local->postparent_size;
- local->stbuf.ia_size = local->stbuf_size;
- local->stbuf.ia_blocks = local->stbuf_blocks;
- }
- STRIPE_STACK_UNWIND(mkdir, frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, &local->preparent,
- &local->postparent, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_first_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
- xlator_list_t *trav = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
- trav = this->children;
-
- local->call_count--; /* first child is successful */
- trav = trav->next; /* skip first child */
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->op_errno = op_errno;
- goto out;
- }
-
- local->op_ret = 0;
-
- local->inode = inode_ref(inode);
- local->stbuf = *buf;
- local->postparent = *postparent;
- local->preparent = *preparent;
-
- local->stbuf_blocks += buf->ia_blocks;
- local->preparent_blocks += preparent->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- local->stbuf_size = buf->ia_size;
- local->preparent_size = preparent->ia_size;
- local->postparent_size = postparent->ia_size;
-
- while (trav) {
- STACK_WIND(frame, stripe_mkdir_cbk, trav->xlator,
- trav->xlator->fops->mkdir, &local->loc, local->mode,
- local->umask, local->xdata);
- trav = trav->next;
- }
- return 0;
-out:
- STRIPE_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL,
- NULL);
-
- return 0;
-}
-
-int
-stripe_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- mode_t umask, dict_t *xdata)
-{
- stripe_private_t *priv = NULL;
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- int32_t op_errno = 1;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->path, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- local->call_count = priv->child_count;
- if (xdata)
- local->xdata = dict_ref(xdata);
- local->mode = mode;
- local->umask = umask;
- loc_copy(&local->loc, loc);
- frame->local = local;
-
- /* Every time in stripe lookup, all child nodes should be looked up */
- STACK_WIND(frame, stripe_first_mkdir_cbk, trav->xlator,
- trav->xlator->fops->mkdir, loc, mode, umask, xdata);
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL,
- NULL);
- return 0;
-}
-
-int32_t
-stripe_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
- stripe_fd_ctx_t *fctx = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- }
-
- if (op_ret >= 0) {
- local->op_ret = 0;
-
- if (IA_ISREG(inode->ia_type)) {
- inode_ctx_get(inode, this, (uint64_t *)&fctx);
- if (!fctx) {
- gf_log(this->name, GF_LOG_ERROR,
- "failed to get stripe context");
- op_ret = -1;
- op_errno = EINVAL;
- }
- }
-
- if (FIRST_CHILD(this) == prev->this) {
- local->inode = inode_ref(inode);
- local->stbuf = *buf;
- local->postparent = *postparent;
- local->preparent = *preparent;
- }
- local->stbuf_blocks += buf->ia_blocks;
- local->preparent_blocks += preparent->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- correct_file_size(buf, fctx, prev);
-
- if (local->stbuf_size < buf->ia_size)
- local->stbuf_size = buf->ia_size;
- if (local->preparent_size < preparent->ia_size)
- local->preparent_size = preparent->ia_size;
- if (local->postparent_size < postparent->ia_size)
- local->postparent_size = postparent->ia_size;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret != -1) {
- local->preparent.ia_blocks = local->preparent_blocks;
- local->preparent.ia_size = local->preparent_size;
- local->postparent.ia_blocks = local->postparent_blocks;
- local->postparent.ia_size = local->postparent_size;
- local->stbuf.ia_size = local->stbuf_size;
- local->stbuf.ia_blocks = local->stbuf_blocks;
- }
- STRIPE_STACK_UNWIND(link, frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, &local->preparent,
- &local->postparent, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
-{
- xlator_list_t *trav = NULL;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = 1;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(oldloc, err);
- VALIDATE_OR_GOTO(oldloc->path, err);
- VALIDATE_OR_GOTO(oldloc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- /* If any one node is down, don't allow link operation */
- if (priv->nodes_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- /* Every time in stripe lookup, all child
- nodes should be looked up */
- while (trav) {
- STACK_WIND(frame, stripe_link_cbk, trav->xlator,
- trav->xlator->fops->link, oldloc, newloc, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL,
- NULL);
- return 0;
-}
-
-int32_t
-stripe_create_fail_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
-
- if (!this || !frame || !frame->local) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- STRIPE_STACK_UNWIND(create, frame, local->op_ret, local->op_errno,
- local->fd, local->inode, &local->stbuf,
- &local->preparent, &local->postparent, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- call_frame_t *prev = NULL;
- xlator_list_t *trav = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- priv = this->private;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->failed = 1;
- local->op_errno = op_errno;
- }
-
- if (op_ret >= 0) {
- if (IA_ISREG(buf->ia_type)) {
- if (stripe_ctx_handle(this, prev, local, xdata))
- gf_log(this->name, GF_LOG_ERROR,
- "Error getting fctx info from "
- "dict");
- }
-
- local->op_ret = op_ret;
-
- local->stbuf_blocks += buf->ia_blocks;
- local->preparent_blocks += preparent->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- correct_file_size(buf, local->fctx, prev);
-
- if (local->stbuf_size < buf->ia_size)
- local->stbuf_size = buf->ia_size;
- if (local->preparent_size < preparent->ia_size)
- local->preparent_size = preparent->ia_size;
- if (local->postparent_size < postparent->ia_size)
- local->postparent_size = postparent->ia_size;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret == -1) {
- local->call_count = priv->child_count;
- trav = this->children;
- while (trav) {
- STACK_WIND(frame, stripe_create_fail_unlink_cbk, trav->xlator,
- trav->xlator->fops->unlink, &local->loc, 0, NULL);
- trav = trav->next;
- }
-
- return 0;
- }
-
- if (local->op_ret >= 0) {
- local->preparent.ia_blocks = local->preparent_blocks;
- local->preparent.ia_size = local->preparent_size;
- local->postparent.ia_blocks = local->postparent_blocks;
- local->postparent.ia_size = local->postparent_size;
- local->stbuf.ia_size = local->stbuf_size;
- local->stbuf.ia_blocks = local->stbuf_blocks;
-
- stripe_copy_xl_array(local->fctx->xl_array, priv->xl_array,
- local->fctx->stripe_count);
- inode_ctx_put(local->inode, this, (uint64_t)(uintptr_t)local->fctx);
- }
-
- /* Create itself has failed.. so return
- without setxattring */
- STRIPE_STACK_UNWIND(create, frame, local->op_ret, local->op_errno,
- local->fd, local->inode, &local->stbuf,
- &local->preparent, &local->postparent, NULL);
- }
-
-out:
- return 0;
-}
-
-int32_t
-stripe_first_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd,
- inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- call_frame_t *prev = NULL;
- xlator_list_t *trav = NULL;
- int i = 1;
- dict_t *dict = NULL;
- loc_t *loc = NULL;
- int32_t need_unref = 0;
- int32_t ret = -1;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- priv = this->private;
- local = frame->local;
- trav = this->children;
- loc = &local->loc;
-
- --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->failed = 1;
- local->op_errno = op_errno;
- }
-
- local->op_ret = 0;
- /* Get the mapping in inode private */
- /* Get the stat buf right */
- local->stbuf = *buf;
- local->preparent = *preparent;
- local->postparent = *postparent;
-
- local->stbuf_blocks += buf->ia_blocks;
- local->preparent_blocks += preparent->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- if (local->stbuf_size < buf->ia_size)
- local->stbuf_size = buf->ia_size;
- if (local->preparent_size < preparent->ia_size)
- local->preparent_size = preparent->ia_size;
- if (local->postparent_size < postparent->ia_size)
- local->postparent_size = postparent->ia_size;
-
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret == -1) {
- local->call_count = 1;
- STACK_WIND(frame, stripe_create_fail_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, &local->loc, 0, NULL);
- return 0;
- }
-
- if (local->op_ret >= 0) {
- local->preparent.ia_blocks = local->preparent_blocks;
- local->preparent.ia_size = local->preparent_size;
- local->postparent.ia_blocks = local->postparent_blocks;
- local->postparent.ia_size = local->postparent_size;
- local->stbuf.ia_size = local->stbuf_size;
- local->stbuf.ia_blocks = local->stbuf_blocks;
- }
-
- /* Send a setxattr request to nodes where the
- files are created */
- trav = trav->next;
- while (trav) {
- if (priv->xattr_supported) {
- dict = dict_new();
- if (!dict) {
- gf_log(this->name, GF_LOG_ERROR, "failed to allocate dict %s",
- loc->path);
- }
- need_unref = 1;
-
- dict_copy(local->xattr, dict);
-
- ret = stripe_xattr_request_build(this, dict, local->stripe_size,
- priv->child_count, i,
- priv->coalesce);
- if (ret)
- gf_log(this->name, GF_LOG_ERROR,
- "failed to build xattr request");
- } else {
- dict = local->xattr;
- }
-
- STACK_WIND(frame, stripe_create_cbk, trav->xlator,
- trav->xlator->fops->create, &local->loc, local->flags,
- local->mode, local->umask, local->fd, dict);
- trav = trav->next;
- if (need_unref && dict)
- dict_unref(dict);
- i++;
- }
-
-out:
- return 0;
-}
-
-/**
- * stripe_create - If a block-size is specified for the 'name', create the
- * file in all the child nodes. If not, create it in only first child.
- *
- * @name- complete path of the file to be created.
- */
-int32_t
-stripe_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
-{
- stripe_private_t *priv = NULL;
- stripe_local_t *local = NULL;
- int32_t op_errno = EINVAL;
- int ret = 0;
- int need_unref = 0;
- int i = 0;
- dict_t *dict = NULL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->path, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- priv = this->private;
-
- /* files created in O_APPEND mode does not allow lseek() on fd */
- flags &= ~O_APPEND;
-
- if (priv->first_child_down || priv->nodes_down) {
- gf_log(this->name, GF_LOG_DEBUG, "First node down, returning EIO");
- op_errno = EIO;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- local->op_errno = ENOTCONN;
- local->stripe_size = stripe_get_matching_bs(loc->path, priv);
- frame->local = local;
- local->inode = inode_ref(loc->inode);
- loc_copy(&local->loc, loc);
- local->fd = fd_ref(fd);
- local->flags = flags;
- local->mode = mode;
- local->umask = umask;
- if (xdata)
- local->xattr = dict_ref(xdata);
-
- local->call_count = priv->child_count;
- /* Send a setxattr request to nodes where the
- files are created */
-
- if (priv->xattr_supported) {
- dict = dict_new();
- if (!dict) {
- gf_log(this->name, GF_LOG_ERROR, "failed to allocate dict %s",
- loc->path);
- }
- need_unref = 1;
-
- dict_copy(xdata, dict);
-
- ret = stripe_xattr_request_build(this, dict, local->stripe_size,
- priv->child_count, i, priv->coalesce);
- if (ret)
- gf_log(this->name, GF_LOG_ERROR, "failed to build xattr request");
- } else {
- dict = xdata;
- }
-
- STACK_WIND(frame, stripe_first_create_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
- dict);
-
- if (need_unref && dict)
- dict_unref(dict);
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL,
- NULL, xdata);
- return 0;
-}
-
-int32_t
-stripe_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- local->op_errno = op_errno;
- }
-
- if (op_ret >= 0)
- local->op_ret = op_ret;
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- STRIPE_STACK_UNWIND(open, frame, local->op_ret, local->op_errno,
- local->fd, xdata);
- }
-out:
- return 0;
-}
-
-/**
- * stripe_open -
- */
-int32_t
-stripe_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- int32_t op_errno = 1;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->path, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- /* files opened in O_APPEND mode does not allow lseek() on fd */
- flags &= ~O_APPEND;
-
- local->fd = fd_ref(fd);
- frame->local = local;
- loc_copy(&local->loc, loc);
-
- /* Striped files */
- local->flags = flags;
- local->call_count = priv->child_count;
- local->stripe_size = stripe_get_matching_bs(loc->path, priv);
-
- while (trav) {
- STACK_WIND(frame, stripe_open_cbk, trav->xlator,
- trav->xlator->fops->open, &local->loc, local->flags,
- local->fd, xdata);
- trav = trav->next;
- }
- return 0;
-err:
- STRIPE_STACK_UNWIND(open, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->op_ret = -1;
- local->op_errno = op_errno;
- }
-
- if (op_ret >= 0)
- local->op_ret = op_ret;
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- STRIPE_STACK_UNWIND(opendir, frame, local->op_ret, local->op_errno,
- local->fd, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
- dict_t *xdata)
-{
- xlator_list_t *trav = NULL;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->path, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- frame->local = local;
- local->call_count = priv->child_count;
- local->fd = fd_ref(fd);
-
- while (trav) {
- STACK_WIND(frame, stripe_opendir_cbk, trav->xlator,
- trav->xlator->fops->opendir, loc, fd, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(opendir, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- }
- if (op_ret >= 0) {
- if (FIRST_CHILD(this) == prev->this) {
- /* First successful call, copy the *lock */
- local->op_ret = op_ret;
- local->lock = *lock;
- }
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
- STRIPE_STACK_UNWIND(lk, frame, local->op_ret, local->op_errno,
- &local->lock, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct gf_flock *lock, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
- VALIDATE_OR_GOTO(fd->inode, err);
-
- trav = this->children;
- priv = this->private;
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- while (trav) {
- STACK_WIND(frame, stripe_lk_cbk, trav->xlator, trav->xlator->fops->lk,
- fd, cmd, lock, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(lk, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned %s", prev->this->name,
- strerror(op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- }
- if (op_ret >= 0)
- local->op_ret = op_ret;
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- STRIPE_STACK_UNWIND(flush, frame, local->op_ret, local->op_errno, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- int32_t op_errno = 1;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
- VALIDATE_OR_GOTO(fd->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- while (trav) {
- STACK_WIND(frame, stripe_flush_cbk, trav->xlator,
- trav->xlator->fops->flush, fd, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(flush, frame, -1, op_errno, NULL);
- return 0;
-}
-
-int32_t
-stripe_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned %s", prev->this->name,
- strerror(op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- }
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- if (FIRST_CHILD(this) == prev->this) {
- local->pre_buf = *prebuf;
- local->post_buf = *postbuf;
- }
- local->prebuf_blocks += prebuf->ia_blocks;
- local->postbuf_blocks += postbuf->ia_blocks;
-
- correct_file_size(prebuf, local->fctx, prev);
- correct_file_size(postbuf, local->fctx, prev);
-
- if (local->prebuf_size < prebuf->ia_size)
- local->prebuf_size = prebuf->ia_size;
-
- if (local->postbuf_size < postbuf->ia_size)
- local->postbuf_size = postbuf->ia_size;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret != -1) {
- local->pre_buf.ia_blocks = local->prebuf_blocks;
- local->pre_buf.ia_size = local->prebuf_size;
- local->post_buf.ia_blocks = local->postbuf_blocks;
- local->post_buf.ia_size = local->postbuf_size;
- }
-
- STRIPE_STACK_UNWIND(fsync, frame, local->op_ret, local->op_errno,
- &local->pre_buf, &local->post_buf, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
- dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int32_t op_errno = 1;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
- VALIDATE_OR_GOTO(fd->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- frame->local = local;
-
- inode_ctx_get(fd->inode, this, (uint64_t *)&fctx);
- if (!fctx) {
- op_errno = EINVAL;
- goto err;
- }
- local->fctx = fctx;
- local->op_ret = -1;
- local->call_count = priv->child_count;
-
- while (trav) {
- STACK_WIND(frame, stripe_fsync_cbk, trav->xlator,
- trav->xlator->fops->fsync, fd, flags, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(fsync, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- }
-
- if (op_ret == 0) {
- local->op_ret = 0;
-
- if (FIRST_CHILD(this) == prev->this)
- local->stbuf = *buf;
-
- local->stbuf_blocks += buf->ia_blocks;
-
- correct_file_size(buf, local->fctx, prev);
-
- if (local->stbuf_size < buf->ia_size)
- local->stbuf_size = buf->ia_size;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret != -1) {
- local->stbuf.ia_size = local->stbuf_size;
- local->stbuf.ia_blocks = local->stbuf_blocks;
- }
-
- STRIPE_STACK_UNWIND(fstat, frame, local->op_ret, local->op_errno,
- &local->stbuf, NULL);
- }
-
-out:
- return 0;
-}
-
-int32_t
-stripe_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int32_t op_errno = 1;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
- VALIDATE_OR_GOTO(fd->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- if (IA_ISREG(fd->inode->ia_type)) {
- inode_ctx_get(fd->inode, this, (uint64_t *)&fctx);
- if (!fctx)
- goto err;
- local->fctx = fctx;
- }
-
- while (trav) {
- STACK_WIND(frame, stripe_fstat_cbk, trav->xlator,
- trav->xlator->fops->fstat, fd, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(fstat, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int i, eof_idx;
- off_t dest_offset, tmp_offset;
- int32_t op_errno = 1;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
- VALIDATE_OR_GOTO(fd->inode, err);
-
- priv = this->private;
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- inode_ctx_get(fd->inode, this, (uint64_t *)&fctx);
- if (!fctx) {
- gf_log(this->name, GF_LOG_ERROR, "no stripe context");
- op_errno = EINVAL;
- goto err;
- }
- if (!fctx->stripe_count) {
- gf_log(this->name, GF_LOG_ERROR, "no stripe count");
- op_errno = EINVAL;
- goto err;
- }
-
- local->fctx = fctx;
- eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count;
-
- for (i = 0; i < fctx->stripe_count; i++) {
- if (!fctx->xl_array[i]) {
- gf_log(this->name, GF_LOG_ERROR,
- "no xlator at index "
- "%d",
- i);
- op_errno = EINVAL;
- goto err;
- }
-
- if (fctx->stripe_coalesce) {
- if (i < eof_idx)
- tmp_offset = gf_roof(offset,
- fctx->stripe_size * fctx->stripe_count);
- else if (i > eof_idx)
- tmp_offset = gf_floor(offset,
- fctx->stripe_size * fctx->stripe_count);
- else
- tmp_offset = offset;
-
- dest_offset = coalesced_offset(tmp_offset, fctx->stripe_size,
- fctx->stripe_count);
- } else {
- dest_offset = offset;
- }
-
- STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i],
- fctx->xl_array[i]->fops->ftruncate, fd, dest_offset, NULL);
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG, "%s returned %s", prev->this->name,
- strerror(op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) || (prev->this == FIRST_CHILD(this)))
- local->failed = 1;
- }
- if (op_ret >= 0)
- local->op_ret = op_ret;
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- STRIPE_STACK_UNWIND(fsyncdir, frame, local->op_ret, local->op_errno,
- NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
- dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- int32_t op_errno = 1;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
- VALIDATE_OR_GOTO(fd->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- while (trav) {
- STACK_WIND(frame, stripe_fsyncdir_cbk, trav->xlator,
- trav->xlator->fops->fsyncdir, fd, flags, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(fsyncdir, frame, -1, op_errno, NULL);
- return 0;
-}
-
-int32_t
-stripe_readv_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- dict_t *xdata)
-{
- int32_t i = 0;
- int32_t callcnt = 0;
- int32_t count = 0;
- stripe_local_t *local = NULL;
- struct iovec *vec = NULL;
- struct iatt tmp_stbuf = {
- 0,
- };
- struct iobref *tmp_iobref = NULL;
- struct iobuf *iobuf = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- local = frame->local;
- prev = cookie;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret != -1) {
- correct_file_size(buf, local->fctx, prev);
- if (local->stbuf_size < buf->ia_size)
- local->stbuf_size = buf->ia_size;
- }
- }
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- op_ret = 0;
-
- /* Keep extra space for filling in '\0's */
- vec = GF_CALLOC((local->count * 2), sizeof(struct iovec),
- gf_stripe_mt_iovec);
- if (!vec) {
- op_ret = -1;
- goto done;
- }
-
- for (i = 0; i < local->wind_count; i++) {
- if (local->replies[i].op_ret) {
- memcpy((vec + count), local->replies[i].vector,
- (local->replies[i].count * sizeof(struct iovec)));
- count += local->replies[i].count;
- op_ret += local->replies[i].op_ret;
- }
- if ((local->replies[i].op_ret < local->replies[i].requested_size) &&
- (local->stbuf_size > (local->offset + op_ret))) {
- /* Fill in 0s here */
- vec[count].iov_len = (local->replies[i].requested_size -
- local->replies[i].op_ret);
- iobuf = iobuf_get2(this->ctx->iobuf_pool, vec[count].iov_len);
- if (!iobuf) {
- gf_log(this->name, GF_LOG_ERROR, "Out of memory.");
- op_ret = -1;
- op_errno = ENOMEM;
- goto done;
- }
- memset(iobuf->ptr, 0, vec[count].iov_len);
- vec[count].iov_base = iobuf->ptr;
-
- iobref_add(local->iobref, iobuf);
- iobuf_unref(iobuf);
-
- op_ret += vec[count].iov_len;
- count++;
- }
- GF_FREE(local->replies[i].vector);
- }
-
- /* ENOENT signals EOF to the NFS-server */
- if (op_ret != -1 && op_ret < local->readv_size &&
- (local->offset + op_ret == buf->ia_size))
- op_errno = ENOENT;
-
- /* FIXME: notice that st_ino, and st_dev (gen) will be
- * different than what inode will have. Make sure this doesn't
- * cause any bugs at higher levels */
- memcpy(&tmp_stbuf, &local->replies[0].stbuf, sizeof(struct iatt));
- tmp_stbuf.ia_size = local->stbuf_size;
-
- done:
- GF_FREE(local->replies);
- tmp_iobref = local->iobref;
- STRIPE_STACK_UNWIND(readv, frame, op_ret, op_errno, vec, count,
- &tmp_stbuf, tmp_iobref, NULL);
-
- iobref_unref(tmp_iobref);
- GF_FREE(vec);
- }
-out:
- return 0;
-}
-
-/**
- * stripe_readv_cbk - get all the striped reads, and order it properly, send it
- * to above layer after putting it in a single vector.
- */
-int32_t
-stripe_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *stbuf, struct iobref *iobref,
- dict_t *xdata)
-{
- int32_t index = 0;
- int32_t callcnt = 0;
- int32_t final_count = 0;
- int32_t need_to_check_proper_size = 0;
- call_frame_t *mframe = NULL;
- stripe_local_t *mlocal = NULL;
- stripe_local_t *local = NULL;
- struct iovec *final_vec = NULL;
- struct iatt tmp_stbuf = {
- 0,
- };
- struct iatt *tmp_stbuf_p = NULL; // need it for a warning
- struct iobref *tmp_iobref = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto end;
- }
-
- local = frame->local;
- index = local->node_index;
- prev = cookie;
- mframe = local->orig_frame;
- if (!mframe)
- goto out;
-
- mlocal = mframe->local;
- if (!mlocal)
- goto out;
-
- fctx = mlocal->fctx;
-
- LOCK(&mframe->lock);
- {
- mlocal->replies[index].op_ret = op_ret;
- mlocal->replies[index].op_errno = op_errno;
- mlocal->replies[index].requested_size = local->readv_size;
- if (op_ret >= 0) {
- mlocal->replies[index].stbuf = *stbuf;
- mlocal->replies[index].count = count;
- mlocal->replies[index].vector = iov_dup(vector, count);
-
- correct_file_size(stbuf, fctx, prev);
-
- if (local->stbuf_size < stbuf->ia_size)
- local->stbuf_size = stbuf->ia_size;
- local->stbuf_blocks += stbuf->ia_blocks;
-
- if (!mlocal->iobref)
- mlocal->iobref = iobref_new();
- iobref_merge(mlocal->iobref, iobref);
- }
- callcnt = ++mlocal->call_count;
- }
- UNLOCK(&mframe->lock);
-
- if (callcnt == mlocal->wind_count) {
- op_ret = 0;
-
- for (index = 0; index < mlocal->wind_count; index++) {
- /* check whether each stripe returned
- * 'expected' number of bytes */
- if (mlocal->replies[index].op_ret == -1) {
- op_ret = -1;
- op_errno = mlocal->replies[index].op_errno;
- break;
- }
- /* TODO: handle the 'holes' within the read range
- properly */
- if (mlocal->replies[index].op_ret <
- mlocal->replies[index].requested_size) {
- need_to_check_proper_size = 1;
- }
-
- op_ret += mlocal->replies[index].op_ret;
- mlocal->count += mlocal->replies[index].count;
- }
- if (op_ret == -1)
- goto done;
- if (need_to_check_proper_size)
- goto check_size;
-
- final_vec = GF_CALLOC(mlocal->count, sizeof(struct iovec),
- gf_stripe_mt_iovec);
-
- if (!final_vec) {
- op_ret = -1;
- goto done;
- }
-
- for (index = 0; index < mlocal->wind_count; index++) {
- memcpy((final_vec + final_count), mlocal->replies[index].vector,
- (mlocal->replies[index].count * sizeof(struct iovec)));
- final_count += mlocal->replies[index].count;
- GF_FREE(mlocal->replies[index].vector);
- }
-
- /* FIXME: notice that st_ino, and st_dev (gen) will be
- * different than what inode will have. Make sure this doesn't
- * cause any bugs at higher levels */
- memcpy(&tmp_stbuf, &mlocal->replies[0].stbuf, sizeof(struct iatt));
- tmp_stbuf.ia_size = local->stbuf_size;
- tmp_stbuf.ia_blocks = local->stbuf_blocks;
-
- done:
- /* */
- GF_FREE(mlocal->replies);
- tmp_iobref = mlocal->iobref;
- /* work around for nfs truncated read. Bug 3774 */
- tmp_stbuf_p = &tmp_stbuf;
- WIPE(tmp_stbuf_p);
- STRIPE_STACK_UNWIND(readv, mframe, op_ret, op_errno, final_vec,
- final_count, &tmp_stbuf, tmp_iobref, NULL);
-
- iobref_unref(tmp_iobref);
- GF_FREE(final_vec);
- }
-
- goto out;
-
-check_size:
- mlocal->call_count = fctx->stripe_count;
-
- for (index = 0; index < fctx->stripe_count; index++) {
- STACK_WIND(mframe, stripe_readv_fstat_cbk, (fctx->xl_array[index]),
- (fctx->xl_array[index])->fops->fstat, mlocal->fd, NULL);
- }
-
-out:
- STRIPE_STACK_DESTROY(frame);
-end:
- return 0;
-}
-
-int32_t
-stripe_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, uint32_t flags, dict_t *xdata)
-{
- int32_t op_errno = EINVAL;
- int32_t idx = 0;
- int32_t index = 0;
- int32_t num_stripe = 0;
- int32_t off_index = 0;
- size_t frame_size = 0;
- off_t rounded_end = 0;
- uint64_t tmp_fctx = 0;
- uint64_t stripe_size = 0;
- off_t rounded_start = 0;
- off_t frame_offset = offset;
- off_t dest_offset = 0;
- stripe_local_t *local = NULL;
- call_frame_t *rframe = NULL;
- stripe_local_t *rlocal = NULL;
- stripe_fd_ctx_t *fctx = NULL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
- VALIDATE_OR_GOTO(fd->inode, err);
-
- inode_ctx_get(fd->inode, this, &tmp_fctx);
- if (!tmp_fctx) {
- op_errno = EBADFD;
- goto err;
- }
- fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
- stripe_size = fctx->stripe_size;
-
- STRIPE_VALIDATE_FCTX(fctx, err);
-
- if (!stripe_size) {
- gf_log(this->name, GF_LOG_DEBUG, "Wrong stripe size for the file");
- goto err;
- }
- /* The file is stripe across the child nodes. Send the read request
- * to the child nodes appropriately after checking which region of
- * the file is in which child node. Always '0-<stripe_size>' part of
- * the file resides in the first child.
- */
- rounded_start = gf_floor(offset, stripe_size);
- rounded_end = gf_roof(offset + size, stripe_size);
- num_stripe = (rounded_end - rounded_start) / stripe_size;
-
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- frame->local = local;
-
- /* This is where all the vectors should be copied. */
- local->replies = GF_CALLOC(num_stripe, sizeof(struct stripe_replies),
- gf_stripe_mt_stripe_replies);
- if (!local->replies) {
- op_errno = ENOMEM;
- goto err;
- }
-
- off_index = (offset / stripe_size) % fctx->stripe_count;
- local->wind_count = num_stripe;
- local->readv_size = size;
- local->offset = offset;
- local->fd = fd_ref(fd);
- local->fctx = fctx;
-
- for (index = off_index; index < (num_stripe + off_index); index++) {
- rframe = copy_frame(frame);
- rlocal = mem_get0(this->local_pool);
- if (!rlocal) {
- op_errno = ENOMEM;
- goto err;
- }
-
- frame_size = min(gf_roof(frame_offset + 1, stripe_size),
- (offset + size)) -
- frame_offset;
-
- rlocal->node_index = index - off_index;
- rlocal->orig_frame = frame;
- rlocal->readv_size = frame_size;
- rframe->local = rlocal;
- idx = (index % fctx->stripe_count);
-
- if (fctx->stripe_coalesce)
- dest_offset = coalesced_offset(frame_offset, stripe_size,
- fctx->stripe_count);
- else
- dest_offset = frame_offset;
-
- STACK_WIND(rframe, stripe_readv_cbk, fctx->xl_array[idx],
- fctx->xl_array[idx]->fops->readv, fd, frame_size,
- dest_offset, flags, xdata);
-
- frame_offset += frame_size;
- }
-
- return 0;
-err:
- if (rframe)
- STRIPE_STACK_DESTROY(rframe);
-
- STRIPE_STACK_UNWIND(readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- stripe_local_t *mlocal = NULL;
- call_frame_t *prev = NULL;
- call_frame_t *mframe = NULL;
- struct stripe_replies *reply = NULL;
- int32_t i = 0;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
- mframe = local->orig_frame;
- mlocal = mframe->local;
-
- LOCK(&frame->lock);
- {
- callcnt = ++mlocal->call_count;
-
- mlocal->replies[local->node_index].op_ret = op_ret;
- mlocal->replies[local->node_index].op_errno = op_errno;
-
- if (op_ret >= 0) {
- mlocal->post_buf = *postbuf;
- mlocal->pre_buf = *prebuf;
-
- mlocal->prebuf_blocks += prebuf->ia_blocks;
- mlocal->postbuf_blocks += postbuf->ia_blocks;
-
- correct_file_size(prebuf, mlocal->fctx, prev);
- correct_file_size(postbuf, mlocal->fctx, prev);
-
- if (mlocal->prebuf_size < prebuf->ia_size)
- mlocal->prebuf_size = prebuf->ia_size;
- if (mlocal->postbuf_size < postbuf->ia_size)
- mlocal->postbuf_size = postbuf->ia_size;
- }
- }
- UNLOCK(&frame->lock);
-
- if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
- mlocal->pre_buf.ia_size = mlocal->prebuf_size;
- mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
- mlocal->post_buf.ia_size = mlocal->postbuf_size;
- mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
-
- /*
- * Only return the number of consecutively written bytes up until
- * the first error. Only return an error if it occurs first.
- *
- * When a short write occurs, the application should retry at the
- * appropriate offset, at which point we'll potentially pass back
- * the error.
- */
- for (i = 0, reply = mlocal->replies; i < mlocal->wind_count;
- i++, reply++) {
- if (reply->op_ret == -1) {
- gf_log(this->name, GF_LOG_DEBUG,
- "reply %d "
- "returned error %s",
- i, strerror(reply->op_errno));
- if (!mlocal->op_ret) {
- mlocal->op_ret = -1;
- mlocal->op_errno = reply->op_errno;
- }
- break;
- }
-
- mlocal->op_ret += reply->op_ret;
-
- if (reply->op_ret < reply->requested_size)
- break;
- }
-
- GF_FREE(mlocal->replies);
-
- STRIPE_STACK_UNWIND(writev, mframe, mlocal->op_ret, mlocal->op_errno,
- &mlocal->pre_buf, &mlocal->post_buf, NULL);
- }
-out:
- if (frame)
- STRIPE_STACK_DESTROY(frame);
- return 0;
-}
-
-int32_t
-stripe_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset, uint32_t flags,
- struct iobref *iobref, dict_t *xdata)
-{
- struct iovec *tmp_vec = NULL;
- stripe_local_t *local = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int32_t op_errno = 1;
- int32_t idx = 0;
- int32_t total_size = 0;
- int32_t offset_offset = 0;
- int32_t remaining_size = 0;
- int32_t tmp_count = count;
- off_t fill_size = 0;
- uint64_t stripe_size = 0;
- uint64_t tmp_fctx = 0;
- off_t dest_offset = 0;
- off_t rounded_start = 0;
- off_t rounded_end = 0;
- int32_t total_chunks = 0;
- call_frame_t *wframe = NULL;
- stripe_local_t *wlocal = NULL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
- VALIDATE_OR_GOTO(fd->inode, err);
-
- inode_ctx_get(fd->inode, this, &tmp_fctx);
- if (!tmp_fctx) {
- op_errno = EINVAL;
- goto err;
- }
- fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
- stripe_size = fctx->stripe_size;
-
- STRIPE_VALIDATE_FCTX(fctx, err);
-
- /* File has to be stripped across the child nodes */
- total_size = iov_length(vector, count);
- remaining_size = total_size;
-
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- frame->local = local;
- local->stripe_size = stripe_size;
- local->fctx = fctx;
-
- if (!stripe_size) {
- gf_log(this->name, GF_LOG_DEBUG, "Wrong stripe size for the file");
- op_errno = EINVAL;
- goto err;
- }
-
- rounded_start = gf_floor(offset, stripe_size);
- rounded_end = gf_roof(offset + total_size, stripe_size);
- total_chunks = (rounded_end - rounded_start) / stripe_size;
- local->replies = GF_CALLOC(total_chunks, sizeof(struct stripe_replies),
- gf_stripe_mt_stripe_replies);
- if (!local->replies) {
- op_errno = ENOMEM;
- goto err;
- }
-
- total_chunks = 0;
- while (1) {
- wframe = copy_frame(frame);
- wlocal = mem_get0(this->local_pool);
- if (!wlocal) {
- op_errno = ENOMEM;
- goto err;
- }
- wlocal->orig_frame = frame;
- wframe->local = wlocal;
-
- /* Send striped chunk of the vector to child
- nodes appropriately. */
- idx = (((offset + offset_offset) / local->stripe_size) %
- fctx->stripe_count);
-
- fill_size = (local->stripe_size -
- ((offset + offset_offset) % local->stripe_size));
- if (fill_size > remaining_size)
- fill_size = remaining_size;
-
- remaining_size -= fill_size;
-
- tmp_count = iov_subset(vector, count, offset_offset,
- offset_offset + fill_size, NULL);
- tmp_vec = GF_CALLOC(tmp_count, sizeof(struct iovec),
- gf_stripe_mt_iovec);
- if (!tmp_vec) {
- op_errno = ENOMEM;
- goto err;
- }
- tmp_count = iov_subset(vector, count, offset_offset,
- offset_offset + fill_size, tmp_vec);
-
- local->wind_count++;
- if (remaining_size == 0)
- local->unwind = 1;
-
- /*
- * Store off the request index (with respect to the chunk of the
- * initial offset) and the size of the request. This is required
- * in the callback to calculate an appropriate return value in
- * the event of a write failure in one or more requests.
- */
- wlocal->node_index = total_chunks;
- local->replies[total_chunks].requested_size = fill_size;
-
- dest_offset = offset + offset_offset;
- if (fctx->stripe_coalesce)
- dest_offset = coalesced_offset(dest_offset, local->stripe_size,
- fctx->stripe_count);
-
- STACK_WIND(wframe, stripe_writev_cbk, fctx->xl_array[idx],
- fctx->xl_array[idx]->fops->writev, fd, tmp_vec, tmp_count,
- dest_offset, flags, iobref, xdata);
-
- GF_FREE(tmp_vec);
- offset_offset += fill_size;
- total_chunks++;
- if (remaining_size == 0)
- break;
- }
-
- return 0;
-err:
- if (wframe)
- STRIPE_STACK_DESTROY(wframe);
-
- STRIPE_STACK_UNWIND(writev, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- stripe_local_t *mlocal = NULL;
- call_frame_t *prev = NULL;
- call_frame_t *mframe = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
- mframe = local->orig_frame;
- mlocal = mframe->local;
-
- LOCK(&frame->lock);
- {
- callcnt = ++mlocal->call_count;
-
- if (op_ret == 0) {
- mlocal->post_buf = *postbuf;
- mlocal->pre_buf = *prebuf;
-
- mlocal->prebuf_blocks += prebuf->ia_blocks;
- mlocal->postbuf_blocks += postbuf->ia_blocks;
-
- correct_file_size(prebuf, mlocal->fctx, prev);
- correct_file_size(postbuf, mlocal->fctx, prev);
-
- if (mlocal->prebuf_size < prebuf->ia_size)
- mlocal->prebuf_size = prebuf->ia_size;
- if (mlocal->postbuf_size < postbuf->ia_size)
- mlocal->postbuf_size = postbuf->ia_size;
- }
-
- /* return the first failure */
- if (mlocal->op_ret == 0) {
- mlocal->op_ret = op_ret;
- mlocal->op_errno = op_errno;
- }
- }
- UNLOCK(&frame->lock);
-
- if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
- mlocal->pre_buf.ia_size = mlocal->prebuf_size;
- mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
- mlocal->post_buf.ia_size = mlocal->postbuf_size;
- mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
-
- STRIPE_STACK_UNWIND(fallocate, mframe, mlocal->op_ret, mlocal->op_errno,
- &mlocal->pre_buf, &mlocal->post_buf, NULL);
- }
-out:
- if (frame)
- STRIPE_STACK_DESTROY(frame);
- return 0;
-}
-
-int32_t
-stripe_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
- off_t offset, size_t len, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int32_t op_errno = 1;
- int32_t idx = 0;
- int32_t offset_offset = 0;
- int32_t remaining_size = 0;
- off_t fill_size = 0;
- uint64_t stripe_size = 0;
- uint64_t tmp_fctx = 0;
- off_t dest_offset = 0;
- call_frame_t *fframe = NULL;
- stripe_local_t *flocal = NULL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
- VALIDATE_OR_GOTO(fd->inode, err);
-
- inode_ctx_get(fd->inode, this, &tmp_fctx);
- if (!tmp_fctx) {
- op_errno = EINVAL;
- goto err;
- }
- fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
- stripe_size = fctx->stripe_size;
-
- STRIPE_VALIDATE_FCTX(fctx, err);
-
- remaining_size = len;
-
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- frame->local = local;
- local->stripe_size = stripe_size;
- local->fctx = fctx;
-
- if (!stripe_size) {
- gf_log(this->name, GF_LOG_DEBUG, "Wrong stripe size for the file");
- op_errno = EINVAL;
- goto err;
- }
-
- while (1) {
- fframe = copy_frame(frame);
- flocal = mem_get0(this->local_pool);
- if (!flocal) {
- op_errno = ENOMEM;
- goto err;
- }
- flocal->orig_frame = frame;
- fframe->local = flocal;
-
- /* send fallocate request to the associated child node */
- idx = (((offset + offset_offset) / local->stripe_size) %
- fctx->stripe_count);
-
- fill_size = (local->stripe_size -
- ((offset + offset_offset) % local->stripe_size));
- if (fill_size > remaining_size)
- fill_size = remaining_size;
-
- remaining_size -= fill_size;
-
- local->wind_count++;
- if (remaining_size == 0)
- local->unwind = 1;
-
- dest_offset = offset + offset_offset;
- if (fctx->stripe_coalesce)
- dest_offset = coalesced_offset(dest_offset, local->stripe_size,
- fctx->stripe_count);
-
- /*
- * TODO: Create a separate handler for coalesce mode that sends a
- * single fallocate per-child (since the ranges are linear).
- */
- STACK_WIND(fframe, stripe_fallocate_cbk, fctx->xl_array[idx],
- fctx->xl_array[idx]->fops->fallocate, fd, mode, dest_offset,
- fill_size, xdata);
-
- offset_offset += fill_size;
- if (remaining_size == 0)
- break;
- }
-
- return 0;
-err:
- if (fframe)
- STRIPE_STACK_DESTROY(fframe);
-
- STRIPE_STACK_UNWIND(fallocate, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- stripe_local_t *mlocal = NULL;
- call_frame_t *prev = NULL;
- call_frame_t *mframe = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
- mframe = local->orig_frame;
- mlocal = mframe->local;
-
- LOCK(&frame->lock);
- {
- callcnt = ++mlocal->call_count;
-
- if (op_ret == 0) {
- mlocal->post_buf = *postbuf;
- mlocal->pre_buf = *prebuf;
-
- mlocal->prebuf_blocks += prebuf->ia_blocks;
- mlocal->postbuf_blocks += postbuf->ia_blocks;
-
- correct_file_size(prebuf, mlocal->fctx, prev);
- correct_file_size(postbuf, mlocal->fctx, prev);
-
- if (mlocal->prebuf_size < prebuf->ia_size)
- mlocal->prebuf_size = prebuf->ia_size;
- if (mlocal->postbuf_size < postbuf->ia_size)
- mlocal->postbuf_size = postbuf->ia_size;
- }
-
- /* return the first failure */
- if (mlocal->op_ret == 0) {
- mlocal->op_ret = op_ret;
- mlocal->op_errno = op_errno;
- }
- }
- UNLOCK(&frame->lock);
-
- if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
- mlocal->pre_buf.ia_size = mlocal->prebuf_size;
- mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
- mlocal->post_buf.ia_size = mlocal->postbuf_size;
- mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
-
- STRIPE_STACK_UNWIND(discard, mframe, mlocal->op_ret, mlocal->op_errno,
- &mlocal->pre_buf, &mlocal->post_buf, NULL);
- }
-out:
- if (frame)
- STRIPE_STACK_DESTROY(frame);
-
- return 0;
-}
-
-int32_t
-stripe_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- size_t len, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int32_t op_errno = 1;
- int32_t idx = 0;
- int32_t offset_offset = 0;
- int32_t remaining_size = 0;
- off_t fill_size = 0;
- uint64_t stripe_size = 0;
- uint64_t tmp_fctx = 0;
- off_t dest_offset = 0;
- call_frame_t *fframe = NULL;
- stripe_local_t *flocal = NULL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
- VALIDATE_OR_GOTO(fd->inode, err);
-
- inode_ctx_get(fd->inode, this, &tmp_fctx);
- if (!tmp_fctx) {
- op_errno = EINVAL;
- goto err;
- }
- fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
- stripe_size = fctx->stripe_size;
-
- STRIPE_VALIDATE_FCTX(fctx, err);
-
- remaining_size = len;
-
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- frame->local = local;
- local->stripe_size = stripe_size;
- local->fctx = fctx;
-
- if (!stripe_size) {
- gf_log(this->name, GF_LOG_DEBUG, "Wrong stripe size for the file");
- op_errno = EINVAL;
- goto err;
- }
-
- while (1) {
- fframe = copy_frame(frame);
- flocal = mem_get0(this->local_pool);
- if (!flocal) {
- op_errno = ENOMEM;
- goto err;
- }
- flocal->orig_frame = frame;
- fframe->local = flocal;
-
- /* send discard request to the associated child node */
- idx = (((offset + offset_offset) / local->stripe_size) %
- fctx->stripe_count);
-
- fill_size = (local->stripe_size -
- ((offset + offset_offset) % local->stripe_size));
- if (fill_size > remaining_size)
- fill_size = remaining_size;
-
- remaining_size -= fill_size;
-
- local->wind_count++;
- if (remaining_size == 0)
- local->unwind = 1;
-
- dest_offset = offset + offset_offset;
- if (fctx->stripe_coalesce)
- dest_offset = coalesced_offset(dest_offset, local->stripe_size,
- fctx->stripe_count);
-
- /*
- * TODO: Create a separate handler for coalesce mode that sends a
- * single discard per-child (since the ranges are linear).
- */
- STACK_WIND(fframe, stripe_discard_cbk, fctx->xl_array[idx],
- fctx->xl_array[idx]->fops->discard, fd, dest_offset,
- fill_size, xdata);
-
- offset_offset += fill_size;
- if (remaining_size == 0)
- break;
- }
-
- return 0;
-err:
- if (fframe)
- STRIPE_STACK_DESTROY(fframe);
-
- STRIPE_STACK_UNWIND(discard, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- stripe_local_t *mlocal = NULL;
- call_frame_t *prev = NULL;
- call_frame_t *mframe = NULL;
-
- GF_ASSERT(frame);
-
- if (!this || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
- mframe = local->orig_frame;
- mlocal = mframe->local;
-
- LOCK(&frame->lock);
- {
- callcnt = ++mlocal->call_count;
-
- if (op_ret == 0) {
- mlocal->post_buf = *postbuf;
- mlocal->pre_buf = *prebuf;
-
- mlocal->prebuf_blocks += prebuf->ia_blocks;
- mlocal->postbuf_blocks += postbuf->ia_blocks;
-
- correct_file_size(prebuf, mlocal->fctx, prev);
- correct_file_size(postbuf, mlocal->fctx, prev);
-
- if (mlocal->prebuf_size < prebuf->ia_size)
- mlocal->prebuf_size = prebuf->ia_size;
- if (mlocal->postbuf_size < postbuf->ia_size)
- mlocal->postbuf_size = postbuf->ia_size;
- }
-
- /* return the first failure */
- if (mlocal->op_ret == 0) {
- mlocal->op_ret = op_ret;
- mlocal->op_errno = op_errno;
- }
- }
- UNLOCK(&frame->lock);
-
- if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
- mlocal->pre_buf.ia_size = mlocal->prebuf_size;
- mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
- mlocal->post_buf.ia_size = mlocal->postbuf_size;
- mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
-
- STRIPE_STACK_UNWIND(zerofill, mframe, mlocal->op_ret, mlocal->op_errno,
- &mlocal->pre_buf, &mlocal->post_buf, NULL);
- }
-out:
- STRIPE_STACK_DESTROY(frame);
- return 0;
-}
-
-int32_t
-stripe_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- off_t len, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int32_t op_errno = 1;
- int32_t idx = 0;
- int32_t offset_offset = 0;
- int32_t remaining_size = 0;
- off_t fill_size = 0;
- uint64_t stripe_size = 0;
- uint64_t tmp_fctx = 0;
- off_t dest_offset = 0;
- call_frame_t *fframe = NULL;
- stripe_local_t *flocal = NULL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
- VALIDATE_OR_GOTO(fd->inode, err);
-
- inode_ctx_get(fd->inode, this, &tmp_fctx);
- if (!tmp_fctx) {
- op_errno = EINVAL;
- goto err;
- }
- fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
- stripe_size = fctx->stripe_size;
-
- STRIPE_VALIDATE_FCTX(fctx, err);
-
- remaining_size = len;
-
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- frame->local = local;
- local->stripe_size = stripe_size;
- local->fctx = fctx;
-
- if (!stripe_size) {
- gf_log(this->name, GF_LOG_DEBUG, "Wrong stripe size for the file");
- op_errno = EINVAL;
- goto err;
- }
-
- while (1) {
- fframe = copy_frame(frame);
- flocal = mem_get0(this->local_pool);
- if (!flocal) {
- op_errno = ENOMEM;
- goto err;
- }
- flocal->orig_frame = frame;
- fframe->local = flocal;
-
- idx = (((offset + offset_offset) / local->stripe_size) %
- fctx->stripe_count);
-
- fill_size = (local->stripe_size -
- ((offset + offset_offset) % local->stripe_size));
- if (fill_size > remaining_size)
- fill_size = remaining_size;
-
- remaining_size -= fill_size;
-
- local->wind_count++;
- if (remaining_size == 0)
- local->unwind = 1;
-
- dest_offset = offset + offset_offset;
- if (fctx->stripe_coalesce)
- dest_offset = coalesced_offset(dest_offset, local->stripe_size,
- fctx->stripe_count);
-
- STACK_WIND(fframe, stripe_zerofill_cbk, fctx->xl_array[idx],
- fctx->xl_array[idx]->fops->zerofill, fd, dest_offset,
- fill_size, xdata);
- offset_offset += fill_size;
- if (remaining_size == 0)
- break;
- }
-
- return 0;
-err:
- if (fframe)
- STRIPE_STACK_DESTROY(fframe);
-
- STRIPE_STACK_UNWIND(zerofill, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- gf_seek_what_t what, dict_t *xdata)
-{
- /* TBD */
- gf_log(this->name, GF_LOG_INFO, "seek called on %s.",
- uuid_utoa(fd->inode->gfid));
- STRIPE_STACK_UNWIND(seek, frame, -1, ENOTSUP, 0, NULL);
- return 0;
-}
-
-int32_t
-stripe_release(xlator_t *this, fd_t *fd)
-{
- return 0;
-}
-
-int
-stripe_forget(xlator_t *this, inode_t *inode)
-{
- uint64_t tmp_fctx = 0;
- stripe_fd_ctx_t *fctx = NULL;
-
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(inode, err);
-
- (void)inode_ctx_del(inode, this, &tmp_fctx);
- if (!tmp_fctx) {
- goto err;
- }
-
- fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
-
- if (!fctx->static_array)
- GF_FREE(fctx->xl_array);
-
- GF_FREE(fctx);
-err:
- return 0;
-}
-
-int32_t
-notify(xlator_t *this, int32_t event, void *data, ...)
-{
- stripe_private_t *priv = NULL;
- int down_client = 0;
- int i = 0;
- gf_boolean_t heard_from_all_children = _gf_false;
-
- if (!this)
- return 0;
-
- priv = this->private;
- if (!priv)
- return 0;
-
- switch (event) {
- case GF_EVENT_CHILD_UP: {
- /* get an index number to set */
- for (i = 0; i < priv->child_count; i++) {
- if (data == priv->xl_array[i])
- break;
- }
-
- if (priv->child_count == i) {
- gf_log(this->name, GF_LOG_ERROR,
- "got GF_EVENT_CHILD_UP bad subvolume %s",
- data ? ((xlator_t *)data)->name : NULL);
- break;
- }
-
- LOCK(&priv->lock);
- {
- if (data == FIRST_CHILD(this))
- priv->first_child_down = 0;
- priv->last_event[i] = event;
- }
- UNLOCK(&priv->lock);
- } break;
- case GF_EVENT_CHILD_CONNECTING: {
- // 'CONNECTING' doesn't ensure its CHILD_UP, so do nothing
- goto out;
- }
- case GF_EVENT_CHILD_DOWN: {
- /* get an index number to set */
- for (i = 0; i < priv->child_count; i++) {
- if (data == priv->xl_array[i])
- break;
- }
-
- if (priv->child_count == i) {
- gf_log(this->name, GF_LOG_ERROR,
- "got GF_EVENT_CHILD_DOWN bad subvolume %s",
- data ? ((xlator_t *)data)->name : NULL);
- break;
- }
-
- LOCK(&priv->lock);
- {
- if (data == FIRST_CHILD(this))
- priv->first_child_down = 1;
- priv->last_event[i] = event;
- }
- UNLOCK(&priv->lock);
- } break;
-
- default: {
- /* */
- default_notify(this, event, data);
- goto out;
- } break;
- }
-
- // Consider child as down if it's last_event is not CHILD_UP
- for (i = 0, down_client = 0; i < priv->child_count; i++)
- if (priv->last_event[i] != GF_EVENT_CHILD_UP)
- down_client++;
-
- LOCK(&priv->lock);
- {
- priv->nodes_down = down_client;
- }
- UNLOCK(&priv->lock);
-
- heard_from_all_children = _gf_true;
- for (i = 0; i < priv->child_count; i++)
- if (!priv->last_event[i])
- heard_from_all_children = _gf_false;
-
- if (heard_from_all_children)
- default_notify(this, event, data);
-out:
- return 0;
-}
-
-int
-stripe_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
-{
- int ret = -1;
- int call_cnt = 0;
- stripe_local_t *local = NULL;
-
- if (!frame || !frame->local || !this) {
- gf_log("", GF_LOG_ERROR, "Possible NULL deref");
- return ret;
- }
-
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- call_cnt = --local->wind_count;
-
- /**
- * We overwrite ->op_* values here for subsequent failure
- * conditions, hence we propagate the last errno down the
- * stack.
- */
- if (op_ret < 0) {
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- goto unlock;
- }
- }
-
-unlock:
- UNLOCK(&frame->lock);
-
- if (!call_cnt) {
- STRIPE_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno,
- xdata);
- }
-
- return 0;
-}
-
-#ifdef HAVE_BD_XLATOR
-int
-stripe_is_bd(dict_t *this, char *key, data_t *value, void *data)
-{
- gf_boolean_t *is_bd = data;
-
- if (data == NULL)
- return 0;
-
- if (XATTR_IS_BD(key))
- *is_bd = _gf_true;
-
- return 0;
-}
-
-static gf_boolean_t
-stripe_setxattr_is_bd(dict_t *dict)
-{
- gf_boolean_t is_bd = _gf_false;
-
- if (dict == NULL)
- goto out;
-
- dict_foreach(dict, stripe_is_bd, &is_bd);
-out:
- return is_bd;
-}
-#else
-#define stripe_setxattr_is_bd(dict) _gf_false
-#endif
-
-int
-stripe_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int flags, dict_t *xdata)
-{
- int32_t op_errno = EINVAL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
- stripe_local_t *local = NULL;
- int i = 0;
- gf_boolean_t is_bd = _gf_false;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- GF_IF_INTERNAL_XATTR_GOTO("trusted.*stripe*", dict, op_errno, err);
-
- priv = this->private;
- trav = this->children;
-
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- frame->local = local;
- local->wind_count = priv->child_count;
- local->op_ret = local->op_errno = 0;
-
- is_bd = stripe_setxattr_is_bd(dict);
-
- /**
- * Set xattrs for directories on all subvolumes. Additionally
- * this power is only given to a special client. Bd xlator
- * also needs xattrs for regular files (ie LVs)
- */
- if (((frame->root->pid == GF_CLIENT_PID_GSYNCD) &&
- IA_ISDIR(loc->inode->ia_type)) ||
- is_bd) {
- for (i = 0; i < priv->child_count; i++, trav = trav->next) {
- STACK_WIND(frame, stripe_setxattr_cbk, trav->xlator,
- trav->xlator->fops->setxattr, loc, dict, flags, xdata);
- }
- } else {
- local->wind_count = 1;
- STACK_WIND(frame, stripe_setxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata);
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL);
- return 0;
-}
-
-int
-stripe_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
-{
- STRIPE_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int
-stripe_is_special_key(dict_t *this, char *key, data_t *value, void *data)
-{
- gf_boolean_t *is_special = NULL;
-
- if (data == NULL) {
- goto out;
- }
-
- is_special = data;
-
- if (XATTR_IS_LOCKINFO(key) || XATTR_IS_BD(key))
- *is_special = _gf_true;
-
-out:
- return 0;
-}
-
-int32_t
-stripe_fsetxattr_everyone_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- int call_count = 0;
- stripe_local_t *local = NULL;
-
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- call_count = --local->wind_count;
-
- if (op_ret < 0) {
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- }
- }
- UNLOCK(&frame->lock);
-
- if (call_count == 0) {
- STRIPE_STACK_UNWIND(fsetxattr, frame, local->op_ret, local->op_errno,
- NULL);
- }
- return 0;
-}
-
-int
-stripe_fsetxattr_to_everyone(call_frame_t *frame, xlator_t *this, fd_t *fd,
- dict_t *dict, int flags, dict_t *xdata)
-{
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
- int ret = -1;
- stripe_local_t *local = NULL;
-
- priv = this->private;
-
- local = mem_get0(this->local_pool);
- if (local == NULL) {
- goto out;
- }
-
- frame->local = local;
-
- local->wind_count = priv->child_count;
-
- trav = this->children;
-
- while (trav) {
- STACK_WIND(frame, stripe_fsetxattr_everyone_cbk, trav->xlator,
- trav->xlator->fops->fsetxattr, fd, dict, flags, xdata);
- trav = trav->next;
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-static gf_boolean_t
-stripe_fsetxattr_is_special(dict_t *dict)
-{
- gf_boolean_t is_spl = _gf_false;
-
- if (dict == NULL) {
- goto out;
- }
-
- dict_foreach(dict, stripe_is_special_key, &is_spl);
-
-out:
- return is_spl;
-}
-
-int
-stripe_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int flags, dict_t *xdata)
-{
- int32_t op_ret = -1, ret = -1, op_errno = EINVAL;
- gf_boolean_t is_spl = _gf_false;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
-
- GF_IF_INTERNAL_XATTR_GOTO("trusted.*stripe*", dict, op_errno, err);
-
- is_spl = stripe_fsetxattr_is_special(dict);
- if (is_spl) {
- ret = stripe_fsetxattr_to_everyone(frame, this, fd, dict, flags, xdata);
- if (ret < 0) {
- op_errno = ENOMEM;
- goto err;
- }
-
- goto out;
- }
-
- STACK_WIND(frame, stripe_fsetxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
-out:
- return 0;
-err:
- STRIPE_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, NULL);
- return 0;
-}
-
-int
-stripe_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- STRIPE_STACK_UNWIND(removexattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int
-stripe_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
-{
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO(this, err);
-
- GF_IF_NATIVE_XATTR_GOTO("trusted.*stripe*", name, op_errno, err);
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(loc, err);
-
- STACK_WIND(frame, stripe_removexattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
- return 0;
-err:
- STRIPE_STACK_UNWIND(removexattr, frame, -1, op_errno, NULL);
- return 0;
-}
-
-int
-stripe_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- STRIPE_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int
-stripe_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
-{
- int32_t op_ret = -1;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
-
- GF_IF_NATIVE_XATTR_GOTO("trusted.*stripe*", name, op_errno, err);
-
- STACK_WIND(frame, stripe_fremovexattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
- return 0;
-err:
- STRIPE_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int32_t
-stripe_readdirp_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode,
- struct iatt *stbuf, dict_t *xattr,
- struct iatt *parent)
-{
- stripe_local_t *local = NULL;
- call_frame_t *main_frame = NULL;
- stripe_local_t *main_local = NULL;
- gf_dirent_t *entry = NULL;
- call_frame_t *prev = NULL;
- int done = 0;
-
- local = frame->local;
- prev = cookie;
-
- entry = local->dirent;
-
- main_frame = local->orig_frame;
- main_local = main_frame->local;
- LOCK(&frame->lock);
- {
- local->call_count--;
- if (!local->call_count)
- done = 1;
- if (op_ret == -1) {
- local->op_errno = op_errno;
- local->op_ret = op_ret;
- goto unlock;
- }
-
- if (stripe_ctx_handle(this, prev, local, xattr))
- gf_log(this->name, GF_LOG_ERROR,
- "Error getting fctx info from dict.");
-
- correct_file_size(stbuf, local->fctx, prev);
-
- stripe_iatt_merge(stbuf, &entry->d_stat);
- local->stbuf_blocks += stbuf->ia_blocks;
- }
-unlock:
- UNLOCK(&frame->lock);
-
- if (done) {
- inode_ctx_put(entry->inode, this, (uint64_t)(long)local->fctx);
-
- done = 0;
- LOCK(&main_frame->lock);
- {
- main_local->wind_count--;
- if (!main_local->wind_count)
- done = 1;
- if (local->op_ret == -1) {
- main_local->op_errno = local->op_errno;
- main_local->op_ret = local->op_ret;
- }
- entry->d_stat.ia_blocks = local->stbuf_blocks;
- }
- UNLOCK(&main_frame->lock);
- if (done) {
- main_frame->local = NULL;
- STRIPE_STACK_UNWIND(readdir, main_frame, main_local->op_ret,
- main_local->op_errno, &main_local->entries,
- NULL);
- gf_dirent_free(&main_local->entries);
- stripe_local_wipe(main_local);
- mem_put(main_local);
- }
- frame->local = NULL;
- stripe_local_wipe(local);
- mem_put(local);
- STRIPE_STACK_DESTROY(frame);
- }
-
- return 0;
-}
-
-int32_t
-stripe_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *orig_entries,
- dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
- gf_dirent_t *local_entry = NULL;
- gf_dirent_t *tmp_entry = NULL;
- xlator_list_t *trav = NULL;
- loc_t loc = {
- 0,
- };
- int32_t count = 0;
- stripe_private_t *priv = NULL;
- int32_t subvols = 0;
- dict_t *xattrs = NULL;
- call_frame_t *local_frame = NULL;
- stripe_local_t *local_ent = NULL;
-
- if (!this || !frame->local || !cookie) {
- gf_log("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
- prev = cookie;
- local = frame->local;
- trav = this->children;
- priv = this->private;
-
- subvols = priv->child_count;
-
- LOCK(&frame->lock);
- {
- local->op_errno = op_errno;
- local->op_ret = op_ret;
-
- if (op_ret != -1) {
- list_splice_init(&orig_entries->list, &local->entries.list);
- local->wind_count = op_ret;
- }
- }
- UNLOCK(&frame->lock);
-
- if (op_ret == -1) {
- gf_log(this->name, GF_LOG_WARNING, "%s returned error %s",
- prev->this->name, strerror(op_errno));
- goto out;
- }
-
- xattrs = dict_new();
- if (xattrs)
- (void)stripe_xattr_request_build(this, xattrs, 0, 0, 0, 0);
- count = op_ret;
- list_for_each_entry_safe(local_entry, tmp_entry, (&local->entries.list),
- list)
- {
- if (!local_entry)
- break;
- if (!IA_ISREG(local_entry->d_stat.ia_type) || !local_entry->inode) {
- LOCK(&frame->lock);
- {
- local->wind_count--;
- count = local->wind_count;
- }
- UNLOCK(&frame->lock);
- continue;
- }
-
- local_frame = copy_frame(frame);
-
- if (!local_frame) {
- op_errno = ENOMEM;
- op_ret = -1;
- goto out;
- }
-
- local_ent = mem_get0(this->local_pool);
- if (!local_ent) {
- op_errno = ENOMEM;
- op_ret = -1;
- goto out;
- }
-
- loc.inode = inode_ref(local_entry->inode);
-
- gf_uuid_copy(loc.gfid, local_entry->d_stat.ia_gfid);
-
- local_ent->orig_frame = frame;
-
- local_ent->call_count = subvols;
-
- local_ent->dirent = local_entry;
-
- local_frame->local = local_ent;
-
- trav = this->children;
- while (trav) {
- STACK_WIND(local_frame, stripe_readdirp_lookup_cbk, trav->xlator,
- trav->xlator->fops->lookup, &loc, xattrs);
- trav = trav->next;
- }
- loc_wipe(&loc);
- }
-out:
- if (!count) {
- /* all entries are directories */
- frame->local = NULL;
- STRIPE_STACK_UNWIND(readdir, frame, (local ? local->op_ret : -1),
- (local ? local->op_errno : EINVAL),
- (local ? &local->entries : NULL), NULL);
- gf_dirent_free(&local->entries);
- stripe_local_wipe(local);
- mem_put(local);
- }
- if (xattrs)
- dict_unref(xattrs);
- return 0;
-}
-int32_t
-stripe_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t off, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- int op_errno = -1;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(fd, err);
-
- priv = this->private;
- trav = this->children;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- frame->local = local;
-
- local->fd = fd_ref(fd);
-
- local->wind_count = 0;
-
- local->count = 0;
- local->op_ret = -1;
- INIT_LIST_HEAD(&local->entries);
-
- if (!trav)
- goto err;
-
- STACK_WIND(frame, stripe_readdirp_cbk, trav->xlator,
- trav->xlator->fops->readdirp, fd, size, off, xdata);
- return 0;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- STRIPE_STACK_UNWIND(readdir, frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-mem_acct_init(xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- goto out;
-
- ret = xlator_mem_acct_init(this, gf_stripe_mt_end + 1);
-
- if (ret != 0) {
- gf_log(this->name, GF_LOG_ERROR,
- "Memory accounting init"
- "failed");
- goto out;
- }
-
-out:
- return ret;
-}
-
-static int
-clear_pattern_list(stripe_private_t *priv)
-{
- struct stripe_options *prev = NULL;
- struct stripe_options *trav = NULL;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO("stripe", priv, out);
-
- trav = priv->pattern;
- priv->pattern = NULL;
- while (trav) {
- prev = trav;
- trav = trav->next;
- GF_FREE(prev);
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-int
-reconfigure(xlator_t *this, dict_t *options)
-{
- stripe_private_t *priv = NULL;
- data_t *data = NULL;
- int ret = -1;
- volume_option_t *opt = NULL;
-
- GF_ASSERT(this);
- GF_ASSERT(this->private);
-
- priv = this->private;
-
- ret = 0;
- LOCK(&priv->lock);
- {
- ret = clear_pattern_list(priv);
- if (ret)
- goto unlock;
-
- data = dict_get(options, "block-size");
- if (data) {
- ret = set_stripe_block_size(this, priv, data->data);
- if (ret)
- goto unlock;
- } else {
- opt = xlator_volume_option_get(this, "block-size");
- if (!opt) {
- gf_log(this->name, GF_LOG_WARNING,
- "option 'block-size' not found");
- ret = -1;
- goto unlock;
- }
-
- if (gf_string2bytesize_uint64(opt->default_value,
- &priv->block_size)) {
- gf_log(this->name, GF_LOG_ERROR,
- "Unable to set default block-size ");
- ret = -1;
- goto unlock;
- }
- }
-
- GF_OPTION_RECONF("coalesce", priv->coalesce, options, bool, unlock);
- }
-unlock:
- UNLOCK(&priv->lock);
- if (ret)
- goto out;
-
- ret = 0;
-out:
- return ret;
-}
-
-/**
- * init - This function is called when xlator-graph gets initialized.
- * The option given in volfiles are parsed here.
- * @this -
- */
-int32_t
-init(xlator_t *this)
-{
- stripe_private_t *priv = NULL;
- volume_option_t *opt = NULL;
- xlator_list_t *trav = NULL;
- data_t *data = NULL;
- int32_t count = 0;
- int ret = -1;
-
- if (!this)
- goto out;
-
- trav = this->children;
- while (trav) {
- count++;
- trav = trav->next;
- }
-
- if (!count) {
- gf_log(this->name, GF_LOG_ERROR,
- "stripe configured without \"subvolumes\" option. "
- "exiting");
- goto out;
- }
-
- if (!this->parents) {
- gf_log(this->name, GF_LOG_WARNING, "dangling volume. check volfile ");
- }
-
- if (count == 1) {
- gf_log(this->name, GF_LOG_ERROR,
- "stripe configured with only one \"subvolumes\" option."
- " please check the volume. exiting");
- goto out;
- }
-
- priv = GF_CALLOC(1, sizeof(stripe_private_t),
- gf_stripe_mt_stripe_private_t);
-
- if (!priv)
- goto out;
- priv->xl_array = GF_CALLOC(count, sizeof(xlator_t *),
- gf_stripe_mt_xlator_t);
- if (!priv->xl_array)
- goto out;
-
- priv->last_event = GF_CALLOC(count, sizeof(int), gf_stripe_mt_int32_t);
- if (!priv->last_event)
- goto out;
-
- priv->child_count = count;
- LOCK_INIT(&priv->lock);
-
- trav = this->children;
- count = 0;
- while (trav) {
- priv->xl_array[count++] = trav->xlator;
- trav = trav->next;
- }
-
- if (count > 256) {
- gf_log(this->name, GF_LOG_ERROR,
- "maximum number of stripe subvolumes supported "
- "is 256");
- goto out;
- }
-
- ret = 0;
- LOCK(&priv->lock);
- {
- opt = xlator_volume_option_get(this, "block-size");
- if (!opt) {
- gf_log(this->name, GF_LOG_WARNING, "option 'block-size' not found");
- ret = -1;
- goto unlock;
- }
- if (gf_string2bytesize_uint64(opt->default_value, &priv->block_size)) {
- gf_log(this->name, GF_LOG_ERROR,
- "Unable to set default block-size ");
- ret = -1;
- goto unlock;
- }
- /* option stripe-pattern *avi:1GB,*pdf:16K */
- data = dict_get(this->options, "block-size");
- if (data) {
- ret = set_stripe_block_size(this, priv, data->data);
- if (ret)
- goto unlock;
- }
- }
-unlock:
- UNLOCK(&priv->lock);
- if (ret)
- goto out;
-
- GF_OPTION_INIT("use-xattr", priv->xattr_supported, bool, out);
- /* notify related */
- priv->nodes_down = priv->child_count;
-
- GF_OPTION_INIT("coalesce", priv->coalesce, bool, out);
-
- this->local_pool = mem_pool_new(stripe_local_t, 128);
- if (!this->local_pool) {
- ret = -1;
- gf_log(this->name, GF_LOG_ERROR,
- "failed to create local_t's memory pool");
- goto out;
- }
-
- this->private = priv;
-
- ret = 0;
-out:
- if (ret) {
- if (priv) {
- GF_FREE(priv->xl_array);
- GF_FREE(priv);
- }
- }
- return ret;
-}
-
-/**
- * fini - Free all the private variables
- * @this -
- */
-void
-fini(xlator_t *this)
-{
- stripe_private_t *priv = NULL;
- struct stripe_options *prev = NULL;
- struct stripe_options *trav = NULL;
-
- if (!this)
- goto out;
-
- priv = this->private;
- if (priv) {
- this->private = NULL;
- GF_FREE(priv->xl_array);
-
- trav = priv->pattern;
- while (trav) {
- prev = trav;
- trav = trav->next;
- GF_FREE(prev);
- }
- GF_FREE(priv->last_event);
- LOCK_DESTROY(&priv->lock);
- GF_FREE(priv);
- }
-
-out:
- return;
-}
-
-int32_t
-stripe_getxattr_unwind(call_frame_t *frame, int op_ret, int op_errno,
- dict_t *dict, dict_t *xdata)
-
-{
- STRIPE_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata);
- return 0;
-}
-
-int
-stripe_internal_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr,
- dict_t *xdata)
-{
- char size_key[256] = {
- 0,
- };
- char index_key[256] = {
- 0,
- };
- char count_key[256] = {
- 0,
- };
- char coalesce_key[256] = {
- 0,
- };
-
- VALIDATE_OR_GOTO(frame, out);
- VALIDATE_OR_GOTO(frame->local, out);
-
- if (!xattr || (op_ret == -1))
- goto out;
-
- sprintf(size_key, "trusted.%s.stripe-size", this->name);
- sprintf(count_key, "trusted.%s.stripe-count", this->name);
- sprintf(index_key, "trusted.%s.stripe-index", this->name);
- sprintf(coalesce_key, "trusted.%s.stripe-coalesce", this->name);
-
- dict_del(xattr, size_key);
- dict_del(xattr, count_key);
- dict_del(xattr, index_key);
- dict_del(xattr, coalesce_key);
-
-out:
- STRIPE_STACK_UNWIND(getxattr, frame, op_ret, op_errno, xattr, xdata);
-
- return 0;
-}
-
-int
-stripe_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
-{
- int call_cnt = 0;
- stripe_local_t *local = NULL;
-
- VALIDATE_OR_GOTO(frame, out);
- VALIDATE_OR_GOTO(frame->local, out);
-
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- call_cnt = --local->wind_count;
- }
- UNLOCK(&frame->lock);
-
- if (!xattr || (op_ret < 0))
- goto out;
-
- local->op_ret = 0;
-
- if (!local->xattr) {
- local->xattr = dict_ref(xattr);
- } else {
- stripe_aggregate_xattr(local->xattr, xattr);
- }
-
-out:
- if (!call_cnt) {
- STRIPE_STACK_UNWIND(getxattr, frame, (local ? local->op_ret : -1),
- op_errno, (local ? local->xattr : NULL), xdata);
- }
-
- return 0;
-}
-
-int32_t
-stripe_vgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict,
- dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- int32_t callcnt = 0;
- int32_t ret = -1;
- long cky = 0;
- void *xattr_val = NULL;
- void *xattr_serz = NULL;
- stripe_xattr_sort_t *xattr = NULL;
- dict_t *stripe_xattr = NULL;
-
- if (!frame || !frame->local || !this) {
- gf_log("", GF_LOG_ERROR, "Possible NULL deref");
- return ret;
- }
-
- local = frame->local;
- cky = (long)cookie;
-
- if (local->xsel[0] == '\0') {
- gf_log(this->name, GF_LOG_ERROR, "Empty xattr in cbk");
- return ret;
- }
-
- LOCK(&frame->lock);
- {
- callcnt = --local->wind_count;
-
- if (!dict || (op_ret < 0))
- goto out;
-
- if (!local->xattr_list)
- local->xattr_list = (stripe_xattr_sort_t *)GF_CALLOC(
- local->nallocs, sizeof(stripe_xattr_sort_t),
- gf_stripe_mt_xattr_sort_t);
-
- if (local->xattr_list) {
- xattr = local->xattr_list + (int32_t)cky;
-
- ret = dict_get_ptr_and_len(dict, local->xsel, &xattr_val,
- &xattr->xattr_len);
- if (xattr->xattr_len == 0)
- goto out;
-
- xattr->pos = cky;
- xattr->xattr_value = gf_memdup(xattr_val, xattr->xattr_len);
-
- if (xattr->xattr_value != NULL)
- local->xattr_total_len += xattr->xattr_len + 1;
- }
- }
-out:
- UNLOCK(&frame->lock);
-
- if (!callcnt) {
- if (!local->xattr_total_len)
- goto unwind;
-
- stripe_xattr = dict_new();
- if (!stripe_xattr)
- goto unwind;
-
- /* select filler based on ->xsel */
- if (XATTR_IS_PATHINFO(local->xsel))
- ret = stripe_fill_pathinfo_xattr(this, local, (char **)&xattr_serz);
- else if (XATTR_IS_LOCKINFO(local->xsel)) {
- ret = stripe_fill_lockinfo_xattr(this, local, &xattr_serz);
- } else {
- gf_log(this->name, GF_LOG_WARNING,
- "Unknown xattr in xattr request");
- goto unwind;
- }
-
- if (!ret) {
- ret = dict_set_dynptr(stripe_xattr, local->xsel, xattr_serz,
- local->xattr_total_len);
- if (ret)
- gf_log(this->name, GF_LOG_ERROR, "Can't set %s key in dict",
- local->xsel);
- }
-
- unwind:
- /*
- * Among other things, STRIPE_STACK_UNWIND will free "local"
- * for us. That means we can't dereference it afterward.
- * Fortunately, the actual result is in stripe_xattr now, so we
- * can simply clean up before unwinding.
- */
- ret = stripe_free_xattr_str(local);
- GF_FREE(local->xattr_list);
- local->xattr_list = NULL;
-
- STRIPE_STACK_UNWIND(getxattr, frame, op_ret, op_errno, stripe_xattr,
- NULL);
-
- if (stripe_xattr)
- dict_unref(stripe_xattr);
- }
-
- return ret;
-}
-
-int
-stripe_marker_populate_args(call_frame_t *frame, int type, int *gauge,
- xlator_t **subvols)
-{
- xlator_t *this = frame->this;
- stripe_private_t *priv = this->private;
- stripe_local_t *local = frame->local;
- int count = 0;
-
- count = priv->child_count;
- if (MARKER_XTIME_TYPE == type) {
- if (!IA_FILE_OR_DIR(local->loc.inode->ia_type))
- count = 1;
- }
- memcpy(subvols, priv->xl_array, sizeof(*subvols) * count);
-
- return count;
-}
-
-int32_t
-stripe_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = EINVAL;
- int i = 0;
- int ret = 0;
-
- VALIDATE_OR_GOTO(frame, err);
- VALIDATE_OR_GOTO(this, err);
- VALIDATE_OR_GOTO(loc, err);
- VALIDATE_OR_GOTO(loc->path, err);
- VALIDATE_OR_GOTO(loc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- /* Initialization */
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- loc_copy(&local->loc, loc);
-
- if (name && strncmp(name, QUOTA_SIZE_KEY, SLEN(QUOTA_SIZE_KEY)) == 0) {
- local->wind_count = priv->child_count;
-
- for (i = 0, trav = this->children; i < priv->child_count;
- i++, trav = trav->next) {
- STACK_WIND(frame, stripe_getxattr_cbk, trav->xlator,
- trav->xlator->fops->getxattr, loc, name, xdata);
- }
-
- return 0;
- }
-
- if (name && (XATTR_IS_PATHINFO(name))) {
- if (IA_ISREG(loc->inode->ia_type)) {
- ret = inode_ctx_get(loc->inode, this, (uint64_t *)&local->fctx);
- if (ret)
- gf_log(this->name, GF_LOG_ERROR,
- "stripe size unavailable from fctx"
- " relying on pathinfo could lead to"
- " wrong results");
- }
-
- local->nallocs = local->wind_count = priv->child_count;
- (void)strncpy(local->xsel, name, strlen(name));
-
- /**
- * for xattrs that need info from all children, fill ->xsel
- * as above and call the filler function in cbk based on
- * it
- */
- for (i = 0, trav = this->children; i < priv->child_count;
- i++, trav = trav->next) {
- STACK_WIND_COOKIE(frame, stripe_vgetxattr_cbk, (void *)(long)i,
- trav->xlator, trav->xlator->fops->getxattr, loc,
- name, xdata);
- }
-
- return 0;
- }
-
- if (cluster_handle_marker_getxattr(frame, loc, name, priv->vol_uuid,
- stripe_getxattr_unwind,
- stripe_marker_populate_args) == 0)
- return 0;
-
- STACK_WIND(frame, stripe_internal_getxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
-
- return 0;
-
-err:
- STRIPE_STACK_UNWIND(getxattr, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-static gf_boolean_t
-stripe_is_special_xattr(const char *name)
-{
- gf_boolean_t is_spl = _gf_false;
-
- if (!name) {
- goto out;
- }
-
- if (!strncmp(name, GF_XATTR_LOCKINFO_KEY, SLEN(GF_XATTR_LOCKINFO_KEY)) ||
- XATTR_IS_PATHINFO(name))
- is_spl = _gf_true;
-out:
- return is_spl;
-}
-
-int32_t
-stripe_fgetxattr_from_everyone(call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- int32_t ret = -1, op_errno = 0;
- int i = 0;
- xlator_list_t *trav = NULL;
-
- priv = this->private;
-
- local = mem_get0(this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->op_ret = -1;
- frame->local = local;
-
- strncpy(local->xsel, name, strlen(name));
- local->nallocs = local->wind_count = priv->child_count;
-
- for (i = 0, trav = this->children; i < priv->child_count;
- i++, trav = trav->next) {
- STACK_WIND_COOKIE(frame, stripe_vgetxattr_cbk, (void *)(long)i,
- trav->xlator, trav->xlator->fops->fgetxattr, fd, name,
- xdata);
- }
-
- return 0;
-
-err:
- STACK_UNWIND_STRICT(fgetxattr, frame, -1, op_errno, NULL, NULL);
- return ret;
-}
-
-int32_t
-stripe_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
-{
- if (stripe_is_special_xattr(name)) {
- stripe_fgetxattr_from_everyone(frame, this, fd, name, xdata);
- goto out;
- }
-
- STACK_WIND(frame, stripe_internal_getxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
-
-out:
- return 0;
-}
-
-int32_t
-stripe_priv_dump(xlator_t *this)
-{
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 0;
- stripe_private_t *priv = NULL;
- int ret = -1;
- struct stripe_options *options = NULL;
-
- GF_VALIDATE_OR_GOTO("stripe", this, out);
-
- priv = this->private;
- if (!priv)
- goto out;
-
- ret = TRY_LOCK(&priv->lock);
- if (ret != 0)
- goto out;
-
- gf_proc_dump_add_section("xlator.cluster.stripe.%s.priv", this->name);
- gf_proc_dump_write("child_count", "%d", priv->child_count);
-
- for (i = 0; i < priv->child_count; i++) {
- sprintf(key, "subvolumes[%d]", i);
- gf_proc_dump_write(key, "%s.%s", priv->xl_array[i]->type,
- priv->xl_array[i]->name);
- }
-
- options = priv->pattern;
- while (options != NULL) {
- gf_proc_dump_write("path_pattern", "%s", priv->pattern->path_pattern);
- gf_proc_dump_write("options_block_size", "%ul", options->block_size);
-
- options = options->next;
- }
-
- gf_proc_dump_write("block_size", "%ul", priv->block_size);
- gf_proc_dump_write("nodes-down", "%d", priv->nodes_down);
- gf_proc_dump_write("first-child_down", "%d", priv->first_child_down);
- gf_proc_dump_write("xattr_supported", "%d", priv->xattr_supported);
-
- UNLOCK(&priv->lock);
-
-out:
- return ret;
-}
-
-struct xlator_fops fops = {
- .stat = stripe_stat,
- .unlink = stripe_unlink,
- .rename = stripe_rename,
- .link = stripe_link,
- .truncate = stripe_truncate,
- .create = stripe_create,
- .open = stripe_open,
- .readv = stripe_readv,
- .writev = stripe_writev,
- .statfs = stripe_statfs,
- .flush = stripe_flush,
- .fsync = stripe_fsync,
- .ftruncate = stripe_ftruncate,
- .fstat = stripe_fstat,
- .mkdir = stripe_mkdir,
- .rmdir = stripe_rmdir,
- .lk = stripe_lk,
- .opendir = stripe_opendir,
- .fsyncdir = stripe_fsyncdir,
- .setattr = stripe_setattr,
- .fsetattr = stripe_fsetattr,
- .lookup = stripe_lookup,
- .mknod = stripe_mknod,
- .setxattr = stripe_setxattr,
- .fsetxattr = stripe_fsetxattr,
- .getxattr = stripe_getxattr,
- .fgetxattr = stripe_fgetxattr,
- .removexattr = stripe_removexattr,
- .fremovexattr = stripe_fremovexattr,
- .readdirp = stripe_readdirp,
- .fallocate = stripe_fallocate,
- .discard = stripe_discard,
- .zerofill = stripe_zerofill,
- .seek = stripe_seek,
-};
-
-struct xlator_cbks cbks = {
- .release = stripe_release,
- .forget = stripe_forget,
-};
-
-struct xlator_dumpops dumpops = {
- .priv = stripe_priv_dump,
-};
-
-struct volume_options options[] = {
- {
- .key = {"block-size"},
- .type = GF_OPTION_TYPE_SIZE_LIST,
- .default_value = "128KB",
- .min = STRIPE_MIN_BLOCK_SIZE,
- .description = "Size of the stripe unit that would be read "
- "from or written to the striped servers.",
- .op_version = {1},
- .tags = {"stripe"},
- .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
- },
- {
- .key = {"use-xattr"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "true",
- .description = "handle the stripe without the xattr",
- .tags = {"stripe", "dev-only"},
- .flags = OPT_FLAG_CLIENT_OPT,
- },
- {
- .key = {"coalesce"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "true",
- .description = "Enable/Disable coalesce mode to flatten striped "
- "files as stored on the server (i.e., eliminate holes "
- "caused by the traditional format).",
- .op_version = {1},
- .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
- .tags = {"stripe"},
- },
- {.key = {NULL}},
-};
diff --git a/xlators/cluster/stripe/src/stripe.h b/xlators/cluster/stripe/src/stripe.h
deleted file mode 100644
index 103c96491ff..00000000000
--- a/xlators/cluster/stripe/src/stripe.h
+++ /dev/null
@@ -1,291 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _STRIPE_H_
-#define _STRIPE_H_
-
-#include "xlator.h"
-#include "logging.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat.h"
-#include "compat-errno.h"
-#include "stripe-mem-types.h"
-#include "libxlator.h"
-#include <fnmatch.h>
-#include <signal.h>
-
-#define STRIPE_PATHINFO_HEADER "STRIPE:"
-#define STRIPE_MIN_BLOCK_SIZE (16 * GF_UNIT_KB)
-
-#define STRIPE_STACK_UNWIND(fop, frame, params...) \
- do { \
- stripe_local_t *__local = NULL; \
- if (frame) { \
- __local = frame->local; \
- frame->local = NULL; \
- } \
- STACK_UNWIND_STRICT(fop, frame, params); \
- if (__local) { \
- stripe_local_wipe(__local); \
- mem_put(__local); \
- } \
- } while (0)
-
-#define STRIPE_STACK_DESTROY(frame) \
- do { \
- stripe_local_t *__local = NULL; \
- __local = frame->local; \
- frame->local = NULL; \
- STACK_DESTROY(frame->root); \
- if (__local) { \
- stripe_local_wipe(__local); \
- mem_put(__local); \
- } \
- } while (0)
-
-#define STRIPE_VALIDATE_FCTX(fctx, label) \
- do { \
- int idx = 0; \
- if (!fctx) { \
- op_errno = EINVAL; \
- goto label; \
- } \
- for (idx = 0; idx < fctx->stripe_count; idx++) { \
- if (!fctx->xl_array[idx]) { \
- gf_log(this->name, GF_LOG_ERROR, "fctx->xl_array[%d] is NULL", \
- idx); \
- op_errno = ESTALE; \
- goto label; \
- } \
- } \
- } while (0)
-
-typedef struct stripe_xattr_sort {
- int pos;
- int xattr_len;
- char *xattr_value;
-} stripe_xattr_sort_t;
-
-/**
- * struct stripe_options : This keeps the pattern and the block-size
- * information, which is used for striping on a file.
- */
-struct stripe_options {
- struct stripe_options *next;
- char path_pattern[256];
- uint64_t block_size;
-};
-
-/**
- * Private structure for stripe translator
- */
-struct stripe_private {
- struct stripe_options *pattern;
- xlator_t **xl_array;
- uint64_t block_size;
- gf_lock_t lock;
- uint8_t nodes_down;
- int8_t first_child_down;
- int *last_event;
- int8_t child_count;
- gf_boolean_t xattr_supported; /* default yes */
- gf_boolean_t coalesce;
- char vol_uuid[UUID_SIZE + 1];
-};
-
-/**
- * Used to keep info about the replies received from readv/writev calls
- */
-struct stripe_replies {
- struct iovec *vector;
- int32_t count; // count of vector
- int32_t op_ret; // op_ret of readv
- int32_t op_errno;
- int32_t requested_size;
- struct iatt stbuf; /* 'stbuf' is also a part of reply */
-};
-
-typedef struct _stripe_fd_ctx {
- off_t stripe_size;
- int stripe_count;
- int stripe_coalesce;
- int static_array;
- xlator_t **xl_array;
-} stripe_fd_ctx_t;
-
-/**
- * Local structure to be passed with all the frames in case of STACK_WIND
- */
-struct stripe_local; /* this itself is used inside the structure; */
-
-struct stripe_local {
- struct stripe_local *next;
- call_frame_t *orig_frame;
-
- stripe_fd_ctx_t *fctx;
-
- /* Used by _cbk functions */
- struct iatt stbuf;
- struct iatt pre_buf;
- struct iatt post_buf;
- struct iatt preparent;
- struct iatt postparent;
-
- off_t stbuf_size;
- off_t prebuf_size;
- off_t postbuf_size;
- off_t preparent_size;
- off_t postparent_size;
-
- blkcnt_t stbuf_blocks;
- blkcnt_t prebuf_blocks;
- blkcnt_t postbuf_blocks;
- blkcnt_t preparent_blocks;
- blkcnt_t postparent_blocks;
-
- struct stripe_replies *replies;
- struct statvfs statvfs_buf;
- dir_entry_t *entry;
-
- int8_t revalidate;
- int8_t failed;
- int8_t unwind;
-
- size_t readv_size;
- int32_t entry_count;
- int32_t node_index;
- int32_t call_count;
- int32_t wind_count; /* used instead of child_cound
- in case of read and write */
- int32_t op_ret;
- int32_t op_errno;
- int32_t count;
- int32_t flags;
- char *name;
- inode_t *inode;
-
- loc_t loc;
- loc_t loc2;
-
- mode_t mode;
- dev_t rdev;
- /* For File I/O fops */
- dict_t *xdata;
-
- stripe_xattr_sort_t *xattr_list;
- int32_t xattr_total_len;
- int32_t nallocs;
- char xsel[256];
-
- /* General usage */
- off_t offset;
- off_t stripe_size;
-
- int xattr_self_heal_needed;
- int entry_self_heal_needed;
-
- int8_t *list;
- struct gf_flock lock;
- fd_t *fd;
- void *value;
- struct iobref *iobref;
- gf_dirent_t entries;
- gf_dirent_t *dirent;
- dict_t *xattr;
- uuid_t ia_gfid;
-
- int xflag;
- mode_t umask;
-};
-
-typedef struct stripe_local stripe_local_t;
-typedef struct stripe_private stripe_private_t;
-
-/*
- * Determine the stripe index of a particular frame based on the translator.
- */
-static inline int32_t
-stripe_get_frame_index(stripe_fd_ctx_t *fctx, call_frame_t *prev)
-{
- int32_t i, idx = -1;
-
- for (i = 0; i < fctx->stripe_count; i++) {
- if (fctx->xl_array[i] == prev->this) {
- idx = i;
- break;
- }
- }
-
- return idx;
-}
-
-static inline void
-stripe_copy_xl_array(xlator_t **dst, xlator_t **src, int count)
-{
- int i;
-
- for (i = 0; i < count; i++)
- dst[i] = src[i];
-}
-
-void
-stripe_local_wipe(stripe_local_t *local);
-int32_t
-stripe_ctx_handle(xlator_t *this, call_frame_t *prev, stripe_local_t *local,
- dict_t *dict);
-void
-stripe_aggregate_xattr(dict_t *dst, dict_t *src);
-int32_t
-stripe_xattr_request_build(xlator_t *this, dict_t *dict, uint64_t stripe_size,
- uint32_t stripe_count, uint32_t stripe_index,
- uint32_t stripe_coalesce);
-int32_t
-stripe_get_matching_bs(const char *path, stripe_private_t *priv);
-int
-set_stripe_block_size(xlator_t *this, stripe_private_t *priv, char *data);
-int32_t
-stripe_iatt_merge(struct iatt *from, struct iatt *to);
-int32_t
-stripe_fill_pathinfo_xattr(xlator_t *this, stripe_local_t *local,
- char **xattr_serz);
-int32_t
-stripe_free_xattr_str(stripe_local_t *local);
-int32_t
-stripe_xattr_aggregate(char *buffer, stripe_local_t *local, int32_t *total);
-off_t
-coalesced_offset(off_t offset, uint64_t stripe_size, int stripe_count);
-off_t
-uncoalesced_size(off_t size, uint64_t stripe_size, int stripe_count,
- int stripe_index);
-int32_t
-stripe_fill_lockinfo_xattr(xlator_t *this, stripe_local_t *local,
- void **xattr_serz);
-
-/*
- * Adjust the size attribute for files if coalesce is enabled.
- */
-static inline void
-correct_file_size(struct iatt *buf, stripe_fd_ctx_t *fctx, call_frame_t *prev)
-{
- int index;
-
- if (!IA_ISREG(buf->ia_type))
- return;
-
- if (!fctx || !fctx->stripe_coalesce)
- return;
-
- index = stripe_get_frame_index(fctx, prev);
- buf->ia_size = uncoalesced_size(buf->ia_size, fctx->stripe_size,
- fctx->stripe_count, index);
-}
-
-#endif /* _STRIPE_H_ */
diff --git a/xlators/debug/delay-gen/src/delay-gen-mem-types.h b/xlators/debug/delay-gen/src/delay-gen-mem-types.h
index 63a15a70da3..c89a9217193 100644
--- a/xlators/debug/delay-gen/src/delay-gen-mem-types.h
+++ b/xlators/debug/delay-gen/src/delay-gen-mem-types.h
@@ -11,7 +11,7 @@
#ifndef __DELAY_GEN_MEM_TYPES_H__
#define __DELAY_GEN_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_delay_gen_mem_types_ {
gf_delay_gen_mt_dg_t = gf_common_mt_end + 1,
diff --git a/xlators/debug/delay-gen/src/delay-gen-messages.h b/xlators/debug/delay-gen/src/delay-gen-messages.h
index a9046ca14bf..bc98cec2885 100644
--- a/xlators/debug/delay-gen/src/delay-gen-messages.h
+++ b/xlators/debug/delay-gen/src/delay-gen-messages.h
@@ -11,7 +11,7 @@
#ifndef __DELAY_GEN_MESSAGES_H__
#define __DELAY_GEN_MESSAGES_H__
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
diff --git a/xlators/debug/delay-gen/src/delay-gen.c b/xlators/debug/delay-gen/src/delay-gen.c
index a2d02527f23..4698f1fd785 100644
--- a/xlators/debug/delay-gen/src/delay-gen.c
+++ b/xlators/debug/delay-gen/src/delay-gen.c
@@ -27,7 +27,7 @@ delay_gen(xlator_t *this, int fop)
return 0;
if ((rand() % DELAY_GRANULARITY) < dg->delay_ppm)
- usleep(dg->delay_duration);
+ gf_nanosleep(dg->delay_duration * GF_US_IN_NS);
return 0;
}
@@ -679,4 +679,19 @@ struct volume_options options[] = {
.default_value = "",
},
- {.key = {NULL}}};
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {GD_OP_VERSION_3_12_0},
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "delay-gen",
+ .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/debug/delay-gen/src/delay-gen.h b/xlators/debug/delay-gen/src/delay-gen.h
index 5e4d179f0b4..afa95e5eb2d 100644
--- a/xlators/debug/delay-gen/src/delay-gen.h
+++ b/xlators/debug/delay-gen/src/delay-gen.h
@@ -13,9 +13,9 @@
#include "delay-gen-mem-types.h"
#include "delay-gen-messages.h"
-#include "glusterfs.h"
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
typedef struct {
int enable[GF_FOP_MAXVALUE];
diff --git a/xlators/debug/error-gen/src/error-gen-mem-types.h b/xlators/debug/error-gen/src/error-gen-mem-types.h
index 2facd6b27cb..b9b713af8fc 100644
--- a/xlators/debug/error-gen/src/error-gen-mem-types.h
+++ b/xlators/debug/error-gen/src/error-gen-mem-types.h
@@ -11,7 +11,7 @@
#ifndef __ERROR_GEN_MEM_TYPES_H__
#define __ERROR_GEN_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_error_gen_mem_types_ {
gf_error_gen_mt_eg_t = gf_common_mt_end + 1,
diff --git a/xlators/debug/error-gen/src/error-gen.c b/xlators/debug/error-gen/src/error-gen.c
index c6595b4c0e4..d45655ef4c3 100644
--- a/xlators/debug/error-gen/src/error-gen.c
+++ b/xlators/debug/error-gen/src/error-gen.c
@@ -7,10 +7,10 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#include "error-gen.h"
-#include "statedump.h"
-#include "defaults.h"
+#include <glusterfs/statedump.h>
+#include <glusterfs/defaults.h>
/*
* The user can specify an error probability as a float percentage, but we
@@ -31,9 +31,9 @@
sys_error_t error_no_list[] = {
[GF_FOP_LOOKUP] = {.error_no_count = 4,
.error_no = {ENOENT, ENOTDIR, ENAMETOOLONG, EAGAIN}},
- [GF_FOP_STAT] = {.error_no_count = 7,
- .error_no = {EACCES, EBADF, EFAULT, ENAMETOOLONG, ENOENT,
- ENOMEM, ENOTDIR}},
+ [GF_FOP_STAT] = {.error_no_count = 6,
+ .error_no = {EACCES, EFAULT, ENAMETOOLONG, ENOENT, ENOMEM,
+ ENOTDIR}},
[GF_FOP_READLINK] = {.error_no_count = 8,
.error_no = {EACCES, EFAULT, EINVAL, EIO, ENAMETOOLONG,
ENOENT, ENOMEM, ENOTDIR}},
@@ -79,21 +79,20 @@ sys_error_t error_no_list[] = {
[GF_FOP_WRITE] = {.error_no_count = 7,
.error_no = {EINVAL, EBADF, EFAULT, EISDIR, ENAMETOOLONG,
ENOSPC, GF_ERROR_SHORT_WRITE}},
- [GF_FOP_STATFS] = {.error_no_count = 10,
- .error_no = {EACCES, EBADF, EFAULT, EINTR, EIO,
- ENAMETOOLONG, ENOENT, ENOMEM, ENOSYS,
- ENOTDIR}},
+ [GF_FOP_STATFS] = {.error_no_count = 9,
+ .error_no = {EACCES, EFAULT, EINTR, EIO, ENAMETOOLONG,
+ ENOENT, ENOMEM, ENOSYS, ENOTDIR}},
[GF_FOP_FLUSH] = {.error_no_count = 5,
.error_no = {EACCES, EFAULT, ENAMETOOLONG, ENOSYS,
ENOENT}},
[GF_FOP_FSYNC] = {.error_no_count = 4,
.error_no = {EBADF, EIO, EROFS, EINVAL}},
- [GF_FOP_SETXATTR] = {.error_no_count = 4,
- .error_no = {EACCES, EBADF, EINTR, ENAMETOOLONG}},
- [GF_FOP_GETXATTR] = {.error_no_count = 4,
- .error_no = {EACCES, EBADF, ENAMETOOLONG, EINTR}},
- [GF_FOP_REMOVEXATTR] = {.error_no_count = 4,
- .error_no = {EACCES, EBADF, ENAMETOOLONG, EINTR}},
+ [GF_FOP_SETXATTR] = {.error_no_count = 3,
+ .error_no = {EACCES, EINTR, ENAMETOOLONG}},
+ [GF_FOP_GETXATTR] = {.error_no_count = 3,
+ .error_no = {EACCES, ENAMETOOLONG, EINTR}},
+ [GF_FOP_REMOVEXATTR] = {.error_no_count = 3,
+ .error_no = {EACCES, ENAMETOOLONG, EINTR}},
[GF_FOP_FSETXATTR] = {.error_no_count = 4,
.error_no = {EACCES, EBADF, EINTR, ENAMETOOLONG}},
[GF_FOP_FGETXATTR] = {.error_no_count = 4,
@@ -125,26 +124,25 @@ sys_error_t error_no_list[] = {
ENOENT}},
[GF_FOP_FXATTROP] = {.error_no_count = 4,
.error_no = {EBADF, EIO, EROFS, EINVAL}},
- [GF_FOP_INODELK] = {.error_no_count = 4,
- .error_no = {EACCES, EBADF, EINTR, ENAMETOOLONG}},
+ [GF_FOP_INODELK] = {.error_no_count = 3,
+ .error_no = {EACCES, EINTR, ENAMETOOLONG}},
[GF_FOP_FINODELK] = {.error_no_count = 4,
.error_no = {EACCES, EBADF, EINTR, ENAMETOOLONG}},
- [GF_FOP_ENTRYLK] = {.error_no_count = 4,
- .error_no = {EACCES, EBADF, ENAMETOOLONG, EINTR}},
+ [GF_FOP_ENTRYLK] = {.error_no_count = 3,
+ .error_no = {EACCES, ENAMETOOLONG, EINTR}},
[GF_FOP_FENTRYLK] = {.error_no_count = 10,
.error_no = {EACCES, EEXIST, EFAULT, EISDIR, EMFILE,
ENAMETOOLONG, ENFILE, ENODEV, ENOENT,
ENOMEM}},
- [GF_FOP_SETATTR] = {.error_no_count = 11,
+ [GF_FOP_SETATTR] = {.error_no_count = 10,
.error_no = {EACCES, EFAULT, EIO, ENAMETOOLONG, ENOENT,
- ENOMEM, ENOTDIR, EPERM, EROFS, EBADF,
- EIO}},
+ ENOMEM, ENOTDIR, EPERM, EROFS, EIO}},
[GF_FOP_FSETATTR] = {.error_no_count = 11,
.error_no = {EACCES, EFAULT, EIO, ENAMETOOLONG, ENOENT,
ENOMEM, ENOTDIR, EPERM, EROFS, EBADF,
EIO}},
- [GF_FOP_GETSPEC] = {.error_no_count = 4,
- .error_no = {EACCES, EBADF, ENAMETOOLONG, EINTR}}};
+ [GF_FOP_GETSPEC] = {.error_no_count = 3,
+ .error_no = {EACCES, ENAMETOOLONG, EINTR}}};
int
generate_rand_no(int op_no)
@@ -761,6 +759,7 @@ error_gen_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
*/
shortvec = iov_dup(vector, 1);
shortvec->iov_len /= 2;
+ count = 1;
goto wind;
} else if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
@@ -1396,7 +1395,7 @@ error_gen_priv_dump(xlator_t *this)
gf_proc_dump_write("op_count", "%d", conf->op_count);
gf_proc_dump_write("failure_iter_no", "%d", conf->failure_iter_no);
- gf_proc_dump_write("error_no", "%s", conf->error_no);
+ gf_proc_dump_write("error_no", "%d", conf->error_no_int);
gf_proc_dump_write("random_failure", "%d", conf->random_failure);
UNLOCK(&conf->lock);
@@ -1430,6 +1429,7 @@ reconfigure(xlator_t *this, dict_t *options)
eg_t *pvt = NULL;
int32_t ret = 0;
char *error_enable_fops = NULL;
+ char *error_no = NULL;
double failure_percent_dbl = 0.0;
if (!this || !this->private)
@@ -1439,10 +1439,10 @@ reconfigure(xlator_t *this, dict_t *options)
ret = -1;
- GF_OPTION_RECONF("error-no", pvt->error_no, options, str, out);
+ GF_OPTION_RECONF("error-no", error_no, options, str, out);
- if (pvt->error_no)
- pvt->error_no_int = conv_errno_to_int(&pvt->error_no);
+ if (error_no)
+ pvt->error_no_int = conv_errno_to_int(&error_no);
GF_OPTION_RECONF("failure", failure_percent_dbl, options, percent, out);
@@ -1466,6 +1466,7 @@ init(xlator_t *this)
eg_t *pvt = NULL;
int32_t ret = 0;
char *error_enable_fops = NULL;
+ char *error_no = NULL;
double failure_percent_dbl = 0.0;
if (!this->children || this->children->next) {
@@ -1490,10 +1491,10 @@ init(xlator_t *this)
ret = -1;
- GF_OPTION_INIT("error-no", pvt->error_no, str, out);
+ GF_OPTION_INIT("error-no", error_no, str, out);
- if (pvt->error_no)
- pvt->error_no_int = conv_errno_to_int(&pvt->error_no);
+ if (error_no)
+ pvt->error_no_int = conv_errno_to_int(&error_no);
GF_OPTION_INIT("failure", failure_percent_dbl, percent, out);
@@ -1506,8 +1507,8 @@ init(xlator_t *this)
this->private = pvt;
- /* Give some seed value here */
- srand(time(NULL));
+ /* Give some seed value here. */
+ srand(gf_time());
ret = 0;
out:
@@ -1644,4 +1645,19 @@ struct volume_options options[] = {
.flags = OPT_FLAG_SETTABLE,
},
- {.key = {NULL}}};
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1},
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "error-gen",
+ .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/debug/error-gen/src/error-gen.h b/xlators/debug/error-gen/src/error-gen.h
index ffa09252d0f..2478cd5b21c 100644
--- a/xlators/debug/error-gen/src/error-gen.h
+++ b/xlators/debug/error-gen/src/error-gen.h
@@ -36,7 +36,6 @@ typedef struct {
* It's just not worth blowing up the diff by changing it.
*/
int failure_iter_no;
- char *error_no;
int error_no_int;
gf_boolean_t random_failure;
gf_lock_t lock;
diff --git a/xlators/debug/io-stats/src/io-stats-mem-types.h b/xlators/debug/io-stats/src/io-stats-mem-types.h
index bc25fd2ca4e..51d38d8b97c 100644
--- a/xlators/debug/io-stats/src/io-stats-mem-types.h
+++ b/xlators/debug/io-stats/src/io-stats-mem-types.h
@@ -11,7 +11,7 @@
#ifndef __IO_STATS_MEM_TYPES_H__
#define __IO_STATS_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
extern const char *__progname;
diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c
index 29c9af6f3c5..aa00c446e5a 100644
--- a/xlators/debug/io-stats/src/io-stats.c
+++ b/xlators/debug/io-stats/src/io-stats.c
@@ -7,8 +7,8 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "syscall.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/syscall.h>
/**
* xlators/debug/io_stats :
@@ -28,18 +28,18 @@
#include <fnmatch.h>
#include <errno.h>
-#include "glusterfs.h"
-#include "xlator.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
#include "io-stats-mem-types.h"
#include <stdarg.h>
-#include "defaults.h"
-#include "logging.h"
-#include "cli1-xdr.h"
-#include "statedump.h"
-#include "syncop.h"
+#include <glusterfs/defaults.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/statedump.h>
+#include <glusterfs/syncop.h>
#include <pwd.h>
#include <grp.h>
-#include "upcall-utils.h"
+#include <glusterfs/upcall-utils.h>
+#include <glusterfs/async.h>
#define MAX_LIST_MEMBERS 100
#define DEFAULT_PWD_BUF_SZ 16384
@@ -66,6 +66,17 @@ typedef enum {
IOS_STATS_THRU_MAX,
} ios_stats_thru_t;
+/* This is same as gf1_cli_info_op */
+/* had to be defined here again, so we have modularity between
+ xdr, xlator, and library functions */
+typedef enum ios_info_op {
+ GF_IOS_INFO_NONE = 0,
+ GF_IOS_INFO_ALL = 1,
+ GF_IOS_INFO_INCREMENTAL = 2,
+ GF_IOS_INFO_CUMULATIVE = 3,
+ GF_IOS_INFO_CLEAR = 4,
+} ios_info_op_t;
+
struct ios_stat_lat {
struct timeval time;
double throughput;
@@ -124,7 +135,7 @@ struct ios_global_stats {
gf_atomic_t block_count_read[IOS_BLOCK_COUNT_SIZE];
gf_atomic_t fop_hits[GF_FOP_MAXVALUE];
gf_atomic_t upcall_hits[GF_UPCALL_FLAGS_MAXVALUE];
- struct timeval started_at;
+ time_t started_at;
struct ios_lat latency[GF_FOP_MAXVALUE];
uint64_t nr_opens;
uint64_t max_nr_opens;
@@ -173,7 +184,6 @@ struct ios_conf {
*/
char *unique_id;
ios_dump_type_t dump_format;
- char *dump_format_str;
};
struct ios_fd {
@@ -282,9 +292,7 @@ is_fop_latency_started(call_frame_t *frame)
begin = &frame->begin; \
end = &frame->end; \
\
- elapsed = ((end->tv_sec - begin->tv_sec) * 1e9 + \
- (end->tv_nsec - begin->tv_nsec)) / \
- 1000; \
+ elapsed = gf_tsdiff(begin, end) / 1000.0; \
throughput = op_ret / elapsed; \
\
conf = this->private; \
@@ -668,10 +676,7 @@ ios_dump_throughput_stats(struct ios_stat_head *list_head, xlator_t *this,
FILE *logfp, ios_stats_thru_t type)
{
struct ios_stat_list *entry = NULL;
- struct timeval time = {
- 0,
- };
- char timestr[256] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
@@ -679,12 +684,9 @@ ios_dump_throughput_stats(struct ios_stat_head *list_head, xlator_t *this,
{
list_for_each_entry(entry, &list_head->iosstats->list, list)
{
- gf_time_fmt(timestr, sizeof timestr,
- entry->iosstat->thru_counters[type].time.tv_sec,
- gf_timefmt_FT);
- snprintf(timestr + strlen(timestr),
- sizeof timestr - strlen(timestr), ".%" GF_PRI_SUSECONDS,
- time.tv_usec);
+ gf_time_fmt_tv(timestr, sizeof timestr,
+ &entry->iosstat->thru_counters[type].time,
+ gf_timefmt_FT);
ios_log(this, logfp, "%s \t %-10.2f \t %s", timestr, entry->value,
entry->iosstat->filename);
@@ -763,9 +765,8 @@ err:
int
io_stats_dump_global_to_json_logfp(xlator_t *this,
- struct ios_global_stats *stats,
- struct timeval *now, int interval,
- FILE *logfp)
+ struct ios_global_stats *stats, time_t now,
+ int interval, FILE *logfp)
{
int i = 0;
int j = 0;
@@ -791,10 +792,7 @@ io_stats_dump_global_to_json_logfp(xlator_t *this,
};
dict_t *xattr = NULL;
- interval_sec = ((now->tv_sec * 1000000.0 + now->tv_usec) -
- (stats->started_at.tv_sec * 1000000.0 +
- stats->started_at.tv_usec)) /
- 1000000.0;
+ interval_sec = (double)(now - stats->started_at);
conf = this->private;
@@ -824,19 +822,18 @@ io_stats_dump_global_to_json_logfp(xlator_t *this,
}
if (interval == -1) {
- ios_log(this, logfp, "\"%s.%s.read_%d%s\": \"%" GF_PRI_ATOMIC "\",",
+ ios_log(this, logfp, "\"%s.%s.read_%d%s\": %" GF_PRI_ATOMIC ",",
key_prefix, str_prefix, rw_size, rw_unit,
GF_ATOMIC_GET(stats->block_count_read[i]));
- ios_log(this, logfp,
- "\"%s.%s.write_%d%s\": \"%" GF_PRI_ATOMIC "\",", key_prefix,
- str_prefix, rw_size, rw_unit,
+ ios_log(this, logfp, "\"%s.%s.write_%d%s\": %" GF_PRI_ATOMIC ",",
+ key_prefix, str_prefix, rw_size, rw_unit,
GF_ATOMIC_GET(stats->block_count_write[i]));
} else {
- ios_log(this, logfp, "\"%s.%s.read_%d%s_per_sec\": \"%0.2lf\",",
+ ios_log(this, logfp, "\"%s.%s.read_%d%s_per_sec\": %0.2lf,",
key_prefix, str_prefix, rw_size, rw_unit,
(double)(GF_ATOMIC_GET(stats->block_count_read[i]) /
interval_sec));
- ios_log(this, logfp, "\"%s.%s.write_%d%s_per_sec\": \"%0.2lf\",",
+ ios_log(this, logfp, "\"%s.%s.write_%d%s_per_sec\": %0.2lf,",
key_prefix, str_prefix, rw_size, rw_unit,
(double)(GF_ATOMIC_GET(stats->block_count_write[i]) /
interval_sec));
@@ -844,9 +841,9 @@ io_stats_dump_global_to_json_logfp(xlator_t *this,
}
if (interval == -1) {
- ios_log(this, logfp, "\"%s.%s.fds.open_count\": \"%" PRId64 "\",",
+ ios_log(this, logfp, "\"%s.%s.fds.open_count\": %" PRId64 ",",
key_prefix, str_prefix, conf->cumulative.nr_opens);
- ios_log(this, logfp, "\"%s.%s.fds.max_open_count\": \"%" PRId64 "\",",
+ ios_log(this, logfp, "\"%s.%s.fds.max_open_count\": %" PRId64 ",",
key_prefix, str_prefix, conf->cumulative.max_nr_opens);
}
@@ -868,20 +865,19 @@ io_stats_dump_global_to_json_logfp(xlator_t *this,
}
}
if (interval == -1) {
- ios_log(this, logfp,
- "\"%s.%s.fop.%s.count\": \"%" GF_PRI_ATOMIC "\",",
+ ios_log(this, logfp, "\"%s.%s.fop.%s.count\": %" GF_PRI_ATOMIC ",",
key_prefix, str_prefix, lc_fop_name, fop_hits);
} else {
- ios_log(this, logfp, "\"%s.%s.fop.%s.per_sec\": \"%0.2lf\",",
+ ios_log(this, logfp, "\"%s.%s.fop.%s.per_sec\": %0.2lf,",
key_prefix, str_prefix, lc_fop_name,
(double)(fop_hits / interval_sec));
}
- ios_log(this, logfp, "\"%s.%s.fop.%s.latency_ave_usec\": \"%0.2lf\",",
+ ios_log(this, logfp, "\"%s.%s.fop.%s.latency_ave_usec\": %0.2lf,",
key_prefix, str_prefix, lc_fop_name, fop_lat_ave);
- ios_log(this, logfp, "\"%s.%s.fop.%s.latency_min_usec\": \"%0.2lf\",",
+ ios_log(this, logfp, "\"%s.%s.fop.%s.latency_min_usec\": %0.2lf,",
key_prefix, str_prefix, lc_fop_name, fop_lat_min);
- ios_log(this, logfp, "\"%s.%s.fop.%s.latency_max_usec\": \"%0.2lf\",",
+ ios_log(this, logfp, "\"%s.%s.fop.%s.latency_max_usec\": %0.2lf,",
key_prefix, str_prefix, lc_fop_name, fop_lat_max);
fop_ave_usec_sum += fop_lat_ave;
@@ -896,18 +892,17 @@ io_stats_dump_global_to_json_logfp(xlator_t *this,
*/
ios_log(this, logfp,
"\"%s.%s.fop.weighted_latency_ave_usec_nozerofill\": "
- "\"%0.4lf\",",
+ "%0.4lf,",
key_prefix, str_prefix, weighted_fop_ave_usec);
}
- ios_log(this, logfp, "\"%s.%s.fop.weighted_latency_ave_usec\": \"%0.4lf\",",
+ ios_log(this, logfp, "\"%s.%s.fop.weighted_latency_ave_usec\": %0.4lf,",
key_prefix, str_prefix, weighted_fop_ave_usec);
- ios_log(this, logfp, "\"%s.%s.fop.weighted_fop_count\": \"%ld\",",
- key_prefix, str_prefix, total_fop_hits);
+ ios_log(this, logfp, "\"%s.%s.fop.weighted_fop_count\": %ld,", key_prefix,
+ str_prefix, total_fop_hits);
fop_ave_usec = fop_ave_usec_sum / GF_FOP_MAXVALUE;
- ios_log(this, logfp,
- "\"%s.%s.fop.unweighted_latency_ave_usec\":\"%0.4lf\",", key_prefix,
- str_prefix, fop_ave_usec);
+ ios_log(this, logfp, "\"%s.%s.fop.unweighted_latency_ave_usec\":%0.4lf,",
+ key_prefix, str_prefix, fop_ave_usec);
for (i = 0; i < GF_UPCALL_FLAGS_MAXVALUE; i++) {
lc_fop_name = strdupa(gf_upcall_list[i]);
@@ -916,11 +911,10 @@ io_stats_dump_global_to_json_logfp(xlator_t *this,
}
fop_hits = GF_ATOMIC_GET(stats->upcall_hits[i]);
if (interval == -1) {
- ios_log(this, logfp,
- "\"%s.%s.fop.%s.count\": \"%" GF_PRI_ATOMIC "\",",
+ ios_log(this, logfp, "\"%s.%s.fop.%s.count\": %" GF_PRI_ATOMIC ",",
key_prefix, str_prefix, lc_fop_name, fop_hits);
} else {
- ios_log(this, logfp, "\"%s.%s.fop.%s.per_sec\": \"%0.2lf\",",
+ ios_log(this, logfp, "\"%s.%s.fop.%s.per_sec\": %0.2lf,",
key_prefix, str_prefix, lc_fop_name,
(double)(fop_hits / interval_sec));
}
@@ -937,7 +931,7 @@ io_stats_dump_global_to_json_logfp(xlator_t *this,
dict_foreach_inline(xattr, curr)
{
- ios_log(this, logfp, "\"%s.%s.%s.queue_size\": \"%d\",", key_prefix,
+ ios_log(this, logfp, "\"%s.%s.%s.queue_size\": %d,", key_prefix,
str_prefix, curr->key, data_to_int32(curr->value));
}
@@ -950,23 +944,23 @@ io_stats_dump_global_to_json_logfp(xlator_t *this,
}
if (interval == -1) {
- ios_log(this, logfp, "\"%s.%s.uptime\": \"%" PRId64 "\",", key_prefix,
- str_prefix, (uint64_t)(now->tv_sec - stats->started_at.tv_sec));
+ ios_log(this, logfp, "\"%s.%s.uptime\": %" PRIu64 ",", key_prefix,
+ str_prefix, (uint64_t)(now - stats->started_at));
ios_log(this, logfp,
- "\"%s.%s.bytes_read\": \""
- "%" GF_PRI_ATOMIC "\",",
+ "\"%s.%s.bytes_read\": "
+ "%" GF_PRI_ATOMIC ",",
key_prefix, str_prefix, GF_ATOMIC_GET(stats->data_read));
ios_log(this, logfp,
- "\"%s.%s.bytes_written\": \""
- "%" GF_PRI_ATOMIC "\"",
+ "\"%s.%s.bytes_written\": "
+ "%" GF_PRI_ATOMIC "",
key_prefix, str_prefix, GF_ATOMIC_GET(stats->data_written));
} else {
- ios_log(this, logfp, "\"%s.%s.sample_interval_sec\": \"%0.2lf\",",
+ ios_log(this, logfp, "\"%s.%s.sample_interval_sec\": %0.2lf,",
key_prefix, str_prefix, interval_sec);
- ios_log(this, logfp, "\"%s.%s.bytes_read_per_sec\": \"%0.2lf\",",
+ ios_log(this, logfp, "\"%s.%s.bytes_read_per_sec\": %0.2lf,",
key_prefix, str_prefix,
(double)(GF_ATOMIC_GET(stats->data_read) / interval_sec));
- ios_log(this, logfp, "\"%s.%s.bytes_written_per_sec\": \"%0.2lf\"",
+ ios_log(this, logfp, "\"%s.%s.bytes_written_per_sec\": %0.2lf",
key_prefix, str_prefix,
(double)(GF_ATOMIC_GET(stats->data_written) / interval_sec));
}
@@ -1203,14 +1197,14 @@ out:
int
io_stats_dump_global_to_logfp(xlator_t *this, struct ios_global_stats *stats,
- struct timeval *now, int interval, FILE *logfp)
+ time_t now, int interval, FILE *logfp)
{
int i = 0;
int per_line = 0;
int index = 0;
struct ios_stat_head *list_head = NULL;
struct ios_conf *conf = NULL;
- char timestr[256] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
char str_header[128] = {0};
@@ -1226,8 +1220,8 @@ io_stats_dump_global_to_logfp(xlator_t *this, struct ios_global_stats *stats,
ios_log(this, logfp, "\n=== Cumulative stats ===");
else
ios_log(this, logfp, "\n=== Interval %d stats ===", interval);
- ios_log(this, logfp, " Duration : %" PRId64 " secs",
- (uint64_t)(now->tv_sec - stats->started_at.tv_sec));
+ ios_log(this, logfp, " Duration : %" PRIu64 " secs",
+ (uint64_t)(now - stats->started_at));
ios_log(this, logfp, " BytesRead : %" GF_PRI_ATOMIC,
GF_ATOMIC_GET(stats->data_read));
ios_log(this, logfp, " BytesWritten : %" GF_PRI_ATOMIC "\n",
@@ -1319,11 +1313,8 @@ io_stats_dump_global_to_logfp(xlator_t *this, struct ios_global_stats *stats,
if (interval == -1) {
LOCK(&conf->lock);
{
- gf_time_fmt(timestr, sizeof timestr,
- conf->cumulative.max_openfd_time.tv_sec, gf_timefmt_FT);
- snprintf(timestr + strlen(timestr),
- sizeof timestr - strlen(timestr), ".%" GF_PRI_SUSECONDS,
- conf->cumulative.max_openfd_time.tv_usec);
+ gf_time_fmt_tv(timestr, sizeof timestr,
+ &conf->cumulative.max_openfd_time, gf_timefmt_FT);
ios_log(this, logfp,
"Current open fd's: %" PRId64 " Max open fd's: %" PRId64
" time %s",
@@ -1375,10 +1366,10 @@ io_stats_dump_global_to_logfp(xlator_t *this, struct ios_global_stats *stats,
int
io_stats_dump_global_to_dict(xlator_t *this, struct ios_global_stats *stats,
- struct timeval *now, int interval, dict_t *dict)
+ time_t now, int interval, dict_t *dict)
{
int ret = 0;
- char key[256] = {0};
+ char key[64] = {0};
uint64_t sec = 0;
int i = 0;
uint64_t count = 0;
@@ -1401,7 +1392,7 @@ io_stats_dump_global_to_dict(xlator_t *this, struct ios_global_stats *stats,
interval);
snprintf(key, sizeof(key), "%d-duration", interval);
- sec = (uint64_t)(now->tv_sec - stats->started_at.tv_sec);
+ sec = now - stats->started_at;
ret = dict_set_uint64(dict, key, sec);
if (ret) {
gf_log(this->name, GF_LOG_ERROR,
@@ -1524,9 +1515,8 @@ out:
}
int
-io_stats_dump_global(xlator_t *this, struct ios_global_stats *stats,
- struct timeval *now, int interval,
- struct ios_dump_args *args)
+io_stats_dump_global(xlator_t *this, struct ios_global_stats *stats, time_t now,
+ int interval, struct ios_dump_args *args)
{
int ret = -1;
@@ -1584,24 +1574,24 @@ ios_dump_args_init(struct ios_dump_args *args, ios_dump_type_t type,
}
static void
-ios_global_stats_clear(struct ios_global_stats *stats, struct timeval *now)
+ios_global_stats_clear(struct ios_global_stats *stats, time_t now)
{
GF_ASSERT(stats);
GF_ASSERT(now);
memset(stats, 0, sizeof(*stats));
- stats->started_at = *now;
+ stats->started_at = now;
}
int
-io_stats_dump(xlator_t *this, struct ios_dump_args *args, gf1_cli_info_op op,
+io_stats_dump(xlator_t *this, struct ios_dump_args *args, ios_info_op_t op,
gf_boolean_t is_peek)
{
struct ios_conf *conf = NULL;
struct ios_global_stats cumulative = {};
struct ios_global_stats incremental = {};
int increment = 0;
- struct timeval now;
+ time_t now = 0;
GF_ASSERT(this);
GF_ASSERT(args);
@@ -1609,31 +1599,31 @@ io_stats_dump(xlator_t *this, struct ios_dump_args *args, gf1_cli_info_op op,
GF_ASSERT(args->type < IOS_DUMP_TYPE_MAX);
conf = this->private;
+ now = gf_time();
- gettimeofday(&now, NULL);
LOCK(&conf->lock);
{
- if (op == GF_CLI_INFO_ALL || op == GF_CLI_INFO_CUMULATIVE)
+ if (op == GF_IOS_INFO_ALL || op == GF_IOS_INFO_CUMULATIVE)
cumulative = conf->cumulative;
- if (op == GF_CLI_INFO_ALL || op == GF_CLI_INFO_INCREMENTAL) {
+ if (op == GF_IOS_INFO_ALL || op == GF_IOS_INFO_INCREMENTAL) {
incremental = conf->incremental;
increment = conf->increment;
if (!is_peek) {
increment = conf->increment++;
- ios_global_stats_clear(&conf->incremental, &now);
+ ios_global_stats_clear(&conf->incremental, now);
}
}
}
UNLOCK(&conf->lock);
- if (op == GF_CLI_INFO_ALL || op == GF_CLI_INFO_CUMULATIVE)
- io_stats_dump_global(this, &cumulative, &now, -1, args);
+ if (op == GF_IOS_INFO_ALL || op == GF_IOS_INFO_CUMULATIVE)
+ io_stats_dump_global(this, &cumulative, now, -1, args);
- if (op == GF_CLI_INFO_ALL || op == GF_CLI_INFO_INCREMENTAL)
- io_stats_dump_global(this, &incremental, &now, increment, args);
+ if (op == GF_IOS_INFO_ALL || op == GF_IOS_INFO_INCREMENTAL)
+ io_stats_dump_global(this, &incremental, now, increment, args);
return 0;
}
@@ -1643,9 +1633,8 @@ io_stats_dump_fd(xlator_t *this, struct ios_fd *iosfd)
{
struct ios_conf *conf = NULL;
struct timeval now;
- uint64_t sec = 0;
- uint64_t usec = 0;
int i = 0;
+ double usecs = 0;
uint64_t data_read = 0;
uint64_t data_written = 0;
uint64_t block_count_read = 0;
@@ -1660,23 +1649,15 @@ io_stats_dump_fd(xlator_t *this, struct ios_fd *iosfd)
return 0;
gettimeofday(&now, NULL);
-
- if (iosfd->opened_at.tv_usec > now.tv_usec) {
- now.tv_usec += 1000000;
- now.tv_usec--;
- }
-
- sec = now.tv_sec - iosfd->opened_at.tv_sec;
- usec = now.tv_usec - iosfd->opened_at.tv_usec;
+ usecs = gf_tvdiff(&iosfd->opened_at, &now);
gf_log(this->name, GF_LOG_INFO, "--- fd stats ---");
if (iosfd->filename)
gf_log(this->name, GF_LOG_INFO, " Filename : %s", iosfd->filename);
- if (sec)
- gf_log(this->name, GF_LOG_INFO,
- " Lifetime : %" PRId64 "secs, %" PRId64 "usecs", sec, usec);
+ if (usecs)
+ gf_log(this->name, GF_LOG_INFO, " Lifetime : %lf secs", usecs);
data_read = GF_ATOMIC_GET(iosfd->data_read);
if (data_read)
@@ -1779,9 +1760,7 @@ update_ios_latency(struct ios_conf *conf, call_frame_t *frame,
begin = &frame->begin;
end = &frame->end;
- elapsed = ((end->tv_sec - begin->tv_sec) * 1e9 +
- (end->tv_nsec - begin->tv_nsec)) /
- 1000;
+ elapsed = gf_tsdiff(begin, end) / 1000.0;
update_ios_latency_stats(&conf->cumulative, elapsed, op);
update_ios_latency_stats(&conf->incremental, elapsed, op);
@@ -1796,12 +1775,13 @@ io_stats_dump_stats_to_dict(xlator_t *this, dict_t *resp,
{
struct ios_conf *conf = NULL;
int cnt = 0;
- char key[256];
+ char key[32];
+ int keylen;
struct ios_stat_head *list_head = NULL;
struct ios_stat_list *entry = NULL;
int ret = -1;
ios_stats_thru_t index = IOS_STATS_THRU_MAX;
- char timestr[256] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
char *dict_timestr = NULL;
@@ -1820,14 +1800,9 @@ io_stats_dump_stats_to_dict(xlator_t *this, dict_t *resp,
ret = dict_set_uint64(resp, "max-open",
conf->cumulative.max_nr_opens);
- gf_time_fmt(timestr, sizeof timestr,
- conf->cumulative.max_openfd_time.tv_sec,
- gf_timefmt_FT);
- if (conf->cumulative.max_openfd_time.tv_sec)
- snprintf(timestr + strlen(timestr),
- sizeof timestr - strlen(timestr),
- ".%" GF_PRI_SUSECONDS,
- conf->cumulative.max_openfd_time.tv_usec);
+ gf_time_fmt_tv(timestr, sizeof timestr,
+ &conf->cumulative.max_openfd_time,
+ gf_timefmt_FT);
dict_timestr = gf_strdup(timestr);
if (!dict_timestr)
@@ -1867,7 +1842,7 @@ io_stats_dump_stats_to_dict(xlator_t *this, dict_t *resp,
default:
goto out;
}
- ret = dict_set_int32(resp, "top-op", flags);
+ ret = dict_set_int32_sizen(resp, "top-op", flags);
if (!list_cnt)
goto out;
LOCK(&list_head->lock);
@@ -1875,24 +1850,24 @@ io_stats_dump_stats_to_dict(xlator_t *this, dict_t *resp,
list_for_each_entry(entry, &list_head->iosstats->list, list)
{
cnt++;
- snprintf(key, 256, "%s-%d", "filename", cnt);
- ret = dict_set_str(resp, key, entry->iosstat->filename);
+ keylen = snprintf(key, sizeof(key), "filename-%d", cnt);
+ ret = dict_set_strn(resp, key, keylen, entry->iosstat->filename);
if (ret)
goto unlock_list_head;
- snprintf(key, 256, "%s-%d", "value", cnt);
+ snprintf(key, sizeof(key), "value-%d", cnt);
ret = dict_set_uint64(resp, key, entry->value);
if (ret)
goto unlock_list_head;
if (index != IOS_STATS_THRU_MAX) {
- snprintf(key, 256, "%s-%d", "time-sec", cnt);
- ret = dict_set_int32(
- resp, key,
+ keylen = snprintf(key, sizeof(key), "time-sec-%d", cnt);
+ ret = dict_set_int32n(
+ resp, key, keylen,
entry->iosstat->thru_counters[index].time.tv_sec);
if (ret)
goto unlock_list_head;
- snprintf(key, 256, "%s-%d", "time-usec", cnt);
- ret = dict_set_int32(
- resp, key,
+ keylen = snprintf(key, sizeof(key), "time-usec-%d", cnt);
+ ret = dict_set_int32n(
+ resp, key, keylen,
entry->iosstat->thru_counters[index].time.tv_usec);
if (ret)
goto unlock_list_head;
@@ -1907,7 +1882,7 @@ unlock_list_head:
* failed. */
if (ret)
goto out;
- ret = dict_set_int32(resp, "members", cnt);
+ ret = dict_set_int32_sizen(resp, "members", cnt);
out:
return ret;
}
@@ -2123,6 +2098,19 @@ io_stats_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
}
int
+io_stats_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *stbuf, struct iatt *prebuf_dst,
+ struct iatt *postbuf_dst, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS(frame, COPY_FILE_RANGE);
+
+ STACK_UNWIND_STRICT(copy_file_range, frame, op_ret, op_errno, stbuf,
+ prebuf_dst, postbuf_dst, xdata);
+ return 0;
+}
+
+int
io_stats_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, gf_dirent_t *buf,
dict_t *xdata)
@@ -2877,6 +2865,19 @@ io_stats_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
}
int
+io_stats_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in,
+ off_t off_in, fd_t *fd_out, off_t off_out, size_t len,
+ uint32_t flags, dict_t *xdata)
+{
+ START_FOP_LATENCY(frame);
+
+ STACK_WIND(frame, io_stats_copy_file_range_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->copy_file_range, fd_in, off_in, fd_out,
+ off_out, len, flags, xdata);
+ return 0;
+}
+
+int
io_stats_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
START_FOP_LATENCY(frame);
@@ -3009,7 +3010,7 @@ conditional_dump(dict_t *dict, char *key, data_t *value, void *data)
} else {
(void)ios_dump_args_init(&args, IOS_DUMP_TYPE_FILE, logfp);
}
- io_stats_dump(this, &args, GF_CLI_INFO_ALL, _gf_false);
+ io_stats_dump(this, &args, GF_IOS_INFO_ALL, _gf_false);
fclose(logfp);
return 0;
}
@@ -3112,7 +3113,7 @@ _ios_dump_thread(xlator_t *this)
stats_logfp = fopen(stats_filename, "w+");
if (stats_logfp) {
(void)ios_dump_args_init(&args, conf->dump_format, stats_logfp);
- io_stats_dump(this, &args, GF_CLI_INFO_ALL, _gf_false);
+ io_stats_dump(this, &args, GF_IOS_INFO_ALL, _gf_false);
fclose(stats_logfp);
log_stats_fopen_failure = _gf_true;
} else if (log_stats_fopen_failure) {
@@ -3573,26 +3574,21 @@ ios_destroy_top_stats(struct ios_conf *conf)
return;
}
-static int
+static void
io_stats_clear(struct ios_conf *conf)
{
- struct timeval now;
- int ret = -1;
+ time_t now = 0;
GF_ASSERT(conf);
+ now = gf_time();
- if (!gettimeofday(&now, NULL)) {
- LOCK(&conf->lock);
- {
- ios_global_stats_clear(&conf->cumulative, &now);
- ios_global_stats_clear(&conf->incremental, &now);
- conf->increment = 0;
- }
- UNLOCK(&conf->lock);
- ret = 0;
+ LOCK(&conf->lock);
+ {
+ ios_global_stats_clear(&conf->cumulative, now);
+ ios_global_stats_clear(&conf->incremental, now);
+ conf->increment = 0;
}
-
- return ret;
+ UNLOCK(&conf->lock);
}
int32_t
@@ -3658,15 +3654,15 @@ io_priv(xlator_t *this)
}
static void
-ios_set_log_format_code(struct ios_conf *conf)
+ios_set_log_format_code(struct ios_conf *conf, char *dump_format_str)
{
- if (strcmp(conf->dump_format_str, "json") == 0)
+ if (strcmp(dump_format_str, "json") == 0)
conf->dump_format = IOS_DUMP_TYPE_JSON_FILE;
- else if (strcmp(conf->dump_format_str, "text") == 0)
+ else if (strcmp(dump_format_str, "text") == 0)
conf->dump_format = IOS_DUMP_TYPE_FILE;
- else if (strcmp(conf->dump_format_str, "dict") == 0)
+ else if (strcmp(dump_format_str, "dict") == 0)
conf->dump_format = IOS_DUMP_TYPE_DICT;
- else if (strcmp(conf->dump_format_str, "samples") == 0)
+ else if (strcmp(dump_format_str, "samples") == 0)
conf->dump_format = IOS_DUMP_TYPE_SAMPLES;
}
@@ -3683,19 +3679,23 @@ xlator_set_loglevel(xlator_t *this, int log_level)
active = ctx->active;
top = active->first;
- if (strcmp(top->type, "protocol/server") || (log_level == -1))
+ if (log_level == -1)
return;
- /* Set log-level for server xlator */
- top->loglevel = log_level;
+ if (ctx->cmd_args.brick_mux) {
+ /* Set log-level for all brick xlators */
+ top->loglevel = log_level;
- /* Set log-level for parent xlator */
- if (this->parents)
- this->parents->xlator->loglevel = log_level;
+ /* Set log-level for parent xlator */
+ if (this->parents)
+ this->parents->xlator->loglevel = log_level;
- while (trav) {
- trav->loglevel = log_level;
- trav = trav->next;
+ while (trav) {
+ trav->loglevel = log_level;
+ trav = trav->next;
+ }
+ } else {
+ gf_log_set_loglevel(this->ctx, log_level);
}
}
@@ -3707,6 +3707,7 @@ reconfigure(xlator_t *this, dict_t *options)
char *sys_log_str = NULL;
char *log_format_str = NULL;
char *logger_str = NULL;
+ char *dump_format_str = NULL;
int sys_log_level = -1;
char *log_str = NULL;
int log_level = -1;
@@ -3715,6 +3716,7 @@ reconfigure(xlator_t *this, dict_t *options)
uint32_t log_buf_size = 0;
uint32_t log_flush_timeout = 0;
int32_t old_dump_interval;
+ int32_t threads;
if (!this || !this->private)
goto out;
@@ -3751,9 +3753,8 @@ reconfigure(xlator_t *this, dict_t *options)
GF_OPTION_RECONF("ios-sample-interval", conf->ios_sample_interval, options,
int32, out);
- GF_OPTION_RECONF("ios-dump-format", conf->dump_format_str, options, str,
- out);
- ios_set_log_format_code(conf);
+ GF_OPTION_RECONF("ios-dump-format", dump_format_str, options, str, out);
+ ios_set_log_format_code(conf, dump_format_str);
GF_OPTION_RECONF("ios-sample-buf-size", conf->ios_sample_buf_size, options,
int32, out);
GF_OPTION_RECONF("sys-log-level", sys_log_str, options, str, out);
@@ -3788,6 +3789,9 @@ reconfigure(xlator_t *this, dict_t *options)
out);
gf_log_set_log_flush_timeout(log_flush_timeout);
+ GF_OPTION_RECONF("threads", threads, options, int32, out);
+ gf_async_adjust_threads(threads);
+
ret = 0;
out:
gf_log(this ? this->name : "io-stats", GF_LOG_DEBUG,
@@ -3825,7 +3829,7 @@ ios_conf_destroy(struct ios_conf *conf)
_ios_destroy_dump_thread(conf);
ios_destroy_sample_buf(conf->ios_sample_buf);
LOCK_DESTROY(&conf->lock);
- GF_FREE(conf->dnscache);
+ gf_dnscache_deinit(conf->dnscache);
GF_FREE(conf);
}
@@ -3848,16 +3852,18 @@ ios_init_stats(struct ios_global_stats *stats)
for (i = 0; i < GF_UPCALL_FLAGS_MAXVALUE; i++)
GF_ATOMIC_INIT(stats->upcall_hits[i], 0);
- gettimeofday(&stats->started_at, NULL);
+ stats->started_at = gf_time();
}
int
init(xlator_t *this)
{
struct ios_conf *conf = NULL;
+ char *volume_id = NULL;
char *sys_log_str = NULL;
char *logger_str = NULL;
char *log_format_str = NULL;
+ char *dump_format_str = NULL;
int logger = -1;
int log_format = -1;
int sys_log_level = -1;
@@ -3866,6 +3872,7 @@ init(xlator_t *this)
int ret = -1;
uint32_t log_buf_size = 0;
uint32_t log_flush_timeout = 0;
+ int32_t threads;
if (!this)
return -1;
@@ -3893,6 +3900,11 @@ init(xlator_t *this)
conf->unique_id = this->name;
}
+ ret = dict_get_strn(this->options, "volume-id", SLEN("volume-id"),
+ &volume_id);
+ if (!ret) {
+ strncpy(this->graph->volume_id, volume_id, GF_UUID_BUF_SIZE);
+ }
/*
* Init it just after calloc, so that we are sure the lock is inited
* in case of error paths.
@@ -3918,8 +3930,8 @@ init(xlator_t *this)
GF_OPTION_INIT("ios-sample-interval", conf->ios_sample_interval, int32,
out);
- GF_OPTION_INIT("ios-dump-format", conf->dump_format_str, str, out);
- ios_set_log_format_code(conf);
+ GF_OPTION_INIT("ios-dump-format", dump_format_str, str, out);
+ ios_set_log_format_code(conf, dump_format_str);
GF_OPTION_INIT("ios-sample-buf-size", conf->ios_sample_buf_size, int32,
out);
@@ -3933,6 +3945,10 @@ init(xlator_t *this)
GF_OPTION_INIT("ios-dnscache-ttl-sec", conf->ios_dnscache_ttl_sec, int32,
out);
conf->dnscache = gf_dnscache_init(conf->ios_dnscache_ttl_sec);
+ if (!conf->dnscache) {
+ ret = -1;
+ goto out;
+ }
GF_OPTION_INIT("sys-log-level", sys_log_str, str, out);
if (sys_log_str) {
@@ -3965,6 +3981,9 @@ init(xlator_t *this)
GF_OPTION_INIT("log-flush-timeout", log_flush_timeout, time, out);
gf_log_set_log_flush_timeout(log_flush_timeout);
+ GF_OPTION_INIT("threads", threads, int32, out);
+ gf_async_adjust_threads(threads);
+
this->private = conf;
if (conf->ios_dump_interval > 0) {
conf->dump_thread_running = _gf_true;
@@ -4069,8 +4088,8 @@ notify(xlator_t *this, int32_t event, void *data, ...)
}
} else {
ret = dict_get_int32(dict, "info-op", &op);
- if (ret || op < GF_CLI_INFO_ALL || GF_CLI_INFO_CLEAR < op)
- op = GF_CLI_INFO_ALL;
+ if (ret || op < GF_IOS_INFO_ALL || GF_IOS_INFO_CLEAR < op)
+ op = GF_IOS_INFO_ALL;
ret = dict_set_int32(output, "info-op", op);
if (ret) {
@@ -4079,13 +4098,10 @@ notify(xlator_t *this, int32_t event, void *data, ...)
goto out;
}
- if (GF_CLI_INFO_CLEAR == op) {
- ret = io_stats_clear(this->private);
- if (ret)
- gf_log(this->name, GF_LOG_ERROR,
- "Failed to clear info stats");
+ if (GF_IOS_INFO_CLEAR == op) {
+ io_stats_clear(this->private);
- ret = dict_set_int32(output, "stats-cleared", ret ? 0 : 1);
+ ret = dict_set_int32(output, "stats-cleared", 1);
if (ret)
gf_log(this->name, GF_LOG_ERROR,
"Failed to set stats-cleared"
@@ -4193,6 +4209,7 @@ struct xlator_fops fops = {
.getactivelk = io_stats_getactivelk,
.setactivelk = io_stats_setactivelk,
.compound = io_stats_compound,
+ .copy_file_range = io_stats_copy_file_range,
};
struct xlator_cbks cbks = {
@@ -4407,6 +4424,57 @@ struct volume_options options[] = {
.type = GF_OPTION_TYPE_STR,
.default_value = "/no/such/path",
.description = "Unique ID for our files."},
+ {.key = {"global-threading"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .tags = {"io-stats", "threading"},
+ .description = "This option enables the global threading support for "
+ "bricks. If enabled, it's recommended to also enable "
+ "'performance.iot-pass-through'"},
+ {.key = {"threads"}, .type = GF_OPTION_TYPE_INT},
+ {.key = {"brick-threads"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "16",
+ .min = 0,
+ .max = GF_ASYNC_MAX_THREADS,
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"io-stats", "threading"},
+ .description = "When global threading is used, this value determines the "
+ "maximum amount of threads that can be created on bricks"},
+ {.key = {"client-threads"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "16",
+ .min = 0,
+ .max = GF_ASYNC_MAX_THREADS,
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"io-stats", "threading"},
+ .description = "When global threading is used, this value determines the "
+ "maximum amount of threads that can be created on clients"},
+ {.key = {"volume-id"},
+ .type = GF_OPTION_TYPE_STR,
+ .op_version = {GD_OP_VERSION_7_1},
+ .tags = {"global", "volume-id"},
+ .description =
+ "This option points to the 'unique' UUID particular to this "
+ "volume, which would be set in 'graph->volume_id'"},
{.key = {NULL}},
+};
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "io-stats",
+ .category = GF_MAINTAINED,
};
diff --git a/xlators/debug/sink/src/sink.c b/xlators/debug/sink/src/sink.c
index fbbdd3a4847..9822bbb732e 100644
--- a/xlators/debug/sink/src/sink.c
+++ b/xlators/debug/sink/src/sink.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
int32_t
init(xlator_t *this)
@@ -80,3 +80,15 @@ struct xlator_cbks cbks = {};
struct volume_options options[] = {
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .op_version = {GD_OP_VERSION_3_12_0},
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "sink",
+ .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/debug/trace/src/trace-mem-types.h b/xlators/debug/trace/src/trace-mem-types.h
index cf05a77b9f1..18a7e0414a6 100644
--- a/xlators/debug/trace/src/trace-mem-types.h
+++ b/xlators/debug/trace/src/trace-mem-types.h
@@ -11,7 +11,7 @@
#ifndef __TRACE_MEM_TYPES_H__
#define __TRACE_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_trace_mem_types_ {
gf_trace_mt_trace_conf_t = gf_common_mt_end + 1,
diff --git a/xlators/debug/trace/src/trace.c b/xlators/debug/trace/src/trace.c
index 0aca3a9a5bb..6ed0ca00342 100644
--- a/xlators/debug/trace/src/trace.c
+++ b/xlators/debug/trace/src/trace.c
@@ -22,13 +22,13 @@
static void
trace_stat_to_str(struct iatt *buf, char *str, size_t len)
{
- char atime_buf[200] = {
+ char atime_buf[GF_TIMESTR_SIZE] = {
0,
};
- char mtime_buf[200] = {
+ char mtime_buf[GF_TIMESTR_SIZE] = {
0,
};
- char ctime_buf[200] = {
+ char ctime_buf[GF_TIMESTR_SIZE] = {
0,
};
@@ -64,7 +64,7 @@ trace_stat_to_str(struct iatt *buf, char *str, size_t len)
int
dump_history_trace(circular_buffer_t *cb, void *data)
{
- char timestr[256] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
@@ -72,9 +72,7 @@ dump_history_trace(circular_buffer_t *cb, void *data)
gettimeofday () fails, it's safe to check tm and then dump the time
at which the entry was added to the buffer */
- gf_time_fmt(timestr, sizeof timestr, cb->tv.tv_sec, gf_timefmt_Ymd_T);
- snprintf(timestr + strlen(timestr), 256 - strlen(timestr),
- ".%" GF_PRI_SUSECONDS, cb->tv.tv_usec);
+ gf_time_fmt_tv(timestr, sizeof timestr, &cb->tv, gf_timefmt_Ymd_T);
gf_proc_dump_write("TIME", "%s", timestr);
gf_proc_dump_write("FOP", "%s\n", (char *)cb->data);
@@ -2209,10 +2207,10 @@ int
trace_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- char actime_str[256] = {
+ char actime_str[GF_TIMESTR_SIZE] = {
0,
};
- char modtime_str[256] = {
+ char modtime_str[GF_TIMESTR_SIZE] = {
0,
};
trace_conf_t *conf = NULL;
@@ -2278,10 +2276,10 @@ int
trace_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- char actime_str[256] = {
+ char actime_str[GF_TIMESTR_SIZE] = {
0,
};
- char modtime_str[256] = {
+ char modtime_str[GF_TIMESTR_SIZE] = {
0,
};
trace_conf_t *conf = NULL;
@@ -3520,3 +3518,17 @@ struct volume_options options[] = {
};
struct xlator_dumpops dumpops = {.history = trace_dump_history};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1},
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "trace",
+ .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/debug/trace/src/trace.h b/xlators/debug/trace/src/trace.h
index cd73e0f34ed..b16304799da 100644
--- a/xlators/debug/trace/src/trace.h
+++ b/xlators/debug/trace/src/trace.h
@@ -10,14 +10,14 @@
#include <time.h>
#include <errno.h>
-#include "glusterfs.h"
-#include "xlator.h"
-#include "common-utils.h"
-#include "event-history.h"
-#include "logging.h"
-#include "circ-buff.h"
-#include "statedump.h"
-#include "options.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/event-history.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/circ-buff.h>
+#include <glusterfs/statedump.h>
+#include <glusterfs/options.h>
#define TRACE_DEFAULT_HISTORY_SIZE 1024
diff --git a/xlators/encryption/Makefile.am b/xlators/encryption/Makefile.am
deleted file mode 100644
index 36efc6698bd..00000000000
--- a/xlators/encryption/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = rot-13 crypt
-
-CLEANFILES =
diff --git a/xlators/encryption/crypt/Makefile.am b/xlators/encryption/crypt/Makefile.am
deleted file mode 100644
index d471a3f9243..00000000000
--- a/xlators/encryption/crypt/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = src
-
-CLEANFILES =
diff --git a/xlators/encryption/crypt/src/Makefile.am b/xlators/encryption/crypt/src/Makefile.am
deleted file mode 100644
index 05fd3d5096b..00000000000
--- a/xlators/encryption/crypt/src/Makefile.am
+++ /dev/null
@@ -1,26 +0,0 @@
-if ENABLE_CRYPT_XLATOR
-
-xlator_LTLIBRARIES = crypt.la
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/encryption
-
-crypt_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-
-crypt_la_SOURCES = keys.c data.c metadata.c atom.c crypt.c
-crypt_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
- -lssl -lcrypto
-
-noinst_HEADERS = crypt-common.h crypt-mem-types.h crypt.h metadata.h
-
-AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
- -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
-
-AM_CFLAGS = -Wall $(GF_CFLAGS)
-
-CLEANFILES =
-
-else
-
-noinst_DIST = keys.c data.c metadata.c atom.c crypt.c
-noinst_HEADERS = crypt-common.h crypt-mem-types.h crypt.h metadata.h
-
-endif
diff --git a/xlators/encryption/crypt/src/atom.c b/xlators/encryption/crypt/src/atom.c
deleted file mode 100644
index 8e9c4940abd..00000000000
--- a/xlators/encryption/crypt/src/atom.c
+++ /dev/null
@@ -1,861 +0,0 @@
-/*
- Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include "defaults.h"
-#include "crypt-common.h"
-#include "crypt.h"
-
-/*
- * Glossary
- *
- *
- * cblock (or cipher block). A logical unit in a file.
- * cblock size is defined as the number of bits
- * in an input (or output) block of the block
- * cipher (*). Cipher block size is a property of
- * cipher algorithm. E.g. cblock size is 64 bits
- * for DES, 128 bits for AES, etc.
- *
- * atomic cipher A cipher algorithm, which requires some chunks of
- * algorithm text to be padded at left and(or) right sides before
- * cipher transaform.
- *
- *
- * block (atom) Minimal chunk of file's data, which doesn't require
- * padding. We'll consider logical units in a file of
- * block size (atom size).
- *
- * cipher algorithm Atomic cipher algorithm, which requires the last
- * with EOF issue incomplete cblock in a file to be padded with some
- * data (usually zeros).
- *
- *
- * operation, which reading/writing from offset, which is not aligned to
- * forms a gap at to atom size
- * the beginning
- *
- *
- * operation, which reading/writing count bytes starting from offset off,
- * forms a gap at so that off+count is not aligned to atom_size
- * the end
- *
- * head block the first atom affected by an operation, which forms
- * a gap at the beginning, or(and) at the end.
- * Сomment. Head block has at least one gap (either at
- * the beginning, or at the end)
- *
- *
- * tail block the last atom different from head, affected by an
- * operation, which forms a gap at the end.
- * Сomment: Tail block has exactly one gap (at the end).
- *
- *
- * partial block head or tail block
- *
- *
- * full block block without gaps.
- *
- *
- * (*) Recommendation for Block Cipher Modes of Operation
- * Methods and Techniques
- * NIST Special Publication 800-38A Edition 2001
- */
-
-/*
- * atom->offset_at()
- */
-static off_t
-offset_at_head(struct avec_config *conf)
-{
- return conf->aligned_offset;
-}
-
-static off_t
-offset_at_hole_head(call_frame_t *frame, struct object_cipher_info *object)
-{
- return offset_at_head(get_hole_conf(frame));
-}
-
-static off_t
-offset_at_data_head(call_frame_t *frame, struct object_cipher_info *object)
-{
- return offset_at_head(get_data_conf(frame));
-}
-
-static off_t
-offset_at_tail(struct avec_config *conf, struct object_cipher_info *object)
-{
- return conf->aligned_offset +
- (conf->off_in_head ? get_atom_size(object) : 0) +
- (conf->nr_full_blocks << get_atom_bits(object));
-}
-
-static off_t
-offset_at_hole_tail(call_frame_t *frame, struct object_cipher_info *object)
-{
- return offset_at_tail(get_hole_conf(frame), object);
-}
-
-static off_t
-offset_at_data_tail(call_frame_t *frame, struct object_cipher_info *object)
-{
- return offset_at_tail(get_data_conf(frame), object);
-}
-
-static off_t
-offset_at_full(struct avec_config *conf, struct object_cipher_info *object)
-{
- return conf->aligned_offset +
- (conf->off_in_head ? get_atom_size(object) : 0);
-}
-
-static off_t
-offset_at_data_full(call_frame_t *frame, struct object_cipher_info *object)
-{
- return offset_at_full(get_data_conf(frame), object);
-}
-
-static off_t
-offset_at_hole_full(call_frame_t *frame, struct object_cipher_info *object)
-{
- return offset_at_full(get_hole_conf(frame), object);
-}
-
-/*
- * atom->io_size_nopad()
- */
-
-static uint32_t
-io_size_nopad_head(struct avec_config *conf, struct object_cipher_info *object)
-{
- uint32_t gap_at_beg;
- uint32_t gap_at_end;
-
- check_head_block(conf);
-
- gap_at_beg = conf->off_in_head;
-
- if (has_tail_block(conf) || has_full_blocks(conf) || conf->off_in_tail == 0)
- gap_at_end = 0;
- else
- gap_at_end = get_atom_size(object) - conf->off_in_tail;
-
- return get_atom_size(object) - (gap_at_beg + gap_at_end);
-}
-
-static uint32_t
-io_size_nopad_tail(struct avec_config *conf, struct object_cipher_info *object)
-{
- check_tail_block(conf);
- return conf->off_in_tail;
-}
-
-static uint32_t
-io_size_nopad_full(struct avec_config *conf, struct object_cipher_info *object)
-{
- check_full_block(conf);
- return get_atom_size(object);
-}
-
-static uint32_t
-io_size_nopad_data_head(call_frame_t *frame, struct object_cipher_info *object)
-{
- return io_size_nopad_head(get_data_conf(frame), object);
-}
-
-static uint32_t
-io_size_nopad_hole_head(call_frame_t *frame, struct object_cipher_info *object)
-{
- return io_size_nopad_head(get_hole_conf(frame), object);
-}
-
-static uint32_t
-io_size_nopad_data_tail(call_frame_t *frame, struct object_cipher_info *object)
-{
- return io_size_nopad_tail(get_data_conf(frame), object);
-}
-
-static uint32_t
-io_size_nopad_hole_tail(call_frame_t *frame, struct object_cipher_info *object)
-{
- return io_size_nopad_tail(get_hole_conf(frame), object);
-}
-
-static uint32_t
-io_size_nopad_data_full(call_frame_t *frame, struct object_cipher_info *object)
-{
- return io_size_nopad_full(get_data_conf(frame), object);
-}
-
-static uint32_t
-io_size_nopad_hole_full(call_frame_t *frame, struct object_cipher_info *object)
-{
- return io_size_nopad_full(get_hole_conf(frame), object);
-}
-
-static uint32_t
-offset_in_head(struct avec_config *conf)
-{
- check_cursor_head(conf);
-
- return conf->off_in_head;
-}
-
-static uint32_t
-offset_in_tail(call_frame_t *frame, struct object_cipher_info *object)
-{
- return 0;
-}
-
-static uint32_t
-offset_in_full(struct avec_config *conf, struct object_cipher_info *object)
-{
- check_cursor_full(conf);
-
- if (has_head_block(conf))
- return (conf->cursor - 1) << get_atom_bits(object);
- else
- return conf->cursor << get_atom_bits(object);
-}
-
-static uint32_t
-offset_in_data_head(call_frame_t *frame, struct object_cipher_info *object)
-{
- return offset_in_head(get_data_conf(frame));
-}
-
-static uint32_t
-offset_in_hole_head(call_frame_t *frame, struct object_cipher_info *object)
-{
- return offset_in_head(get_hole_conf(frame));
-}
-
-static uint32_t
-offset_in_data_full(call_frame_t *frame, struct object_cipher_info *object)
-{
- return offset_in_full(get_data_conf(frame), object);
-}
-
-static uint32_t
-offset_in_hole_full(call_frame_t *frame, struct object_cipher_info *object)
-{
- return offset_in_full(get_hole_conf(frame), object);
-}
-
-/*
- * atom->rmw()
- */
-/*
- * Pre-conditions:
- * @vec contains plain text of the latest
- * version.
- *
- * Uptodate gaps of the @partial block with
- * this plain text, encrypt the whole block
- * and write the result to disk.
- */
-static int32_t
-rmw_partial_block(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vec,
- int32_t count, struct iatt *stbuf, struct iobref *iobref,
- struct rmw_atom *atom)
-{
- size_t was_read = 0;
- uint64_t file_size;
- crypt_local_t *local = frame->local;
- struct object_cipher_info *object = &local->info->cinfo;
-
- struct iovec *partial = atom->get_iovec(frame, 0);
- struct avec_config *conf = atom->get_config(frame);
- end_writeback_handler_t end_writeback_partial_block;
-#if DEBUG_CRYPT
- gf_boolean_t check_last_cblock = _gf_false;
-#endif
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (op_ret < 0)
- goto exit;
-
- file_size = local->cur_file_size;
- was_read = op_ret;
-
- if (atom->locality == HEAD_ATOM && conf->off_in_head) {
- /*
- * head atom with a non-uptodate gap
- * at the beginning
- *
- * fill the gap with plain text of the
- * latest version. Convert a part of hole
- * (if any) to zeros.
- */
- int32_t i;
- int32_t copied = 0;
- int32_t to_gap; /* amount of data needed to uptodate
- the gap at the beginning */
-#if 0
- int32_t hole = 0; /* The part of the hole which
- * got in the head block */
-#endif /* 0 */
- to_gap = conf->off_in_head;
-
- if (was_read < to_gap) {
- if (file_size > offset_at_head(conf) + was_read) {
- /*
- * It is impossible to uptodate
- * head block: too few bytes have
- * been read from disk, so that
- * partial write is impossible.
- *
- * It could happen because of many
- * reasons: IO errors, (meta)data
- * corruption in the local file system,
- * etc.
- */
- gf_log(this->name, GF_LOG_WARNING,
- "Can not uptodate a gap at the beginning");
- local->op_ret = -1;
- local->op_errno = EIO;
- goto exit;
- }
-#if 0
- hole = to_gap - was_read;
-#endif /* 0 */
- to_gap = was_read;
- }
- /*
- * uptodate the gap at the beginning
- */
- for (i = 0; i < count && copied < to_gap; i++) {
- int32_t to_copy;
-
- to_copy = vec[i].iov_len;
- if (to_copy > to_gap - copied)
- to_copy = to_gap - copied;
-
- memcpy(partial->iov_base, vec[i].iov_base, to_copy);
- copied += to_copy;
- }
-#if 0
- /*
- * If possible, convert part of the
- * hole, which got in the head block
- */
- ret = TRY_LOCK(&local->hole_lock);
- if (!ret) {
- if (local->hole_handled)
- /*
- * already converted by
- * crypt_writev_cbk()
- */
- UNLOCK(&local->hole_lock);
- else {
- /*
- * convert the part of the hole
- * which got in the head block
- * to zeros.
- *
- * Update the orig_offset to make
- * sure writev_cbk() won't care
- * about this part of the hole.
- *
- */
- memset(partial->iov_base + to_gap, 0, hole);
-
- conf->orig_offset -= hole;
- conf->orig_size += hole;
- UNLOCK(&local->hole_lock);
- }
- }
- else /*
- * conversion is being performed
- * by crypt_writev_cbk()
- */
- ;
-#endif /* 0 */
- }
- if (atom->locality == TAIL_ATOM ||
- (!has_tail_block(conf) && conf->off_in_tail)) {
- /*
- * tail atom, or head atom with a non-uptodate
- * gap at the end.
- *
- * fill the gap at the end of the block
- * with plain text of the latest version.
- * Pad the result, (if needed)
- */
- int32_t i;
- int32_t to_gap;
- int copied;
- off_t off_in_tail;
- int32_t to_copy;
-
- off_in_tail = conf->off_in_tail;
- to_gap = conf->gap_in_tail;
-
- if (to_gap && was_read < off_in_tail + to_gap) {
- /*
- * It is impossible to uptodate
- * the gap at the end: too few bytes
- * have been read from disk, so that
- * partial write is impossible.
- *
- * It could happen because of many
- * reasons: IO errors, (meta)data
- * corruption in the local file system,
- * etc.
- */
- gf_log(this->name, GF_LOG_WARNING,
- "Can not uptodate a gap at the end");
- local->op_ret = -1;
- local->op_errno = EIO;
- goto exit;
- }
- /*
- * uptodate the gap at the end
- */
- copied = 0;
- to_copy = to_gap;
- for (i = count - 1; i >= 0 && to_copy > 0; i--) {
- uint32_t from_vec, off_in_vec;
-
- off_in_vec = 0;
- from_vec = vec[i].iov_len;
- if (from_vec > to_copy) {
- off_in_vec = from_vec - to_copy;
- from_vec = to_copy;
- }
- memcpy(partial->iov_base + off_in_tail + to_gap - copied - from_vec,
- vec[i].iov_base + off_in_vec, from_vec);
-
- gf_log(
- this->name, GF_LOG_DEBUG,
- "uptodate %d bytes at tail. Offset at target(source): %d(%d)",
- (int)from_vec, (int)off_in_tail + to_gap - copied - from_vec,
- (int)off_in_vec);
-
- copied += from_vec;
- to_copy -= from_vec;
- }
- partial->iov_len = off_in_tail + to_gap;
-
- if (object_alg_should_pad(object)) {
- int32_t resid = 0;
- resid = partial->iov_len & (object_alg_blksize(object) - 1);
- if (resid) {
- /*
- * append a new EOF padding
- */
- local->eof_padding_size = object_alg_blksize(object) - resid;
-
- gf_log(this->name, GF_LOG_DEBUG, "set padding size %d",
- local->eof_padding_size);
-
- memset(partial->iov_base + partial->iov_len, 1,
- local->eof_padding_size);
- partial->iov_len += local->eof_padding_size;
-#if DEBUG_CRYPT
- gf_log(this->name, GF_LOG_DEBUG,
- "pad cblock with %d zeros:", local->eof_padding_size);
- dump_cblock(this, (unsigned char *)partial->iov_base +
- partial->iov_len -
- object_alg_blksize(object));
- check_last_cblock = _gf_true;
-#endif
- }
- }
- }
- /*
- * encrypt the whole block
- */
- encrypt_aligned_iov(object, partial, 1, atom->offset_at(frame, object));
-#if DEBUG_CRYPT
- if (check_last_cblock == _gf_true) {
- gf_log(this->name, GF_LOG_DEBUG, "encrypt last cblock with offset %llu",
- (unsigned long long)atom->offset_at(frame, object));
- dump_cblock(this, (unsigned char *)partial->iov_base +
- partial->iov_len - object_alg_blksize(object));
- }
-#endif
- set_local_io_params_writev(frame, object, atom,
- atom->offset_at(frame, object),
- iov_length(partial, 1));
- /*
- * write the whole block to disk
- */
- end_writeback_partial_block = dispatch_end_writeback(local->fop);
- conf->cursor++;
- STACK_WIND(frame, end_writeback_partial_block, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev, local->fd, partial, 1,
- atom->offset_at(frame, object), local->flags, local->iobref_data,
- local->xdata);
-
- gf_log("crypt", GF_LOG_DEBUG,
- "submit partial block: %d bytes from %d offset",
- (int)iov_length(partial, 1), (int)atom->offset_at(frame, object));
-exit:
- return 0;
-}
-
-/*
- * Perform a (read-)modify-write sequence.
- * This should be performed only after approval
- * of upper server-side manager, i.e. the caller
- * needs to make sure this is his turn to rmw.
- */
-void
-submit_partial(call_frame_t *frame, xlator_t *this, fd_t *fd,
- atom_locality_type ltype)
-{
- int32_t ret;
- dict_t *dict;
- struct rmw_atom *atom;
- crypt_local_t *local = frame->local;
- struct object_cipher_info *object = &local->info->cinfo;
-
- atom = atom_by_types(local->active_setup, ltype);
- /*
- * To perform the "read" component of the read-modify-write
- * sequence the crypt translator does stack_wind to itself.
- *
- * Pass current file size to crypt_readv()
- */
- dict = dict_new();
- if (!dict) {
- /*
- * FIXME: Handle the error
- */
- gf_log("crypt", GF_LOG_WARNING, "Can not alloc dict");
- return;
- }
- ret = dict_set(dict, FSIZE_XATTR_PREFIX,
- data_from_uint64(local->cur_file_size));
- if (ret) {
- /*
- * FIXME: Handle the error
- */
- dict_unref(dict);
- gf_log("crypt", GF_LOG_WARNING, "Can not set dict");
- goto exit;
- }
- STACK_WIND(frame, atom->rmw, this, this->fops->readv, /* crypt_readv */
- fd, atom->count_to_uptodate(frame, object), /* count */
- atom->offset_at(frame, object), /* offset to read from */
- 0, dict);
-exit:
- dict_unref(dict);
-}
-
-/*
- * submit blocks of FULL_ATOM type
- */
-void
-submit_full(call_frame_t *frame, xlator_t *this)
-{
- crypt_local_t *local = frame->local;
- struct object_cipher_info *object = &local->info->cinfo;
- struct rmw_atom *atom = atom_by_types(local->active_setup, FULL_ATOM);
- uint32_t count; /* total number of full blocks to submit */
- uint32_t granularity; /* number of blocks to submit in one iteration */
-
- uint64_t off_in_file; /* start offset in the file, bytes */
- uint32_t off_in_atom; /* start offset in the atom, blocks */
- uint32_t blocks_written = 0; /* blocks written for this submit */
-
- struct avec_config *conf = atom->get_config(frame);
- end_writeback_handler_t end_writeback_full_block;
- /*
- * Write full blocks by groups of granularity size.
- */
- end_writeback_full_block = dispatch_end_writeback(local->fop);
-
- if (is_ordered_mode(frame)) {
- uint32_t skip = has_head_block(conf) ? 1 : 0;
- count = 1;
- granularity = 1;
- /*
- * calculate start offset using cursor value;
- * here we should take into account head block,
- * which corresponds to cursor value 0.
- */
- off_in_file = atom->offset_at(frame, object) +
- ((conf->cursor - skip) << get_atom_bits(object));
- off_in_atom = conf->cursor - skip;
- } else {
- /*
- * in parallel mode
- */
- count = conf->nr_full_blocks;
- granularity = MAX_IOVEC;
- off_in_file = atom->offset_at(frame, object);
- off_in_atom = 0;
- }
- while (count) {
- uint32_t blocks_to_write = count;
-
- if (blocks_to_write > granularity)
- blocks_to_write = granularity;
- if (conf->type == HOLE_ATOM)
- /*
- * reset iovec before encryption
- */
- memset(atom->get_iovec(frame, 0)->iov_base, 0,
- get_atom_size(object));
- /*
- * encrypt the group
- */
- encrypt_aligned_iov(
- object, atom->get_iovec(frame, off_in_atom + blocks_written),
- blocks_to_write,
- off_in_file + (blocks_written << get_atom_bits(object)));
-
- set_local_io_params_writev(
- frame, object, atom,
- off_in_file + (blocks_written << get_atom_bits(object)),
- blocks_to_write << get_atom_bits(object));
-
- conf->cursor += blocks_to_write;
-
- STACK_WIND(frame, end_writeback_full_block, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev, local->fd,
- atom->get_iovec(frame, off_in_atom + blocks_written),
- blocks_to_write,
- off_in_file + (blocks_written << get_atom_bits(object)),
- local->flags,
- local->iobref_data ? local->iobref_data : local->iobref,
- local->xdata);
-
- gf_log("crypt", GF_LOG_DEBUG, "submit %d full blocks from %d offset",
- blocks_to_write,
- (int)(off_in_file + (blocks_written << get_atom_bits(object))));
-
- count -= blocks_to_write;
- blocks_written += blocks_to_write;
- }
- return;
-}
-
-static int32_t
-rmw_data_head(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iovec *vec, int32_t count,
- struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
-{
- return rmw_partial_block(frame, cookie, this, op_ret, op_errno, vec, count,
- stbuf, iobref,
- atom_by_types(DATA_ATOM, HEAD_ATOM));
-}
-
-static int32_t
-rmw_data_tail(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iovec *vec, int32_t count,
- struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
-{
- return rmw_partial_block(frame, cookie, this, op_ret, op_errno, vec, count,
- stbuf, iobref,
- atom_by_types(DATA_ATOM, TAIL_ATOM));
-}
-
-static int32_t
-rmw_hole_head(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iovec *vec, int32_t count,
- struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
-{
- return rmw_partial_block(frame, cookie, this, op_ret, op_errno, vec, count,
- stbuf, iobref,
- atom_by_types(HOLE_ATOM, HEAD_ATOM));
-}
-
-static int32_t
-rmw_hole_tail(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iovec *vec, int32_t count,
- struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
-{
- return rmw_partial_block(frame, cookie, this, op_ret, op_errno, vec, count,
- stbuf, iobref,
- atom_by_types(HOLE_ATOM, TAIL_ATOM));
-}
-
-/*
- * atom->count_to_uptodate()
- */
-static uint32_t
-count_to_uptodate_head(struct avec_config *conf,
- struct object_cipher_info *object)
-{
- if (conf->acount == 1 && conf->off_in_tail)
- return get_atom_size(object);
- else
- /* there is no need to read the whole head block */
- return conf->off_in_head;
-}
-
-static uint32_t
-count_to_uptodate_tail(struct avec_config *conf,
- struct object_cipher_info *object)
-{
- /* we need to read the whole tail block */
- return get_atom_size(object);
-}
-
-static uint32_t
-count_to_uptodate_data_head(call_frame_t *frame,
- struct object_cipher_info *object)
-{
- return count_to_uptodate_head(get_data_conf(frame), object);
-}
-
-static uint32_t
-count_to_uptodate_data_tail(call_frame_t *frame,
- struct object_cipher_info *object)
-{
- return count_to_uptodate_tail(get_data_conf(frame), object);
-}
-
-static uint32_t
-count_to_uptodate_hole_head(call_frame_t *frame,
- struct object_cipher_info *object)
-{
- return count_to_uptodate_head(get_hole_conf(frame), object);
-}
-
-static uint32_t
-count_to_uptodate_hole_tail(call_frame_t *frame,
- struct object_cipher_info *object)
-{
- return count_to_uptodate_tail(get_hole_conf(frame), object);
-}
-
-/* atom->get_config() */
-
-static struct avec_config *
-get_config_data(call_frame_t *frame)
-{
- return &((crypt_local_t *)frame->local)->data_conf;
-}
-
-static struct avec_config *
-get_config_hole(call_frame_t *frame)
-{
- return &((crypt_local_t *)frame->local)->hole_conf;
-}
-
-/*
- * atom->get_iovec()
- */
-static struct iovec *
-get_iovec_hole_head(call_frame_t *frame, uint32_t count)
-{
- struct avec_config *conf = get_hole_conf(frame);
-
- return conf->avec;
-}
-
-static struct iovec *
-get_iovec_hole_full(call_frame_t *frame, uint32_t count)
-{
- struct avec_config *conf = get_hole_conf(frame);
-
- return conf->avec + (conf->off_in_head ? 1 : 0);
-}
-
-static struct iovec *
-get_iovec_hole_tail(call_frame_t *frame, uint32_t count)
-{
- struct avec_config *conf = get_hole_conf(frame);
-
- return conf->avec + (conf->blocks_in_pool - 1);
-}
-
-static struct iovec *
-get_iovec_data_head(call_frame_t *frame, uint32_t count)
-{
- struct avec_config *conf = get_data_conf(frame);
-
- return conf->avec;
-}
-
-static struct iovec *
-get_iovec_data_full(call_frame_t *frame, uint32_t count)
-{
- struct avec_config *conf = get_data_conf(frame);
-
- return conf->avec + (conf->off_in_head ? 1 : 0) + count;
-}
-
-static struct iovec *
-get_iovec_data_tail(call_frame_t *frame, uint32_t count)
-{
- struct avec_config *conf = get_data_conf(frame);
-
- return conf->avec + (conf->off_in_head ? 1 : 0) + conf->nr_full_blocks;
-}
-
-static struct rmw_atom atoms[LAST_DATA_TYPE][LAST_LOCALITY_TYPE] = {
- [DATA_ATOM][HEAD_ATOM] = {.locality = HEAD_ATOM,
- .rmw = rmw_data_head,
- .offset_at = offset_at_data_head,
- .offset_in = offset_in_data_head,
- .get_iovec = get_iovec_data_head,
- .io_size_nopad = io_size_nopad_data_head,
- .count_to_uptodate = count_to_uptodate_data_head,
- .get_config = get_config_data},
- [DATA_ATOM][TAIL_ATOM] = {.locality = TAIL_ATOM,
- .rmw = rmw_data_tail,
- .offset_at = offset_at_data_tail,
- .offset_in = offset_in_tail,
- .get_iovec = get_iovec_data_tail,
- .io_size_nopad = io_size_nopad_data_tail,
- .count_to_uptodate = count_to_uptodate_data_tail,
- .get_config = get_config_data},
- [DATA_ATOM][FULL_ATOM] = {.locality = FULL_ATOM,
- .offset_at = offset_at_data_full,
- .offset_in = offset_in_data_full,
- .get_iovec = get_iovec_data_full,
- .io_size_nopad = io_size_nopad_data_full,
- .get_config = get_config_data},
- [HOLE_ATOM][HEAD_ATOM] = {.locality = HEAD_ATOM,
- .rmw = rmw_hole_head,
- .offset_at = offset_at_hole_head,
- .offset_in = offset_in_hole_head,
- .get_iovec = get_iovec_hole_head,
- .io_size_nopad = io_size_nopad_hole_head,
- .count_to_uptodate = count_to_uptodate_hole_head,
- .get_config = get_config_hole},
- [HOLE_ATOM][TAIL_ATOM] = {.locality = TAIL_ATOM,
- .rmw = rmw_hole_tail,
- .offset_at = offset_at_hole_tail,
- .offset_in = offset_in_tail,
- .get_iovec = get_iovec_hole_tail,
- .io_size_nopad = io_size_nopad_hole_tail,
- .count_to_uptodate = count_to_uptodate_hole_tail,
- .get_config = get_config_hole},
- [HOLE_ATOM][FULL_ATOM] = {.locality = FULL_ATOM,
- .offset_at = offset_at_hole_full,
- .offset_in = offset_in_hole_full,
- .get_iovec = get_iovec_hole_full,
- .io_size_nopad = io_size_nopad_hole_full,
- .get_config = get_config_hole}};
-
-struct rmw_atom *
-atom_by_types(atom_data_type data, atom_locality_type locality)
-{
- return &atoms[data][locality];
-}
-
-/*
- Local variables:
- c-indentation-style: "K&R"
- mode-name: "LC"
- c-basic-offset: 8
- tab-width: 8
- fill-column: 80
- scroll-step: 1
- End:
-*/
diff --git a/xlators/encryption/crypt/src/crypt-common.h b/xlators/encryption/crypt/src/crypt-common.h
deleted file mode 100644
index 123d5c2a631..00000000000
--- a/xlators/encryption/crypt/src/crypt-common.h
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __CRYPT_COMMON_H__
-#define __CRYPT_COMMON_H__
-
-#define INVAL_SUBVERSION_NUMBER (0xff)
-#define CRYPT_INVAL_OP (GF_FOP_NULL)
-
-#define CRYPTO_FORMAT_PREFIX "trusted.glusterfs.crypt.att.cfmt"
-#define FSIZE_XATTR_PREFIX "trusted.glusterfs.crypt.att.size"
-#define SUBREQ_PREFIX "trusted.glusterfs.crypt.msg.sreq"
-#define FSIZE_MSG_PREFIX "trusted.glusterfs.crypt.msg.size"
-#define DE_MSG_PREFIX "trusted.glusterfs.crypt.msg.dent"
-#define REQUEST_ID_PREFIX "trusted.glusterfs.crypt.msg.rqid"
-#define MSGFLAGS_PREFIX "trusted.glusterfs.crypt.msg.xfgs"
-
-/* messages for crypt_open() */
-#define MSGFLAGS_REQUEST_MTD_RLOCK 1 /* take read lock and don't unlock */
-#define MSGFLAGS_REQUEST_MTD_WLOCK 2 /* take write lock and don't unlock */
-
-#define AES_BLOCK_BITS (4) /* AES_BLOCK_SIZE == 1 << AES_BLOCK_BITS */
-
-#define noop \
- do { \
- ; \
- } while (0)
-#define cassert(cond) \
- ({ \
- switch (-1) { \
- case (cond): \
- case 0: \
- break; \
- } \
- })
-#define __round_mask(x, y) ((__typeof__(x))((y)-1))
-#define round_up(x, y) ((((x)-1) | __round_mask(x, y)) + 1)
-
-/*
- * Format of file's metadata
- */
-struct crypt_format {
- uint8_t loader_id; /* version of metadata loader */
- uint8_t versioned[0]; /* file's metadata of specific version */
-} __attribute__((packed));
-
-typedef enum { AES_CIPHER_ALG, LAST_CIPHER_ALG } cipher_alg_t;
-
-typedef enum { XTS_CIPHER_MODE, LAST_CIPHER_MODE } cipher_mode_t;
-
-typedef enum { MTD_LOADER_V1, LAST_MTD_LOADER } mtd_loader_id;
-
-static inline void
-msgflags_set_mtd_rlock(uint32_t *flags)
-{
- *flags |= MSGFLAGS_REQUEST_MTD_RLOCK;
-}
-
-static inline void
-msgflags_set_mtd_wlock(uint32_t *flags)
-{
- *flags |= MSGFLAGS_REQUEST_MTD_WLOCK;
-}
-
-static inline gf_boolean_t
-msgflags_check_mtd_rlock(uint32_t *flags)
-{
- return *flags & MSGFLAGS_REQUEST_MTD_RLOCK;
-}
-
-static inline gf_boolean_t
-msgflags_check_mtd_wlock(uint32_t *flags)
-{
- return *flags & MSGFLAGS_REQUEST_MTD_WLOCK;
-}
-
-static inline gf_boolean_t
-msgflags_check_mtd_lock(uint32_t *flags)
-{
- return msgflags_check_mtd_rlock(flags) || msgflags_check_mtd_wlock(flags);
-}
-
-/*
- * returns number of logical blocks occupied
- * (maybe partially) by @count bytes
- * at offset @start.
- */
-static inline off_t
-logical_blocks_occupied(uint64_t start, off_t count, int blkbits)
-{
- return ((start + count - 1) >> blkbits) - (start >> blkbits) + 1;
-}
-
-/*
- * are two bytes (represented by offsets @off1
- * and @off2 respectively) in the same logical
- * block.
- */
-static inline int
-in_same_lblock(uint64_t off1, uint64_t off2, int blkbits)
-{
- return off1 >> blkbits == off2 >> blkbits;
-}
-
-static inline void
-dump_cblock(xlator_t *this, unsigned char *buf)
-{
- gf_log(this->name, GF_LOG_DEBUG,
- "dump cblock: %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x",
- (buf)[0], (buf)[1], (buf)[2], (buf)[3], (buf)[4], (buf)[5], (buf)[6],
- (buf)[7], (buf)[8], (buf)[9], (buf)[10], (buf)[11], (buf)[12],
- (buf)[13], (buf)[14], (buf)[15]);
-}
-
-#endif /* __CRYPT_COMMON_H__ */
-
-/*
- Local variables:
- c-indentation-style: "K&R"
- mode-name: "LC"
- c-basic-offset: 8
- tab-width: 8
- fill-column: 80
- scroll-step: 1
- End:
-*/
diff --git a/xlators/encryption/crypt/src/crypt-mem-types.h b/xlators/encryption/crypt/src/crypt-mem-types.h
deleted file mode 100644
index 7e9fb90ed43..00000000000
--- a/xlators/encryption/crypt/src/crypt-mem-types.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __CRYPT_MEM_TYPES_H__
-#define __CRYPT_MEM_TYPES_H__
-
-#include "mem-types.h"
-
-enum gf_crypt_mem_types_ {
- gf_crypt_mt_priv = gf_common_mt_end + 1,
- gf_crypt_mt_inode,
- gf_crypt_mt_data,
- gf_crypt_mt_mtd,
- gf_crypt_mt_loc,
- gf_crypt_mt_iatt,
- gf_crypt_mt_key,
- gf_crypt_mt_iovec,
- gf_crypt_mt_char,
- gf_crypt_mt_local,
- gf_crypt_mt_end,
-};
-
-#endif /* __CRYPT_MEM_TYPES_H__ */
-
-/*
- Local variables:
- c-indentation-style: "K&R"
- mode-name: "LC"
- c-basic-offset: 8
- tab-width: 8
- fill-column: 80
- scroll-step: 1
- End:
-*/
diff --git a/xlators/encryption/crypt/src/crypt.c b/xlators/encryption/crypt/src/crypt.c
deleted file mode 100644
index 02c4028c087..00000000000
--- a/xlators/encryption/crypt/src/crypt.c
+++ /dev/null
@@ -1,3906 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-#include <ctype.h>
-#include <sys/uio.h>
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "defaults.h"
-
-#include "crypt-common.h"
-#include "crypt.h"
-
-static void
-init_inode_info_head(struct crypt_inode_info *info, fd_t *fd);
-static int32_t
-init_inode_info_tail(struct crypt_inode_info *info,
- struct master_cipher_info *master);
-static int32_t
-prepare_for_submit_hole(call_frame_t *frame, xlator_t *this, uint64_t from,
- off_t size);
-static int32_t
-load_file_size(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata);
-static void
-do_ordered_submit(call_frame_t *frame, xlator_t *this, atom_data_type dtype);
-static void
-do_parallel_submit(call_frame_t *frame, xlator_t *this, atom_data_type dtype);
-static void
-put_one_call_open(call_frame_t *frame);
-static void
-put_one_call_readv(call_frame_t *frame, xlator_t *this);
-static void
-put_one_call_writev(call_frame_t *frame, xlator_t *this);
-static void
-put_one_call_ftruncate(call_frame_t *frame, xlator_t *this);
-static void
-free_avec(struct iovec *avec, char **pool, int blocks_in_pool);
-static void
-free_avec_data(crypt_local_t *local);
-static void
-free_avec_hole(crypt_local_t *local);
-
-static crypt_local_t *
-crypt_alloc_local(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop)
-{
- crypt_local_t *local = NULL;
-
- local = GF_CALLOC(1, sizeof(crypt_local_t), gf_crypt_mt_local);
- if (!local) {
- gf_log(this->name, GF_LOG_ERROR, "out of memory");
- return NULL;
- }
- local->fop = fop;
- LOCK_INIT(&local->hole_lock);
- LOCK_INIT(&local->call_lock);
- LOCK_INIT(&local->rw_count_lock);
-
- frame->local = local;
- return local;
-}
-
-struct crypt_inode_info *
-get_crypt_inode_info(inode_t *inode, xlator_t *this)
-{
- int ret;
- uint64_t value = 0;
- struct crypt_inode_info *info;
-
- ret = inode_ctx_get(inode, this, &value);
- if (ret == -1) {
- gf_log(this->name, GF_LOG_WARNING, "Can not get inode info");
- return NULL;
- }
- info = (struct crypt_inode_info *)(long)value;
- if (info == NULL) {
- gf_log(this->name, GF_LOG_WARNING, "Can not obtain inode info");
- return NULL;
- }
- return info;
-}
-
-static struct crypt_inode_info *
-local_get_inode_info(crypt_local_t *local, xlator_t *this)
-{
- if (local->info)
- return local->info;
- local->info = get_crypt_inode_info(local->fd->inode, this);
- return local->info;
-}
-
-static struct crypt_inode_info *
-alloc_inode_info(crypt_local_t *local, loc_t *loc)
-{
- struct crypt_inode_info *info;
-
- info = GF_CALLOC(1, sizeof(struct crypt_inode_info), gf_crypt_mt_inode);
- if (!info) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- gf_log("crypt", GF_LOG_WARNING, "Can not allocate inode info");
- return NULL;
- }
-#if DEBUG_CRYPT
- info->loc = GF_CALLOC(1, sizeof(loc_t), gf_crypt_mt_loc);
- if (!info->loc) {
- gf_log("crypt", GF_LOG_WARNING, "Can not allocate loc");
- GF_FREE(info);
- return NULL;
- }
- if (loc_copy(info->loc, loc)) {
- GF_FREE(info->loc);
- GF_FREE(info);
- return NULL;
- }
-#endif /* DEBUG_CRYPT */
-
- local->info = info;
- return info;
-}
-
-static void
-free_inode_info(struct crypt_inode_info *info)
-{
-#if DEBUG_CRYPT
- loc_wipe(info->loc);
- GF_FREE(info->loc);
-#endif
- memset(info, 0, sizeof(*info));
- GF_FREE(info);
-}
-
-int
-crypt_forget(xlator_t *this, inode_t *inode)
-{
- uint64_t ctx_addr = 0;
- if (!inode_ctx_del(inode, this, &ctx_addr))
- free_inode_info((struct crypt_inode_info *)(long)ctx_addr);
- return 0;
-}
-
-#if DEBUG_CRYPT
-static void
-check_read(call_frame_t *frame, xlator_t *this, int32_t read, struct iovec *vec,
- int32_t count, struct iatt *stbuf)
-{
- crypt_local_t *local = frame->local;
- struct object_cipher_info *object = get_object_cinfo(local->info);
- struct avec_config *conf = &local->data_conf;
- uint32_t resid = stbuf->ia_size & (object_alg_blksize(object) - 1);
-
- if (read <= 0)
- return;
- if (read != iov_length(vec, count))
- gf_log("crypt", GF_LOG_DEBUG,
- "op_ret differs from amount of read bytes");
-
- if (object_alg_should_pad(object) &&
- (read & (object_alg_blksize(object) - 1)))
- gf_log("crypt", GF_LOG_DEBUG,
- "bad amount of read bytes (!= 0 mod(cblock size))");
-
- if (conf->aligned_offset + read >
- stbuf->ia_size + (resid ? object_alg_blksize(object) - resid : 0))
- gf_log("crypt", GF_LOG_DEBUG, "bad amount of read bytes (too large))");
-}
-
-#define PT_BYTES_TO_DUMP (32)
-static void
-dump_plain_text(crypt_local_t *local, struct iovec *avec)
-{
- int32_t to_dump;
- char str[PT_BYTES_TO_DUMP + 1];
-
- if (!avec)
- return;
- to_dump = avec->iov_len;
- if (to_dump > PT_BYTES_TO_DUMP)
- to_dump = PT_BYTES_TO_DUMP;
- memcpy(str, avec->iov_base, to_dump);
- memset(str + to_dump, '0', 1);
- gf_log("crypt", GF_LOG_DEBUG, "Read file: %s", str);
-}
-
-static int32_t
-data_conf_invariant(struct avec_config *conf)
-{
- return conf->acount == !!has_head_block(conf) + !!has_tail_block(conf) +
- conf->nr_full_blocks;
-}
-
-static int32_t
-hole_conf_invariant(struct avec_config *conf)
-{
- return conf->blocks_in_pool == !!has_head_block(conf) +
- !!has_tail_block(conf) +
- !!has_full_blocks(conf);
-}
-
-static void
-crypt_check_conf(struct avec_config *conf)
-{
- int32_t ret = 0;
- const char *msg;
-
- switch (conf->type) {
- case DATA_ATOM:
- msg = "data";
- ret = data_conf_invariant(conf);
- break;
- case HOLE_ATOM:
- msg = "hole";
- ret = hole_conf_invariant(conf);
- break;
- default:
- msg = "unknown";
- }
- if (!ret)
- gf_log("crypt", GF_LOG_DEBUG, "bad %s conf", msg);
-}
-
-static void
-check_buf(call_frame_t *frame, xlator_t *this, struct iatt *buf)
-{
- crypt_local_t *local = frame->local;
- struct object_cipher_info *object = &local->info->cinfo;
- uint64_t local_file_size;
-
- switch (local->fop) {
- case GF_FOP_FTRUNCATE:
- return;
- case GF_FOP_WRITE:
- local_file_size = local->new_file_size;
- break;
- case GF_FOP_READ:
- if (parent_is_crypt_xlator(frame, this))
- return;
- local_file_size = local->cur_file_size;
- break;
- default:
- gf_log("crypt", GF_LOG_DEBUG, "bad file operation");
- return;
- }
- if (buf->ia_size != round_up(local_file_size, object_alg_blksize(object)))
- gf_log("crypt", GF_LOG_DEBUG,
- "bad ia_size in buf (%llu), should be %llu",
- (unsigned long long)buf->ia_size,
- (unsigned long long)round_up(local_file_size,
- object_alg_blksize(object)));
-}
-
-#else
-#define check_read(frame, this, op_ret, vec, count, stbuf) noop
-#define dump_plain_text(local, avec) noop
-#define crypt_check_conf(conf) noop
-#define check_buf(frame, this, buf) noop
-#endif /* DEBUG_CRYPT */
-
-/*
- * Pre-conditions:
- * @vec represents a ciphertext of expanded size and
- * aligned offset.
- *
- * Compound a temporal vector @avec with block-aligned
- * components, decrypt and fix it up to represent a chunk
- * of data corresponding to the original size and offset.
- * Pass the result to the next translator.
- */
-int32_t
-crypt_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vec,
- int32_t count, struct iatt *stbuf, struct iobref *iobref,
- dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
- struct avec_config *conf = &local->data_conf;
- struct object_cipher_info *object = &local->info->cinfo;
-
- struct iovec *avec;
- uint32_t i;
- uint32_t to_vec;
- uint32_t to_user;
-
- check_buf(frame, this, stbuf);
- check_read(frame, this, op_ret, vec, count, stbuf);
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- local->iobref = iobref_ref(iobref);
-
- local->buf = *stbuf;
- local->buf.ia_size = local->cur_file_size;
-
- if (op_ret <= 0 || count == 0 || vec[0].iov_len == 0)
- goto put_one_call;
-
- if (conf->orig_offset >= local->cur_file_size) {
- local->op_ret = 0;
- goto put_one_call;
- }
- /*
- * correct config params with real file size
- * and actual amount of bytes read
- */
- set_config_offsets(frame, this, conf->orig_offset, op_ret, DATA_ATOM, 0);
-
- if (conf->orig_offset + conf->orig_size > local->cur_file_size)
- conf->orig_size = local->cur_file_size - conf->orig_offset;
- /*
- * calculate amount of data to be returned
- * to user.
- */
- to_user = op_ret;
- if (conf->aligned_offset + to_user <= conf->orig_offset) {
- gf_log(this->name, GF_LOG_WARNING, "Incomplete read");
- local->op_ret = -1;
- local->op_errno = EIO;
- goto put_one_call;
- }
- to_user -= (conf->aligned_offset - conf->orig_offset);
-
- if (to_user > conf->orig_size)
- to_user = conf->orig_size;
- local->rw_count = to_user;
-
- op_errno = set_config_avec_data(this, local, conf, object, vec, count);
- if (op_errno) {
- local->op_ret = -1;
- local->op_errno = op_errno;
- goto put_one_call;
- }
- avec = conf->avec;
-#if DEBUG_CRYPT
- if (conf->off_in_tail != 0 &&
- conf->off_in_tail < object_alg_blksize(object) &&
- object_alg_should_pad(object))
- gf_log(this->name, GF_LOG_DEBUG, "Bad offset in tail %d",
- conf->off_in_tail);
- if (iov_length(vec, count) != 0 &&
- in_same_lblock(conf->orig_offset + iov_length(vec, count) - 1,
- local->cur_file_size - 1, object_alg_blkbits(object))) {
- gf_log(this->name, GF_LOG_DEBUG, "Compound last cblock");
- dump_cblock(this, (unsigned char *)(avec[conf->acount - 1].iov_base) +
- avec[conf->acount - 1].iov_len -
- object_alg_blksize(object));
- dump_cblock(this, (unsigned char *)(vec[count - 1].iov_base) +
- vec[count - 1].iov_len -
- object_alg_blksize(object));
- }
-#endif
- decrypt_aligned_iov(object, avec, conf->acount, conf->aligned_offset);
- /*
- * pass proper plain data to user
- */
- avec[0].iov_base += (conf->aligned_offset - conf->orig_offset);
- avec[0].iov_len -= (conf->aligned_offset - conf->orig_offset);
-
- to_vec = to_user;
- for (i = 0; i < conf->acount; i++) {
- if (avec[i].iov_len > to_vec)
- avec[i].iov_len = to_vec;
- to_vec -= avec[i].iov_len;
- }
-put_one_call:
- put_one_call_readv(frame, this);
- return 0;
-}
-
-static int32_t
-do_readv(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *dict, dict_t *xdata)
-{
- data_t *data;
- crypt_local_t *local = frame->local;
-
- if (op_ret < 0)
- goto error;
- /*
- * extract regular file size
- */
- data = dict_get(dict, FSIZE_XATTR_PREFIX);
- if (!data) {
- gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
- op_errno = EIO;
- goto error;
- }
- local->cur_file_size = data_to_uint64(data);
-
- get_one_call(frame);
- STACK_WIND(frame, crypt_readv_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv, local->fd,
- /*
- * FIXME: read amount can be reduced
- */
- local->data_conf.expanded_size, local->data_conf.aligned_offset,
- local->flags, local->xdata);
- return 0;
-error:
- local->op_ret = -1;
- local->op_errno = op_errno;
-
- get_one_call(frame);
- put_one_call_readv(frame, this);
- return 0;
-}
-
-static int32_t
-crypt_readv_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- if (op_ret < 0)
- goto error;
- /*
- * An access has been granted,
- * retrieve file size
- */
- STACK_WIND(frame, do_readv, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetxattr, local->fd,
- FSIZE_XATTR_PREFIX, NULL);
- return 0;
-error:
- fd_unref(local->fd);
- if (local->xdata)
- dict_unref(local->xdata);
- CRYPT_STACK_UNWIND(readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
- return 0;
-}
-
-static int32_t
-readv_trivial_completion(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (op_ret < 0) {
- gf_log(this->name, GF_LOG_WARNING, "stat failed (%d)", op_errno);
- goto error;
- }
- local->buf = *buf;
- STACK_WIND(frame, load_file_size, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, local->loc,
- FSIZE_XATTR_PREFIX, NULL);
- return 0;
-error:
- CRYPT_STACK_UNWIND(readv, frame, op_ret, op_errno, NULL, 0, NULL, NULL,
- NULL);
- return 0;
-}
-
-int32_t
-crypt_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, uint32_t flags, dict_t *xdata)
-{
- int32_t ret;
- crypt_local_t *local;
- struct crypt_inode_info *info;
- struct gf_flock lock = {
- 0,
- };
-
-#if DEBUG_CRYPT
- gf_log("crypt", GF_LOG_DEBUG, "reading %d bytes from offset %llu",
- (int)size, (long long)offset);
- if (parent_is_crypt_xlator(frame, this))
- gf_log("crypt", GF_LOG_DEBUG, "parent is crypt");
-#endif
- local = crypt_alloc_local(frame, this, GF_FOP_READ);
- if (!local) {
- ret = ENOMEM;
- goto error;
- }
- if (size == 0)
- goto trivial;
-
- local->fd = fd_ref(fd);
- local->flags = flags;
-
- info = local_get_inode_info(local, this);
- if (info == NULL) {
- ret = EINVAL;
- fd_unref(fd);
- goto error;
- }
- if (!object_alg_atomic(&info->cinfo)) {
- ret = EINVAL;
- fd_unref(fd);
- goto error;
- }
- set_config_offsets(frame, this, offset, size, DATA_ATOM, 0);
- if (parent_is_crypt_xlator(frame, this)) {
- data_t *data;
- /*
- * We are called by crypt_writev (or cypt_ftruncate)
- * to perform the "read" component of the read-modify-write
- * (or read-prune-write) sequence for some atom;
- *
- * don't ask for access:
- * it has already been acquired
- *
- * Retrieve current file size
- */
- if (!xdata) {
- gf_log("crypt", GF_LOG_WARNING,
- "Regular file size hasn't been passed");
- ret = EIO;
- goto error;
- }
- data = dict_get(xdata, FSIZE_XATTR_PREFIX);
- if (!data) {
- gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
- ret = EIO;
- goto error;
- }
- local->old_file_size = local->cur_file_size = data_to_uint64(data);
-
- get_one_call(frame);
- STACK_WIND(frame, crypt_readv_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv, local->fd,
- /*
- * FIXME: read amount can be reduced
- */
- local->data_conf.expanded_size,
- local->data_conf.aligned_offset, flags, NULL);
- return 0;
- }
- if (xdata)
- local->xdata = dict_ref(xdata);
-
- lock.l_len = 0;
- lock.l_start = 0;
- lock.l_type = F_RDLCK;
- lock.l_whence = SEEK_SET;
-
- STACK_WIND(frame, crypt_readv_finodelk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, this->name, fd, F_SETLKW,
- &lock, NULL);
- return 0;
-trivial:
- STACK_WIND(frame, readv_trivial_completion, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd, NULL);
- return 0;
-error:
- CRYPT_STACK_UNWIND(readv, frame, -1, ret, NULL, 0, NULL, NULL, NULL);
- return 0;
-}
-
-void
-set_local_io_params_writev(call_frame_t *frame,
- struct object_cipher_info *object,
- struct rmw_atom *atom, off_t io_offset,
- uint32_t io_size)
-{
- crypt_local_t *local = frame->local;
-
- local->io_offset = io_offset;
- local->io_size = io_size;
-
- local->io_offset_nopad = atom->offset_at(frame, object) +
- atom->offset_in(frame, object);
-
- gf_log("crypt", GF_LOG_DEBUG, "set nopad offset to %llu",
- (unsigned long long)local->io_offset_nopad);
-
- local->io_size_nopad = atom->io_size_nopad(frame, object);
-
- gf_log("crypt", GF_LOG_DEBUG, "set nopad size to %llu",
- (unsigned long long)local->io_size_nopad);
-
- local->update_disk_file_size = 0;
- /*
- * NOTE: eof_padding_size is 0 for all full atoms;
- * For head and tail atoms it will be set up at rmw_partial block()
- */
- local->new_file_size = local->cur_file_size;
-
- if (local->io_offset_nopad + local->io_size_nopad > local->cur_file_size) {
- local->new_file_size = local->io_offset_nopad + local->io_size_nopad;
-
- gf_log("crypt", GF_LOG_DEBUG, "set new file size to %llu",
- (unsigned long long)local->new_file_size);
-
- local->update_disk_file_size = 1;
- }
-}
-
-void
-set_local_io_params_ftruncate(call_frame_t *frame,
- struct object_cipher_info *object)
-{
- uint32_t resid;
- crypt_local_t *local = frame->local;
- struct avec_config *conf = &local->data_conf;
-
- resid = conf->orig_offset & (object_alg_blksize(object) - 1);
- if (resid) {
- local->eof_padding_size = object_alg_blksize(object) - resid;
- local->new_file_size = conf->aligned_offset;
- local->update_disk_file_size = 0;
- /*
- * file size will be updated
- * in the ->writev() stack,
- * when submitting file tail
- */
- } else {
- local->eof_padding_size = 0;
- local->new_file_size = conf->orig_offset;
- local->update_disk_file_size = 1;
- /*
- * file size will be updated
- * in this ->ftruncate stack
- */
- }
-}
-
-static void
-submit_head(call_frame_t *frame, xlator_t *this)
-{
- crypt_local_t *local = frame->local;
- submit_partial(frame, this, local->fd, HEAD_ATOM);
-}
-
-static void
-submit_tail(call_frame_t *frame, xlator_t *this)
-{
- crypt_local_t *local = frame->local;
- submit_partial(frame, this, local->fd, TAIL_ATOM);
-}
-
-static void
-submit_hole(call_frame_t *frame, xlator_t *this)
-{
- /*
- * hole conversion always means
- * appended write and goes in ordered fashion
- */
- do_ordered_submit(frame, this, HOLE_ATOM);
-}
-
-static void
-submit_data(call_frame_t *frame, xlator_t *this)
-{
- if (is_ordered_mode(frame)) {
- do_ordered_submit(frame, this, DATA_ATOM);
- return;
- }
- gf_log("crypt", GF_LOG_WARNING, "Bad submit mode");
- get_nr_calls(frame, nr_calls_data(frame));
- do_parallel_submit(frame, this, DATA_ATOM);
- return;
-}
-
-/*
- * heplers called by writev_cbk, fruncate_cbk in ordered mode
- */
-
-static int32_t
-should_submit_hole(crypt_local_t *local)
-{
- struct avec_config *conf = &local->hole_conf;
-
- return conf->avec != NULL;
-}
-
-static int32_t
-should_resume_submit_hole(crypt_local_t *local)
-{
- struct avec_config *conf = &local->hole_conf;
-
- if (local->fop == GF_FOP_WRITE && has_tail_block(conf))
- /*
- * Don't submit a part of hole, which
- * fits into a data block:
- * this part of hole will be converted
- * as a gap filled by zeros in data head
- * block.
- */
- return conf->cursor < conf->acount - 1;
- else
- return conf->cursor < conf->acount;
-}
-
-static int32_t
-should_resume_submit_data(call_frame_t *frame)
-{
- crypt_local_t *local = frame->local;
- struct avec_config *conf = &local->data_conf;
-
- if (is_ordered_mode(frame))
- return conf->cursor < conf->acount;
- /*
- * parallel writes
- */
- return 0;
-}
-
-static int32_t
-should_submit_data_after_hole(crypt_local_t *local)
-{
- return local->data_conf.avec != NULL;
-}
-
-static void
-update_local_file_params(call_frame_t *frame, xlator_t *this,
- struct iatt *prebuf, struct iatt *postbuf)
-{
- crypt_local_t *local = frame->local;
-
- check_buf(frame, this, postbuf);
-
- local->prebuf = *prebuf;
- local->postbuf = *postbuf;
-
- local->prebuf.ia_size = local->cur_file_size;
- local->postbuf.ia_size = local->new_file_size;
-
- local->cur_file_size = local->new_file_size;
-}
-
-static int32_t
-end_writeback_writev(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (op_ret <= 0) {
- gf_log(this->name, GF_LOG_WARNING, "writev iteration failed");
- goto put_one_call;
- }
- /*
- * op_ret includes paddings (atom's head, atom's tail and EOF)
- */
- if (op_ret < local->io_size) {
- gf_log(this->name, GF_LOG_WARNING, "Incomplete writev iteration");
- goto put_one_call;
- }
- op_ret -= local->eof_padding_size;
- local->op_ret = op_ret;
-
- update_local_file_params(frame, this, prebuf, postbuf);
-
- if (data_write_in_progress(local)) {
- LOCK(&local->rw_count_lock);
- local->rw_count += op_ret;
- UNLOCK(&local->rw_count_lock);
-
- if (should_resume_submit_data(frame))
- submit_data(frame, this);
- } else {
- /*
- * hole conversion is going on;
- * don't take into account written zeros
- */
- if (should_resume_submit_hole(local))
- submit_hole(frame, this);
-
- else if (should_submit_data_after_hole(local))
- submit_data(frame, this);
- }
-put_one_call:
- put_one_call_writev(frame, this);
- return 0;
-}
-
-#define crypt_writev_cbk end_writeback_writev
-
-#define HOLE_WRITE_CHUNK_BITS 12
-#define HOLE_WRITE_CHUNK_SIZE (1 << HOLE_WRITE_CHUNK_BITS)
-
-/*
- * Convert hole of size @size at offset @off to
- * zeros and prepare respective iovecs for submit.
- * The hole lock should be held.
- *
- * Pre-conditions:
- * @local->file_size is set and valid.
- */
-int32_t
-prepare_for_submit_hole(call_frame_t *frame, xlator_t *this, uint64_t off,
- off_t size)
-{
- int32_t ret;
- crypt_local_t *local = frame->local;
- struct object_cipher_info *object = &local->info->cinfo;
-
- set_config_offsets(frame, this, off, size, HOLE_ATOM, 1);
-
- ret = set_config_avec_hole(this, local, &local->hole_conf, object,
- local->fop);
- crypt_check_conf(&local->hole_conf);
-
- return ret;
-}
-
-/*
- * prepare for submit @count bytes at offset @from
- */
-int32_t
-prepare_for_submit_data(call_frame_t *frame, xlator_t *this, off_t from,
- int32_t size, struct iovec *vec, int32_t vec_count,
- int32_t setup_gap)
-{
- uint32_t ret;
- crypt_local_t *local = frame->local;
- struct object_cipher_info *object = &local->info->cinfo;
-
- set_config_offsets(frame, this, from, size, DATA_ATOM, setup_gap);
-
- ret = set_config_avec_data(this, local, &local->data_conf, object, vec,
- vec_count);
- crypt_check_conf(&local->data_conf);
-
- return ret;
-}
-
-static void
-free_avec(struct iovec *avec, char **pool, int blocks_in_pool)
-{
- if (!avec)
- return;
- GF_FREE(pool);
- GF_FREE(avec);
-}
-
-static void
-free_avec_data(crypt_local_t *local)
-{
- return free_avec(local->data_conf.avec, local->data_conf.pool,
- local->data_conf.blocks_in_pool);
-}
-
-static void
-free_avec_hole(crypt_local_t *local)
-{
- return free_avec(local->hole_conf.avec, local->hole_conf.pool,
- local->hole_conf.blocks_in_pool);
-}
-
-static void
-do_parallel_submit(call_frame_t *frame, xlator_t *this, atom_data_type dtype)
-{
- crypt_local_t *local = frame->local;
- struct avec_config *conf;
-
- local->active_setup = dtype;
- conf = conf_by_type(frame, dtype);
-
- if (has_head_block(conf))
- submit_head(frame, this);
-
- if (has_full_blocks(conf))
- submit_full(frame, this);
-
- if (has_tail_block(conf))
- submit_tail(frame, this);
- return;
-}
-
-static void
-do_ordered_submit(call_frame_t *frame, xlator_t *this, atom_data_type dtype)
-{
- crypt_local_t *local = frame->local;
- struct avec_config *conf;
-
- local->active_setup = dtype;
- conf = conf_by_type(frame, dtype);
-
- if (should_submit_head_block(conf)) {
- get_one_call_nolock(frame);
- submit_head(frame, this);
- } else if (should_submit_full_block(conf)) {
- get_one_call_nolock(frame);
- submit_full(frame, this);
- } else if (should_submit_tail_block(conf)) {
- get_one_call_nolock(frame);
- submit_tail(frame, this);
- } else
- gf_log("crypt", GF_LOG_DEBUG,
- "nothing has been submitted in ordered mode");
- return;
-}
-
-static int32_t
-do_writev(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *dict, dict_t *xdata)
-{
- data_t *data;
- crypt_local_t *local = frame->local;
- struct object_cipher_info *object = &local->info->cinfo;
- /*
- * extract regular file size
- */
- data = dict_get(dict, FSIZE_XATTR_PREFIX);
- if (!data) {
- gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
- op_ret = -1;
- op_errno = EIO;
- goto error;
- }
- local->old_file_size = local->cur_file_size = data_to_uint64(data);
-
- set_gap_at_end(frame, object, &local->data_conf, DATA_ATOM);
-
- if (local->cur_file_size < local->data_conf.orig_offset) {
- /*
- * Set up hole config
- */
- op_errno = prepare_for_submit_hole(
- frame, this, local->cur_file_size,
- local->data_conf.orig_offset - local->cur_file_size);
- if (op_errno) {
- local->op_ret = -1;
- local->op_errno = op_errno;
- goto error;
- }
- }
- if (should_submit_hole(local))
- submit_hole(frame, this);
- else
- submit_data(frame, this);
- return 0;
-error:
- get_one_call_nolock(frame);
- put_one_call_writev(frame, this);
- return 0;
-}
-
-static int32_t
-crypt_writev_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (op_ret < 0)
- goto error;
- /*
- * An access has been granted,
- * retrieve file size first
- */
- STACK_WIND(frame, do_writev, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetxattr, local->fd,
- FSIZE_XATTR_PREFIX, NULL);
- return 0;
-error:
- get_one_call_nolock(frame);
- put_one_call_writev(frame, this);
- return 0;
-}
-
-static int32_t
-writev_trivial_completion(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- dict_t *dict)
-{
- crypt_local_t *local = frame->local;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- local->prebuf = *buf;
- local->postbuf = *buf;
-
- local->prebuf.ia_size = local->cur_file_size;
- local->postbuf.ia_size = local->cur_file_size;
-
- get_one_call(frame);
- put_one_call_writev(frame, this);
- return 0;
-}
-
-int
-crypt_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vec,
- int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
- dict_t *xdata)
-{
- int32_t ret;
- crypt_local_t *local;
- struct crypt_inode_info *info;
- struct gf_flock lock = {
- 0,
- };
-#if DEBUG_CRYPT
- gf_log("crypt", GF_LOG_DEBUG, "writing %d bytes from offset %llu",
- (int)iov_length(vec, count), (long long)offset);
-#endif
- local = crypt_alloc_local(frame, this, GF_FOP_WRITE);
- if (!local) {
- ret = ENOMEM;
- goto error;
- }
- local->fd = fd_ref(fd);
-
- if (iobref)
- local->iobref = iobref_ref(iobref);
- /*
- * to update real file size on the server
- */
- local->xattr = dict_new();
- if (!local->xattr) {
- ret = ENOMEM;
- goto error;
- }
- local->flags = flags;
-
- info = local_get_inode_info(local, this);
- if (info == NULL) {
- ret = EINVAL;
- goto error;
- }
- if (!object_alg_atomic(&info->cinfo)) {
- ret = EINVAL;
- goto error;
- }
- if (iov_length(vec, count) == 0)
- goto trivial;
-
- ret = prepare_for_submit_data(frame, this, offset,
- iov_length(vec, count),
- vec, count, 0 /* don't setup gup
- in tail: we don't
- know file size yet */);
- if (ret) {
- ret = ENOMEM;
- goto error;
- }
-
- if (parent_is_crypt_xlator(frame, this)) {
- data_t *data;
- /*
- * we are called by shinking crypt_ftruncate(),
- * which doesn't perform hole conversion;
- *
- * don't ask for access:
- * it has already been acquired
- */
-
- /*
- * extract file size
- */
- if (!xdata) {
- gf_log("crypt", GF_LOG_WARNING,
- "Regular file size hasn't been passed");
- ret = EIO;
- goto error;
- }
- data = dict_get(xdata, FSIZE_XATTR_PREFIX);
- if (!data) {
- gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
- ret = EIO;
- goto error;
- }
- local->old_file_size = local->cur_file_size = data_to_uint64(data);
-
- submit_data(frame, this);
- return 0;
- }
- if (xdata)
- local->xdata = dict_ref(xdata);
- /*
- * lock the file and retrieve its size
- */
- lock.l_len = 0;
- lock.l_start = 0;
- lock.l_type = F_WRLCK;
- lock.l_whence = SEEK_SET;
-
- STACK_WIND(frame, crypt_writev_finodelk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, this->name, fd, F_SETLKW,
- &lock, NULL);
- return 0;
-trivial:
- STACK_WIND(frame, writev_trivial_completion, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd, NULL);
- return 0;
-error:
- if (local && local->fd)
- fd_unref(fd);
- if (local && local->iobref)
- iobref_unref(iobref);
- if (local && local->xdata)
- dict_unref(xdata);
- if (local && local->xattr)
- dict_unref(local->xattr);
- if (local && local->info)
- free_inode_info(local->info);
-
- CRYPT_STACK_UNWIND(writev, frame, -1, ret, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-prepare_for_prune(call_frame_t *frame, xlator_t *this, uint64_t offset)
-{
- set_config_offsets(frame, this,
- offset,
- 0, /* count */
- DATA_ATOM,
- 0 /* since we prune, there is no
- gap in tail to uptodate */);
- return 0;
-}
-
-/*
- * Finish the read-prune-modify sequence
- *
- * Can be invoked as
- * 1) ->ftruncate_cbk() for cblock-aligned, or trivial prune
- * 2) ->writev_cbk() for non-cblock-aligned prune
- */
-
-static int32_t
-prune_complete(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- update_local_file_params(frame, this, prebuf, postbuf);
-
- put_one_call_ftruncate(frame, this);
- return 0;
-}
-
-/*
- * This is called as ->ftruncate_cbk()
- *
- * Perform the "write" component of the
- * read-prune-write sequence.
- *
- * submuit the rest of the file
- */
-static int32_t
-prune_submit_file_tail(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
- struct avec_config *conf = &local->data_conf;
- dict_t *dict;
-
- if (op_ret < 0)
- goto put_one_call;
-
- if (local->xdata) {
- dict_unref(local->xdata);
- local->xdata = NULL;
- }
- if (xdata)
- local->xdata = dict_ref(xdata);
-
- dict = dict_new();
- if (!dict) {
- op_errno = ENOMEM;
- goto error;
- }
-
- update_local_file_params(frame, this, prebuf, postbuf);
- local->new_file_size = conf->orig_offset;
-
- /*
- * The rest of the file is a partial block and, hence,
- * should be written via RMW sequence, so the crypt xlator
- * does STACK_WIND to itself.
- *
- * Pass current file size to crypt_writev()
- */
- op_errno = dict_set(dict, FSIZE_XATTR_PREFIX,
- data_from_uint64(local->cur_file_size));
- if (op_errno) {
- gf_log("crypt", GF_LOG_WARNING, "can not set key to update file size");
- dict_unref(dict);
- goto error;
- }
- gf_log("crypt", GF_LOG_DEBUG,
- "passing current file size (%llu) to crypt_writev",
- (unsigned long long)local->cur_file_size);
- /*
- * Padding will be filled with
- * zeros by rmw_partial_block()
- */
- STACK_WIND(frame, prune_complete, this,
- this->fops->writev, /* crypt_writev */
- local->fd, &local->vec, 1,
- conf->aligned_offset, /* offset to write from */
- 0, local->iobref, dict);
-
- dict_unref(dict);
- return 0;
-error:
- local->op_ret = -1;
- local->op_errno = op_errno;
-put_one_call:
- put_one_call_ftruncate(frame, this);
- return 0;
-}
-
-/*
- * This is called as a callback of ->writev() invoked in behalf
- * of ftruncate(): it can be
- * 1) ordered writes issued by hole conversion in the case of
- * expanded truncate, or
- * 2) an rmw partial data block issued by non-cblock-aligned
- * prune.
- */
-int32_t
-end_writeback_ftruncate(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
- /*
- * if nothing has been written,
- * then it must be an error
- */
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (op_ret < 0)
- goto put_one_call;
-
- update_local_file_params(frame, this, prebuf, postbuf);
-
- if (data_write_in_progress(local))
- /* case (2) */
- goto put_one_call;
- /* case (1) */
- if (should_resume_submit_hole(local))
- submit_hole(frame, this);
- /*
- * case of hole, when we shouldn't resume
- */
-put_one_call:
- put_one_call_ftruncate(frame, this);
- return 0;
-}
-
-/*
- * Perform prune and write components of the
- * read-prune-write sequence.
- *
- * Called as ->readv_cbk()
- *
- * Pre-conditions:
- * @vec contains the latest atom of the file
- * (plain text)
- */
-static int32_t
-prune_write(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iovec *vec, int32_t count,
- struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
-{
- int32_t i;
- size_t to_copy;
- size_t copied = 0;
- crypt_local_t *local = frame->local;
- struct avec_config *conf = &local->data_conf;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- if (op_ret == -1)
- goto put_one_call;
-
- /*
- * At first, uptodate head block
- */
- if (iov_length(vec, count) < conf->off_in_head) {
- gf_log(this->name, GF_LOG_WARNING,
- "Failed to uptodate head block for prune");
- local->op_ret = -1;
- local->op_errno = EIO;
- goto put_one_call;
- }
- local->vec.iov_len = conf->off_in_head;
- local->vec.iov_base = GF_CALLOC(1, local->vec.iov_len, gf_crypt_mt_data);
-
- if (local->vec.iov_base == NULL) {
- gf_log(this->name, GF_LOG_WARNING,
- "Failed to calloc head block for prune");
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto put_one_call;
- }
- for (i = 0; i < count; i++) {
- to_copy = vec[i].iov_len;
- if (to_copy > local->vec.iov_len - copied)
- to_copy = local->vec.iov_len - copied;
-
- memcpy((char *)local->vec.iov_base + copied, vec[i].iov_base, to_copy);
- copied += to_copy;
- if (copied == local->vec.iov_len)
- break;
- }
- /*
- * perform prune with aligned offset
- * (i.e. at this step we prune a bit
- * more then it is needed
- */
- STACK_WIND(frame, prune_submit_file_tail, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, local->fd,
- conf->aligned_offset, local->xdata);
- return 0;
-put_one_call:
- put_one_call_ftruncate(frame, this);
- return 0;
-}
-
-/*
- * Perform a read-prune-write sequence
- */
-int32_t
-read_prune_write(call_frame_t *frame, xlator_t *this)
-{
- int32_t ret = 0;
- dict_t *dict = NULL;
- crypt_local_t *local = frame->local;
- struct avec_config *conf = &local->data_conf;
- struct object_cipher_info *object = &local->info->cinfo;
-
- set_local_io_params_ftruncate(frame, object);
- get_one_call_nolock(frame);
-
- if ((conf->orig_offset & (object_alg_blksize(object) - 1)) == 0) {
- /*
- * cblock-aligned prune:
- * we don't need read and write components,
- * just cut file body
- */
- gf_log("crypt", GF_LOG_DEBUG, "prune without RMW (at offset %llu",
- (unsigned long long)conf->orig_offset);
-
- STACK_WIND(frame, prune_complete, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, local->fd,
- conf->orig_offset, local->xdata);
- return 0;
- }
- gf_log("crypt", GF_LOG_DEBUG, "prune with RMW (at offset %llu",
- (unsigned long long)conf->orig_offset);
- /*
- * We are about to perform the "read" component of the
- * read-prune-write sequence. It means that we need to
- * read encrypted data from disk and decrypt it.
- * So, the crypt translator does STACK_WIND to itself.
- *
- * Pass current file size to crypt_readv()
-
- */
- dict = dict_new();
- if (!dict) {
- gf_log("crypt", GF_LOG_WARNING, "Can not alloc dict");
- ret = ENOMEM;
- goto exit;
- }
- ret = dict_set(dict, FSIZE_XATTR_PREFIX,
- data_from_uint64(local->cur_file_size));
- if (ret) {
- gf_log("crypt", GF_LOG_WARNING, "Can not set dict");
- goto exit;
- }
- STACK_WIND(frame, prune_write, this, this->fops->readv, /* crypt_readv */
- local->fd, get_atom_size(object), /* bytes to read */
- conf->aligned_offset, /* offset to read from */
- 0, dict);
-exit:
- if (dict)
- dict_unref(dict);
- return ret;
-}
-
-/*
- * File prune is more complicated than expand.
- * First we need to read the latest atom to not lose info
- * needed for proper update. Also we need to make sure that
- * every component of read-prune-write sequence leaves data
- * consistent
- *
- * Non-cblock aligned prune is performed as read-prune-write
- * sequence:
- *
- * 1) read the latest atom;
- * 2) perform cblock-aligned prune
- * 3) issue a write request for the end-of-file
- */
-int32_t
-prune_file(call_frame_t *frame, xlator_t *this, uint64_t offset)
-{
- int32_t ret;
-
- ret = prepare_for_prune(frame, this, offset);
- if (ret)
- return ret;
- return read_prune_write(frame, this);
-}
-
-int32_t
-expand_file(call_frame_t *frame, xlator_t *this, uint64_t offset)
-{
- int32_t ret;
- crypt_local_t *local = frame->local;
-
- ret = prepare_for_submit_hole(frame, this, local->old_file_size,
- offset - local->old_file_size);
- if (ret)
- return ret;
- submit_hole(frame, this);
- return 0;
-}
-
-static int32_t
-ftruncate_trivial_completion(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- dict_t *dict)
-{
- crypt_local_t *local = frame->local;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- local->prebuf = *buf;
- local->postbuf = *buf;
-
- local->prebuf.ia_size = local->cur_file_size;
- local->postbuf.ia_size = local->cur_file_size;
-
- get_one_call(frame);
- put_one_call_ftruncate(frame, this);
- return 0;
-}
-
-static int32_t
-do_ftruncate(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *dict, dict_t *xdata)
-{
- data_t *data;
- crypt_local_t *local = frame->local;
-
- if (op_ret)
- goto error;
- /*
- * extract regular file size
- */
- data = dict_get(dict, FSIZE_XATTR_PREFIX);
- if (!data) {
- gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
- op_errno = EIO;
- goto error;
- }
- local->old_file_size = local->cur_file_size = data_to_uint64(data);
-
- if (local->data_conf.orig_offset == local->cur_file_size) {
-#if DEBUG_CRYPT
- gf_log("crypt", GF_LOG_DEBUG,
- "trivial ftruncate (current file size %llu)",
- (unsigned long long)local->cur_file_size);
-#endif
- goto trivial;
- } else if (local->data_conf.orig_offset < local->cur_file_size) {
-#if DEBUG_CRYPT
- gf_log("crypt", GF_LOG_DEBUG, "prune from %llu to %llu",
- (unsigned long long)local->cur_file_size,
- (unsigned long long)local->data_conf.orig_offset);
-#endif
- op_errno = prune_file(frame, this, local->data_conf.orig_offset);
- } else {
-#if DEBUG_CRYPT
- gf_log("crypt", GF_LOG_DEBUG, "expand from %llu to %llu",
- (unsigned long long)local->cur_file_size,
- (unsigned long long)local->data_conf.orig_offset);
-#endif
- op_errno = expand_file(frame, this, local->data_conf.orig_offset);
- }
- if (op_errno)
- goto error;
- return 0;
-trivial:
- STACK_WIND(frame, ftruncate_trivial_completion, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, local->fd, NULL);
- return 0;
-error:
- /*
- * finish with ftruncate
- */
- local->op_ret = -1;
- local->op_errno = op_errno;
-
- get_one_call_nolock(frame);
- put_one_call_ftruncate(frame, this);
- return 0;
-}
-
-static int32_t
-crypt_ftruncate_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (op_ret < 0)
- goto error;
- /*
- * An access has been granted,
- * retrieve file size first
- */
- STACK_WIND(frame, do_ftruncate, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetxattr, local->fd,
- FSIZE_XATTR_PREFIX, NULL);
- return 0;
-error:
- get_one_call_nolock(frame);
- put_one_call_ftruncate(frame, this);
- return 0;
-}
-
-/*
- * ftruncate is performed in 2 steps:
- * . receive file size;
- * . expand or prune file.
- */
-static int32_t
-crypt_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- dict_t *xdata)
-{
- int32_t ret;
- crypt_local_t *local;
- struct crypt_inode_info *info;
- struct gf_flock lock = {
- 0,
- };
-
- local = crypt_alloc_local(frame, this, GF_FOP_FTRUNCATE);
- if (!local) {
- ret = ENOMEM;
- goto error;
- }
- local->xattr = dict_new();
- if (!local->xattr) {
- ret = ENOMEM;
- goto error;
- }
- local->fd = fd_ref(fd);
- info = local_get_inode_info(local, this);
- if (info == NULL) {
- ret = EINVAL;
- goto error;
- }
- if (!object_alg_atomic(&info->cinfo)) {
- ret = EINVAL;
- goto error;
- }
- local->data_conf.orig_offset = offset;
- if (xdata)
- local->xdata = dict_ref(xdata);
-
- lock.l_len = 0;
- lock.l_start = 0;
- lock.l_type = F_WRLCK;
- lock.l_whence = SEEK_SET;
-
- STACK_WIND(frame, crypt_ftruncate_finodelk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, this->name, fd, F_SETLKW,
- &lock, NULL);
- return 0;
-error:
- if (local && local->fd)
- fd_unref(fd);
- if (local && local->xdata)
- dict_unref(xdata);
- if (local && local->xattr)
- dict_unref(local->xattr);
- if (local && local->info)
- free_inode_info(local->info);
-
- CRYPT_STACK_UNWIND(ftruncate, frame, -1, ret, NULL, NULL, NULL);
- return 0;
-}
-
-/* ->flush_cbk() */
-int32_t
-truncate_end(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- CRYPT_STACK_UNWIND(truncate, frame, op_ret, op_errno, &local->prebuf,
- &local->postbuf, local->xdata);
- return 0;
-}
-
-/* ftruncate_cbk() */
-int32_t
-truncate_flush(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
- fd_t *fd = local->fd;
- local->prebuf = *prebuf;
- local->postbuf = *postbuf;
-
- STACK_WIND(frame, truncate_end, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush, fd, NULL);
- fd_unref(fd);
- return 0;
-}
-
-/*
- * is called as ->open_cbk()
- */
-static int32_t
-truncate_begin(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- if (op_ret < 0) {
- fd_unref(fd);
- CRYPT_STACK_UNWIND(truncate, frame, op_ret, op_errno, NULL, NULL, NULL);
- return 0;
- } else {
- fd_bind(fd);
- }
- /*
- * crypt_truncate() is implemented via crypt_ftruncate(),
- * so the crypt xlator does STACK_WIND to itself here
- */
- STACK_WIND(frame, truncate_flush, this,
- this->fops->ftruncate, /* crypt_ftruncate */
- fd, local->offset, NULL);
- return 0;
-}
-
-/*
- * crypt_truncate() is implemented via crypt_ftruncate() as a
- * sequence crypt_open() - crypt_ftruncate() - truncate_flush()
- */
-int32_t
-crypt_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
- dict_t *xdata)
-{
- fd_t *fd;
- crypt_local_t *local;
-
-#if DEBUG_CRYPT
- gf_log(this->name, GF_LOG_DEBUG, "truncate file %s at offset %llu",
- loc->path, (unsigned long long)offset);
-#endif
- local = crypt_alloc_local(frame, this, GF_FOP_TRUNCATE);
- if (!local)
- goto error;
-
- fd = fd_create(loc->inode, frame->root->pid);
- if (!fd) {
- gf_log(this->name, GF_LOG_ERROR, "Can not create fd");
- goto error;
- }
- local->fd = fd;
- local->offset = offset;
- local->xdata = xdata;
- STACK_WIND(frame, truncate_begin, this, this->fops->open, /* crypt_open() */
- loc, O_RDWR, fd, NULL);
- return 0;
-error:
- CRYPT_STACK_UNWIND(truncate, frame, -1, EINVAL, NULL, NULL, NULL);
- return 0;
-}
-
-end_writeback_handler_t
-dispatch_end_writeback(glusterfs_fop_t fop)
-{
- switch (fop) {
- case GF_FOP_WRITE:
- return end_writeback_writev;
- case GF_FOP_FTRUNCATE:
- return end_writeback_ftruncate;
- default:
- gf_log("crypt", GF_LOG_WARNING, "Bad wb operation %d", fop);
- return NULL;
- }
-}
-
-/*
- * true, if the caller needs metadata string
- */
-static int32_t
-is_custom_mtd(dict_t *xdata)
-{
- data_t *data;
- uint32_t flags;
-
- if (!xdata)
- return 0;
-
- data = dict_get(xdata, MSGFLAGS_PREFIX);
- if (!data)
- return 0;
- if (data->len != sizeof(uint32_t)) {
- gf_log("crypt", GF_LOG_WARNING, "Bad msgflags size (%d)", data->len);
- return -1;
- }
- flags = *((uint32_t *)data->data);
- return msgflags_check_mtd_lock(&flags);
-}
-
-static int32_t
-crypt_open_done(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- if (op_ret < 0)
- gf_log(this->name, GF_LOG_WARNING, "mtd unlock failed (%d)", op_errno);
- put_one_call_open(frame);
- return 0;
-}
-
-static void
-crypt_open_tail(call_frame_t *frame, xlator_t *this)
-{
- struct gf_flock lock = {
- 0,
- };
- crypt_local_t *local = frame->local;
-
- lock.l_type = F_UNLCK;
- lock.l_whence = SEEK_SET;
- lock.l_start = 0;
- lock.l_len = 0;
- lock.l_pid = 0;
-
- STACK_WIND(frame, crypt_open_done, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, this->name, local->fd,
- F_SETLKW, &lock, NULL);
-}
-
-/*
- * load private inode info at open time
- * called as ->fgetxattr_cbk()
- */
-static int
-load_mtd_open(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *dict, dict_t *xdata)
-{
- int32_t ret;
- gf_boolean_t upload_info;
- data_t *mtd;
- uint64_t value = 0;
- struct crypt_inode_info *info;
- crypt_local_t *local = frame->local;
- crypt_private_t *priv = this->private;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (local->fd->inode->ia_type == IA_IFLNK)
- goto exit;
- if (op_ret < 0)
- goto exit;
- /*
- * first, check for cached info
- */
- ret = inode_ctx_get(local->fd->inode, this, &value);
- if (ret != -1) {
- info = (struct crypt_inode_info *)(long)value;
- if (info == NULL) {
- gf_log(this->name, GF_LOG_WARNING,
- "Inode info expected, but not found");
- local->op_ret = -1;
- local->op_errno = EIO;
- goto exit;
- }
- /*
- * info has been found in the cache
- */
- upload_info = _gf_false;
- } else {
- /*
- * info hasn't been found in the cache.
- */
- info = alloc_inode_info(local, local->loc);
- if (!info) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto exit;
- }
- init_inode_info_head(info, local->fd);
- upload_info = _gf_true;
- }
- /*
- * extract metadata
- */
- mtd = dict_get(dict, CRYPTO_FORMAT_PREFIX);
- if (!mtd) {
- local->op_ret = -1;
- local->op_errno = ENOENT;
- gf_log(this->name, GF_LOG_WARNING, "Format string wasn't found");
- goto exit;
- }
- /*
- * authenticate metadata against the path
- */
- ret = open_format((unsigned char *)mtd->data, mtd->len, local->loc, info,
- get_master_cinfo(priv), local, upload_info);
- if (ret) {
- local->op_ret = -1;
- local->op_errno = ret;
- goto exit;
- }
- if (upload_info) {
- ret = init_inode_info_tail(info, get_master_cinfo(priv));
- if (ret) {
- local->op_ret = -1;
- local->op_errno = ret;
- goto exit;
- }
- ret = inode_ctx_put(local->fd->inode, this, (uint64_t)(long)info);
- if (ret == -1) {
- local->op_ret = -1;
- local->op_errno = EIO;
- goto exit;
- }
- }
- if (local->custom_mtd) {
- /*
- * pass the metadata string to the customer
- */
- ret = dict_set_static_bin(local->xdata, CRYPTO_FORMAT_PREFIX, mtd->data,
- mtd->len);
- if (ret) {
- local->op_ret = -1;
- local->op_errno = ret;
- goto exit;
- }
- }
-exit:
- if (!local->custom_mtd)
- crypt_open_tail(frame, this);
- else
- put_one_call_open(frame);
- return 0;
-}
-
-static int32_t
-crypt_open_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (op_ret < 0) {
- gf_log(this->name, GF_LOG_WARNING, "finodelk (LOCK) failed");
- goto exit;
- }
- STACK_WIND(frame, load_mtd_open, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetxattr, local->fd,
- CRYPTO_FORMAT_PREFIX, NULL);
- return 0;
-exit:
- put_one_call_open(frame);
- return 0;
-}
-
-/*
- * verify metadata against the specified pathname
- */
-static int32_t
-crypt_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
-{
- struct gf_flock lock = {
- 0,
- };
- crypt_local_t *local = frame->local;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (local->fd->inode->ia_type == IA_IFLNK)
- goto exit;
- if (op_ret < 0)
- goto exit;
- if (xdata)
- local->xdata = dict_ref(xdata);
- else if (local->custom_mtd) {
- local->xdata = dict_new();
- if (!local->xdata) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- gf_log("crypt", GF_LOG_ERROR,
- "Can not get new dict for mtd string");
- goto exit;
- }
- }
- lock.l_len = 0;
- lock.l_start = 0;
- lock.l_type = local->custom_mtd ? F_WRLCK : F_RDLCK;
- lock.l_whence = SEEK_SET;
-
- STACK_WIND(frame, crypt_open_finodelk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, this->name, fd, F_SETLKW,
- &lock, NULL);
- return 0;
-exit:
- put_one_call_open(frame);
- return 0;
-}
-
-static int32_t
-crypt_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, dict_t *xdata)
-{
- int32_t ret = ENOMEM;
- crypt_local_t *local;
-
- local = crypt_alloc_local(frame, this, GF_FOP_OPEN);
- if (!local)
- goto error;
- local->loc = GF_CALLOC(1, sizeof(loc_t), gf_crypt_mt_loc);
- if (!local->loc) {
- ret = ENOMEM;
- goto error;
- }
- ret = loc_copy(local->loc, loc);
- if (ret) {
- GF_FREE(local->loc);
- ret = ENOMEM;
- goto error;
- }
- local->fd = fd_ref(fd);
-
- ret = is_custom_mtd(xdata);
- if (ret < 0) {
- loc_wipe(local->loc);
- GF_FREE(local->loc);
- ret = EINVAL;
- goto error;
- }
- local->custom_mtd = ret;
-
- if ((flags & O_ACCMODE) == O_WRONLY)
- /*
- * we can't open O_WRONLY, because
- * we need to do read-modify-write
- */
- flags = (flags & ~O_ACCMODE) | O_RDWR;
- /*
- * Make sure that out translated offsets
- * and counts won't be ignored
- */
- flags &= ~O_APPEND;
- get_one_call_nolock(frame);
- STACK_WIND(frame, crypt_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
- return 0;
-error:
- CRYPT_STACK_UNWIND(open, frame, -1, ret, NULL, NULL);
- return 0;
-}
-
-static int32_t
-init_inode_info_tail(struct crypt_inode_info *info,
- struct master_cipher_info *master)
-{
- int32_t ret;
- struct object_cipher_info *object = &info->cinfo;
-
-#if DEBUG_CRYPT
- gf_log("crypt", GF_LOG_DEBUG, "Init inode info for object %s",
- uuid_utoa(info->oid));
-#endif
- ret = data_cipher_algs[object->o_alg][object->o_mode].set_private(info,
- master);
- if (ret) {
- gf_log("crypt", GF_LOG_ERROR, "Set private info failed");
- return ret;
- }
- return 0;
-}
-
-/*
- * Init inode info at ->create() time
- */
-static void
-init_inode_info_create(struct crypt_inode_info *info,
- struct master_cipher_info *master, data_t *data)
-{
- struct object_cipher_info *object;
-
- info->nr_minor = CRYPT_XLATOR_ID;
- memcpy(info->oid, data->data, data->len);
-
- object = &info->cinfo;
-
- object->o_alg = master->m_alg;
- object->o_mode = master->m_mode;
- object->o_block_bits = master->m_block_bits;
- object->o_dkey_size = master->m_dkey_size;
-}
-
-static void
-init_inode_info_head(struct crypt_inode_info *info, fd_t *fd)
-{
- memcpy(info->oid, fd->inode->gfid, sizeof(uuid_t));
-}
-
-static int32_t
-crypt_create_done(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_private_t *priv = this->private;
- crypt_local_t *local = frame->local;
- struct crypt_inode_info *info = local->info;
- fd_t *local_fd = local->fd;
- dict_t *local_xdata = local->xdata;
- inode_t *local_inode = local->inode;
-
- if (op_ret < 0) {
- free_inode_info(info);
- goto unwind;
- }
- op_errno = init_inode_info_tail(info, get_master_cinfo(priv));
- if (op_errno) {
- op_ret = -1;
- free_inode_info(info);
- goto unwind;
- }
- /*
- * FIXME: drop major subversion number
- */
- op_ret = inode_ctx_put(local->fd->inode, this, (uint64_t)(long)info);
- if (op_ret == -1) {
- op_errno = EIO;
- free_inode_info(info);
- goto unwind;
- }
-unwind:
- free_format(local);
- CRYPT_STACK_UNWIND(create, frame, op_ret, op_errno, local_fd, local_inode,
- &local->buf, &local->prebuf, &local->postbuf,
- local_xdata);
- fd_unref(local_fd);
- inode_unref(local_inode);
- if (local_xdata)
- dict_unref(local_xdata);
- return 0;
-}
-
-static int
-crypt_create_tail(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- struct gf_flock lock = {
- 0,
- };
- crypt_local_t *local = frame->local;
- fd_t *local_fd = local->fd;
- dict_t *local_xdata = local->xdata;
- inode_t *local_inode = local->inode;
-
- dict_unref(local->xattr);
-
- if (op_ret < 0)
- goto error;
-
- lock.l_type = F_UNLCK;
- lock.l_whence = SEEK_SET;
- lock.l_start = 0;
- lock.l_len = 0;
- lock.l_pid = 0;
-
- STACK_WIND(frame, crypt_create_done, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, this->name, local->fd,
- F_SETLKW, &lock, NULL);
- return 0;
-error:
- free_inode_info(local->info);
- free_format(local);
-
- CRYPT_STACK_UNWIND(create, frame, op_ret, op_errno, local_fd, local_inode,
- &local->buf, &local->prebuf, &local->postbuf,
- local_xdata);
-
- fd_unref(local_fd);
- inode_unref(local_inode);
- if (local_xdata)
- dict_unref(local_xdata);
- return 0;
-}
-
-static int32_t
-crypt_create_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
- struct crypt_inode_info *info = local->info;
-
- if (op_ret < 0)
- goto error;
-
- STACK_WIND(frame, crypt_create_tail, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr, local->fd,
- local->xattr, /* CRYPTO_FORMAT_PREFIX */
- 0, NULL);
- return 0;
-error:
- free_inode_info(info);
- free_format(local);
- fd_unref(local->fd);
- dict_unref(local->xattr);
- if (local->xdata)
- dict_unref(local->xdata);
-
- CRYPT_STACK_UNWIND(create, frame, op_ret, op_errno, NULL, NULL, NULL, NULL,
- NULL, NULL);
- return 0;
-}
-
-/*
- * Create and store crypt-specific format on disk;
- * Populate cache with private inode info
- */
-static int32_t
-crypt_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- struct gf_flock lock = {
- 0,
- };
- crypt_local_t *local = frame->local;
- struct crypt_inode_info *info = local->info;
-
- if (op_ret < 0)
- goto error;
- if (xdata)
- local->xdata = dict_ref(xdata);
- local->inode = inode_ref(inode);
- local->buf = *buf;
- local->prebuf = *preparent;
- local->postbuf = *postparent;
-
- lock.l_len = 0;
- lock.l_start = 0;
- lock.l_type = F_WRLCK;
- lock.l_whence = SEEK_SET;
-
- STACK_WIND(frame, crypt_create_finodelk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, this->name, local->fd,
- F_SETLKW, &lock, NULL);
- return 0;
-error:
- free_inode_info(info);
- free_format(local);
- fd_unref(local->fd);
- dict_unref(local->xattr);
-
- CRYPT_STACK_UNWIND(create, frame, op_ret, op_errno, NULL, NULL, NULL, NULL,
- NULL, NULL);
- return 0;
-}
-
-static int32_t
-crypt_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
-{
- int ret;
- data_t *data;
- crypt_local_t *local;
- crypt_private_t *priv;
- struct master_cipher_info *master;
- struct crypt_inode_info *info;
-
- priv = this->private;
- master = get_master_cinfo(priv);
-
- if (master_alg_atomic(master)) {
- /*
- * We can't open O_WRONLY, because we
- * need to do read-modify-write.
- */
- if ((flags & O_ACCMODE) == O_WRONLY)
- flags = (flags & ~O_ACCMODE) | O_RDWR;
- /*
- * Make sure that out translated offsets
- * and counts won't be ignored
- */
- flags &= ~O_APPEND;
- }
- local = crypt_alloc_local(frame, this, GF_FOP_CREATE);
- if (!local) {
- ret = ENOMEM;
- goto error;
- }
- data = dict_get(xdata, "gfid-req");
- if (!data) {
- ret = EINVAL;
- gf_log("crypt", GF_LOG_WARNING, "gfid not found");
- goto error;
- }
- if (data->len != sizeof(uuid_t)) {
- ret = EINVAL;
- gf_log("crypt", GF_LOG_WARNING, "bad gfid size (%d), should be %d",
- (int)data->len, (int)sizeof(uuid_t));
- goto error;
- }
- info = alloc_inode_info(local, loc);
- if (!info) {
- ret = ENOMEM;
- goto error;
- }
- /*
- * NOTE:
- * format has to be created BEFORE
- * proceeding to the untrusted server
- */
- ret = alloc_format_create(local);
- if (ret) {
- free_inode_info(info);
- goto error;
- }
- init_inode_info_create(info, master, data);
-
- ret = create_format(local->format, loc, info, master);
- if (ret) {
- free_inode_info(info);
- goto error;
- }
- local->xattr = dict_new();
- if (!local->xattr) {
- free_inode_info(info);
- free_format(local);
- goto error;
- }
- ret = dict_set_static_bin(local->xattr, CRYPTO_FORMAT_PREFIX, local->format,
- new_format_size());
- if (ret) {
- dict_unref(local->xattr);
- free_inode_info(info);
- free_format(local);
- ret = EINVAL;
- goto error;
- }
- ret = dict_set(local->xattr, FSIZE_XATTR_PREFIX, data_from_uint64(0));
- if (ret) {
- dict_unref(local->xattr);
- free_inode_info(info);
- free_format(local);
- ret = ENOMEM;
- goto error;
- }
- local->fd = fd_ref(fd);
-
- STACK_WIND(frame, crypt_create_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
- xdata);
- return 0;
-error:
- gf_log("crypt", GF_LOG_WARNING, "can not create file");
- CRYPT_STACK_UNWIND(create, frame, -1, ret, NULL, NULL, NULL, NULL, NULL,
- NULL);
- return 0;
-}
-
-/*
- * FIXME: this should depends on the version of format string
- */
-static int32_t
-filter_crypt_xattr(dict_t *dict, char *key, data_t *value, void *data)
-{
- dict_del(dict, key);
- return 0;
-}
-
-static int32_t
-crypt_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int32_t flags, dict_t *xdata)
-{
- dict_foreach_fnmatch(dict, "trusted.glusterfs.crypt*", filter_crypt_xattr,
- NULL);
- STACK_WIND(frame, default_fsetxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
- return 0;
-}
-
-/*
- * TBD: verify file metadata before wind
- */
-static int32_t
-crypt_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int32_t flags, dict_t *xdata)
-{
- dict_foreach_fnmatch(dict, "trusted.glusterfs.crypt*", filter_crypt_xattr,
- NULL);
- STACK_WIND(frame, default_setxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata);
- return 0;
-}
-
-/*
- * called as flush_cbk()
- */
-static int32_t
-linkop_end(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
- linkop_unwind_handler_t unwind_fn;
- unwind_fn = linkop_unwind_dispatch(local->fop);
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (op_ret < 0 && op_errno == ENOENT &&
- local->loc->inode->ia_type == IA_IFLNK) {
- local->op_ret = 0;
- local->op_errno = 0;
- }
- unwind_fn(frame);
- return 0;
-}
-
-/*
- * unpin inode on the server
- */
-static int32_t
-link_flush(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- if (op_ret < 0)
- goto error;
- if (local->xdata) {
- dict_unref(local->xdata);
- local->xdata = NULL;
- }
- if (xdata)
- local->xdata = dict_ref(xdata);
- local->inode = inode_ref(inode);
- local->buf = *buf;
- local->prebuf = *preparent;
- local->postbuf = *postparent;
-
- STACK_WIND(frame, linkop_end, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush, local->fd, NULL);
- return 0;
-error:
- local->op_ret = -1;
- local->op_errno = op_errno;
- link_unwind(frame);
- return 0;
-}
-
-void
-link_unwind(call_frame_t *frame)
-{
- crypt_local_t *local = frame->local;
- dict_t *xdata;
- dict_t *xattr;
- inode_t *inode;
-
- if (!local) {
- CRYPT_STACK_UNWIND(link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
- NULL);
- return;
- }
- xdata = local->xdata;
- xattr = local->xattr;
- inode = local->inode;
-
- if (local->loc) {
- loc_wipe(local->loc);
- GF_FREE(local->loc);
- }
- if (local->newloc) {
- loc_wipe(local->newloc);
- GF_FREE(local->newloc);
- }
- if (local->fd)
- fd_unref(local->fd);
- if (local->format)
- GF_FREE(local->format);
-
- CRYPT_STACK_UNWIND(link, frame, local->op_ret, local->op_errno, inode,
- &local->buf, &local->prebuf, &local->postbuf, xdata);
- if (xdata)
- dict_unref(xdata);
- if (xattr)
- dict_unref(xattr);
- if (inode)
- inode_unref(inode);
-}
-
-void
-link_wind(call_frame_t *frame, xlator_t *this)
-{
- crypt_local_t *local = frame->local;
-
- STACK_WIND(frame, link_flush, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link, local->loc, local->newloc,
- local->xdata);
-}
-
-/*
- * unlink()
- */
-static int32_t
-unlink_flush(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- if (op_ret < 0)
- goto error;
- local->prebuf = *preparent;
- local->postbuf = *postparent;
- if (local->xdata) {
- dict_unref(local->xdata);
- local->xdata = NULL;
- }
- if (xdata)
- local->xdata = dict_ref(xdata);
-
- STACK_WIND(frame, linkop_end, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush, local->fd, NULL);
- return 0;
-error:
- local->op_ret = -1;
- local->op_errno = op_errno;
- unlink_unwind(frame);
- return 0;
-}
-
-void
-unlink_unwind(call_frame_t *frame)
-{
- crypt_local_t *local = frame->local;
- dict_t *xdata;
- dict_t *xattr;
-
- if (!local) {
- CRYPT_STACK_UNWIND(unlink, frame, -1, ENOMEM, NULL, NULL, NULL);
- return;
- }
- xdata = local->xdata;
- xattr = local->xattr;
- if (local->loc) {
- loc_wipe(local->loc);
- GF_FREE(local->loc);
- }
- if (local->fd)
- fd_unref(local->fd);
- if (local->format)
- GF_FREE(local->format);
-
- CRYPT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
- &local->prebuf, &local->postbuf, xdata);
- if (xdata)
- dict_unref(xdata);
- if (xattr)
- dict_unref(xattr);
-}
-
-void
-unlink_wind(call_frame_t *frame, xlator_t *this)
-{
- crypt_local_t *local = frame->local;
-
- STACK_WIND(frame, unlink_flush, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, local->loc, local->flags,
- local->xdata);
-}
-
-void
-rename_unwind(call_frame_t *frame)
-{
- crypt_local_t *local = frame->local;
- dict_t *xdata;
- dict_t *xattr;
- struct iatt *prenewparent;
- struct iatt *postnewparent;
-
- if (!local) {
- CRYPT_STACK_UNWIND(rename, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
- NULL, NULL);
- return;
- }
- xdata = local->xdata;
- xattr = local->xattr;
- prenewparent = local->prenewparent;
- postnewparent = local->postnewparent;
-
- if (local->loc) {
- loc_wipe(local->loc);
- GF_FREE(local->loc);
- }
- if (local->newloc) {
- loc_wipe(local->newloc);
- GF_FREE(local->newloc);
- }
- if (local->fd)
- fd_unref(local->fd);
- if (local->format)
- GF_FREE(local->format);
-
- CRYPT_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno,
- &local->buf, &local->prebuf, &local->postbuf,
- prenewparent, postnewparent, xdata);
- if (xdata)
- dict_unref(xdata);
- if (xattr)
- dict_unref(xattr);
- if (prenewparent)
- GF_FREE(prenewparent);
- if (postnewparent)
- GF_FREE(postnewparent);
-}
-
-/*
- * called as flush_cbk()
- */
-static int32_t
-rename_end(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- rename_unwind(frame);
- return 0;
-}
-
-static int32_t
-rename_flush(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *buf, struct iatt *preoldparent,
- struct iatt *postoldparent, struct iatt *prenewparent,
- struct iatt *postnewparent, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- if (op_ret < 0)
- goto error;
- dict_unref(local->xdata);
- local->xdata = NULL;
- if (xdata)
- local->xdata = dict_ref(xdata);
-
- local->buf = *buf;
- local->prebuf = *preoldparent;
- local->postbuf = *postoldparent;
- if (prenewparent) {
- local->prenewparent = GF_CALLOC(1, sizeof(*prenewparent),
- gf_crypt_mt_iatt);
- if (!local->prenewparent) {
- op_errno = ENOMEM;
- goto error;
- }
- *local->prenewparent = *prenewparent;
- }
- if (postnewparent) {
- local->postnewparent = GF_CALLOC(1, sizeof(*postnewparent),
- gf_crypt_mt_iatt);
- if (!local->postnewparent) {
- op_errno = ENOMEM;
- goto error;
- }
- *local->postnewparent = *postnewparent;
- }
- STACK_WIND(frame, rename_end, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush, local->fd, NULL);
- return 0;
-error:
- local->op_ret = -1;
- local->op_errno = op_errno;
- rename_unwind(frame);
- return 0;
-}
-
-void
-rename_wind(call_frame_t *frame, xlator_t *this)
-{
- crypt_local_t *local = frame->local;
-
- STACK_WIND(frame, rename_flush, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename, local->loc, local->newloc,
- local->xdata);
-}
-
-static int32_t
-__do_linkop(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
- linkop_wind_handler_t wind_fn;
- linkop_unwind_handler_t unwind_fn;
-
- wind_fn = linkop_wind_dispatch(local->fop);
- unwind_fn = linkop_unwind_dispatch(local->fop);
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (op_ret >= 0)
- wind_fn(frame, this);
- else {
- gf_log(this->name, GF_LOG_WARNING, "mtd unlock failed (%d)", op_errno);
- unwind_fn(frame);
- }
- return 0;
-}
-
-static int32_t
-do_linkop(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
-{
- struct gf_flock lock = {
- 0,
- };
- crypt_local_t *local = frame->local;
- linkop_unwind_handler_t unwind_fn;
-
- unwind_fn = linkop_unwind_dispatch(local->fop);
- local->op_ret = op_ret;
- local->op_errno = op_errno;
-
- if (op_ret < 0)
- goto error;
-
- lock.l_type = F_UNLCK;
- lock.l_whence = SEEK_SET;
- lock.l_start = 0;
- lock.l_len = 0;
- lock.l_pid = 0;
-
- STACK_WIND(frame, __do_linkop, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, this->name, local->fd,
- F_SETLKW, &lock, NULL);
- return 0;
-error:
- unwind_fn(frame);
- return 0;
-}
-
-/*
- * Update the metadata string (against the new pathname);
- * submit the result
- */
-static int32_t
-linkop_begin(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, fd_t *fd, dict_t *xdata)
-{
- gf_boolean_t upload_info;
- crypt_local_t *local = frame->local;
- crypt_private_t *priv = this->private;
- struct crypt_inode_info *info;
- data_t *old_mtd;
- uint32_t new_mtd_size;
- uint64_t value = 0;
- void (*unwind_fn)(call_frame_t * frame);
- mtd_op_t mop;
-
- unwind_fn = linkop_unwind_dispatch(local->fop);
- mop = linkop_mtdop_dispatch(local->fop);
-
- if (op_ret < 0) {
- /*
- * verification failed
- */
- goto error;
- } else {
- fd_bind(fd);
- }
-
- old_mtd = dict_get(xdata, CRYPTO_FORMAT_PREFIX);
- if (!old_mtd) {
- op_errno = EIO;
- gf_log(this->name, GF_LOG_DEBUG, "Metadata string wasn't found");
- goto error;
- }
- new_mtd_size = format_size(mop, old_mtd->len);
- op_errno = alloc_format(local, new_mtd_size);
- if (op_errno)
- goto error;
- /*
- * check for cached info
- */
- op_ret = inode_ctx_get(fd->inode, this, &value);
- if (op_ret != -1) {
- info = (struct crypt_inode_info *)(long)value;
- if (info == NULL) {
- gf_log(this->name, GF_LOG_WARNING, "Inode info was not found");
- op_errno = EINVAL;
- goto error;
- }
- /*
- * info was found in the cache
- */
- local->info = info;
- upload_info = _gf_false;
- } else {
- /*
- * info wasn't found in the cache;
- */
- info = alloc_inode_info(local, local->loc);
- if (!info)
- goto error;
- init_inode_info_head(info, fd);
- local->info = info;
- upload_info = _gf_true;
- }
- op_errno = open_format((unsigned char *)old_mtd->data, old_mtd->len,
- local->loc, info, get_master_cinfo(priv), local,
- upload_info);
- if (op_errno)
- goto error;
- if (upload_info == _gf_true) {
- op_errno = init_inode_info_tail(info, get_master_cinfo(priv));
- if (op_errno)
- goto error;
- op_errno = inode_ctx_put(fd->inode, this, (uint64_t)(long)(info));
- if (op_errno == -1) {
- op_errno = EIO;
- goto error;
- }
- }
- /*
- * update the format string (append/update/cup a MAC)
- */
- op_errno = update_format(local->format, (unsigned char *)old_mtd->data,
- old_mtd->len, local->mac_idx, mop, local->newloc,
- info, get_master_cinfo(priv), local);
- if (op_errno)
- goto error;
- /*
- * store the new format string on the server
- */
- if (new_mtd_size) {
- op_errno = dict_set_static_bin(local->xattr, CRYPTO_FORMAT_PREFIX,
- local->format, new_mtd_size);
- if (op_errno)
- goto error;
- }
- STACK_WIND(frame, do_linkop, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr, local->loc, local->xattr, 0,
- NULL);
- return 0;
-error:
- local->op_ret = -1;
- local->op_errno = op_errno;
- unwind_fn(frame);
- return 0;
-}
-
-static int32_t
-linkop_grab_local(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc, int flags, dict_t *xdata, glusterfs_fop_t op)
-{
- int32_t ret = ENOMEM;
- fd_t *fd;
- crypt_local_t *local;
-
- local = crypt_alloc_local(frame, this, op);
- if (!local)
- goto error;
- if (xdata)
- local->xdata = dict_ref(xdata);
-
- fd = fd_create(oldloc->inode, frame->root->pid);
- if (!fd) {
- gf_log(this->name, GF_LOG_ERROR, "Can not create fd");
- goto error;
- }
- local->fd = fd;
- local->flags = flags;
- local->loc = GF_CALLOC(1, sizeof(loc_t), gf_crypt_mt_loc);
- if (!local->loc)
- goto error;
- ret = loc_copy(local->loc, oldloc);
- if (ret) {
- GF_FREE(local->loc);
- local->loc = NULL;
- goto error;
- }
- if (newloc) {
- local->newloc = GF_CALLOC(1, sizeof(loc_t), gf_crypt_mt_loc);
- if (!local->newloc) {
- loc_wipe(local->loc);
- GF_FREE(local->loc);
- goto error;
- }
- ret = loc_copy(local->newloc, newloc);
- if (ret) {
- loc_wipe(local->loc);
- GF_FREE(local->loc);
- GF_FREE(local->newloc);
- goto error;
- }
- }
- local->xattr = dict_new();
- if (!local->xattr) {
- gf_log(this->name, GF_LOG_ERROR, "Can not create dict");
- ret = ENOMEM;
- goto error;
- }
- return 0;
-
-error:
- if (local) {
- if (local->xdata)
- dict_unref(local->xdata);
- if (local->fd)
- fd_unref(local->fd);
- local->fd = 0;
- local->loc = NULL;
- local->newloc = NULL;
- local->op_ret = -1;
- local->op_errno = ret;
- }
-
- return ret;
-}
-
-/*
- * read and verify locked metadata against the old pathname (via open);
- * update the metadata string in accordance with the new pathname;
- * submit modified metadata;
- * wind;
- */
-static int32_t
-linkop(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- int flags, dict_t *xdata, glusterfs_fop_t op)
-{
- int32_t ret;
- dict_t *dict;
- crypt_local_t *local;
- void (*unwind_fn)(call_frame_t * frame);
- void (*wind_fn)(call_frame_t * frame, xlator_t * this);
-
- wind_fn = linkop_wind_dispatch(op);
- unwind_fn = linkop_unwind_dispatch(op);
-
- ret = linkop_grab_local(frame, this, oldloc, newloc, flags, xdata, op);
- local = frame->local;
- if (ret)
- goto error;
-
- if (local->fd->inode->ia_type == IA_IFLNK)
- goto wind;
-
- dict = dict_new();
- if (!dict) {
- gf_log(this->name, GF_LOG_ERROR, "Can not create dict");
- ret = ENOMEM;
- goto error;
- }
- /*
- * Set a message to crypt_open() that we need
- * locked metadata string.
- * All link operations (link, unlink, rename)
- * need write lock
- */
- msgflags_set_mtd_wlock(&local->msgflags);
- ret = dict_set_static_bin(dict, MSGFLAGS_PREFIX, &local->msgflags,
- sizeof(local->msgflags));
- if (ret) {
- gf_log(this->name, GF_LOG_ERROR, "Can not set dict");
- dict_unref(dict);
- goto error;
- }
- /*
- * verify metadata against the old pathname
- * and retrieve locked metadata string
- */
- STACK_WIND(frame, linkop_begin, this, this->fops->open, /* crypt_open() */
- oldloc, O_RDWR, local->fd, dict);
- dict_unref(dict);
- return 0;
-
-wind:
- wind_fn(frame, this);
- return 0;
-
-error:
- local->op_ret = -1;
- local->op_errno = ret;
- unwind_fn(frame);
- return 0;
-}
-
-static int32_t
-crypt_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
-{
- return linkop(frame, this, oldloc, newloc, 0, xdata, GF_FOP_LINK);
-}
-
-static int32_t
-crypt_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
- dict_t *xdata)
-{
- return linkop(frame, this, loc, NULL, flags, xdata, GF_FOP_UNLINK);
-}
-
-static int32_t
-crypt_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
-{
- return linkop(frame, this, oldloc, newloc, 0, xdata, GF_FOP_RENAME);
-}
-
-static void
-put_one_call_open(call_frame_t *frame)
-{
- crypt_local_t *local = frame->local;
- if (put_one_call(local)) {
- fd_t *fd = local->fd;
- loc_t *loc = local->loc;
- dict_t *xdata = local->xdata;
-
- CRYPT_STACK_UNWIND(open, frame, local->op_ret, local->op_errno, fd,
- xdata);
- fd_unref(fd);
- if (xdata)
- dict_unref(xdata);
- loc_wipe(loc);
- GF_FREE(loc);
- }
-}
-
-static int32_t
-__crypt_readv_done(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
- fd_t *local_fd = local->fd;
- dict_t *local_xdata = local->xdata;
- /* read deals with data configs only */
- struct iovec *avec = local->data_conf.avec;
- char **pool = local->data_conf.pool;
- int blocks_in_pool = local->data_conf.blocks_in_pool;
- struct iobref *iobref = local->iobref;
- struct iobref *iobref_data = local->iobref_data;
-
- if (op_ret < 0) {
- gf_log(this->name, GF_LOG_WARNING, "readv unlock failed (%d)",
- op_errno);
- if (local->op_ret >= 0) {
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- }
- }
- dump_plain_text(local, avec);
-
- gf_log("crypt", GF_LOG_DEBUG,
- "readv: ret_to_user: %d, iovec len: %d, ia_size: %llu",
- (int)(local->rw_count > 0 ? local->rw_count : local->op_ret),
- (int)(local->rw_count > 0 ? iov_length(avec, local->data_conf.acount)
- : 0),
- (unsigned long long)local->buf.ia_size);
-
- CRYPT_STACK_UNWIND(
- readv, frame, local->rw_count > 0 ? local->rw_count : local->op_ret,
- local->op_errno, avec, avec ? local->data_conf.acount : 0, &local->buf,
- local->iobref, local_xdata);
-
- free_avec(avec, pool, blocks_in_pool);
- fd_unref(local_fd);
- if (local_xdata)
- dict_unref(local_xdata);
- if (iobref)
- iobref_unref(iobref);
- if (iobref_data)
- iobref_unref(iobref_data);
- return 0;
-}
-
-static void
-crypt_readv_done(call_frame_t *frame, xlator_t *this)
-{
- if (parent_is_crypt_xlator(frame, this))
- /*
- * don't unlock (it will be done by the parent)
- */
- __crypt_readv_done(frame, NULL, this, 0, 0, NULL);
- else {
- crypt_local_t *local = frame->local;
- struct gf_flock lock = {
- 0,
- };
-
- lock.l_type = F_UNLCK;
- lock.l_whence = SEEK_SET;
- lock.l_start = 0;
- lock.l_len = 0;
- lock.l_pid = 0;
-
- STACK_WIND(frame, __crypt_readv_done, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, this->name, local->fd,
- F_SETLKW, &lock, NULL);
- }
-}
-
-static void
-put_one_call_readv(call_frame_t *frame, xlator_t *this)
-{
- crypt_local_t *local = frame->local;
- if (put_one_call(local))
- crypt_readv_done(frame, this);
-}
-
-static int32_t
-__crypt_writev_done(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
- fd_t *local_fd = local->fd;
- dict_t *local_xdata = local->xdata;
- int32_t ret_to_user;
-
- if (local->xattr)
- dict_unref(local->xattr);
- /*
- * Calculate amount of butes to be returned
- * to user. We need to subtract paddings that
- * have been written as a part of atom.
- */
- /*
- * subtract head padding
- */
- if (local->rw_count == 0)
- /*
- * Nothing has been written, it must be an error
- */
- ret_to_user = local->op_ret;
- else if (local->rw_count <= local->data_conf.off_in_head) {
- gf_log("crypt", GF_LOG_WARNING, "Incomplete write");
- ret_to_user = 0;
- } else
- ret_to_user = local->rw_count - local->data_conf.off_in_head;
- /*
- * subtract tail padding
- */
- if (ret_to_user > local->data_conf.orig_size)
- ret_to_user = local->data_conf.orig_size;
-
- if (local->iobref)
- iobref_unref(local->iobref);
- if (local->iobref_data)
- iobref_unref(local->iobref_data);
- free_avec_data(local);
- free_avec_hole(local);
-
- gf_log("crypt", GF_LOG_DEBUG, "writev: ret_to_user: %d", ret_to_user);
-
- CRYPT_STACK_UNWIND(writev, frame, ret_to_user, local->op_errno,
- &local->prebuf, &local->postbuf, local_xdata);
- fd_unref(local_fd);
- if (local_xdata)
- dict_unref(local_xdata);
- return 0;
-}
-
-static int32_t
-crypt_writev_done(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- if (op_ret < 0)
- gf_log("crypt", GF_LOG_WARNING, "can not update file size");
-
- if (parent_is_crypt_xlator(frame, this))
- /*
- * don't unlock (it will be done by the parent)
- */
- __crypt_writev_done(frame, NULL, this, 0, 0, NULL);
- else {
- struct gf_flock lock = {
- 0,
- };
-
- lock.l_type = F_UNLCK;
- lock.l_whence = SEEK_SET;
- lock.l_start = 0;
- lock.l_len = 0;
- lock.l_pid = 0;
-
- STACK_WIND(frame, __crypt_writev_done, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, this->name, local->fd,
- F_SETLKW, &lock, NULL);
- }
- return 0;
-}
-
-static void
-put_one_call_writev(call_frame_t *frame, xlator_t *this)
-{
- crypt_local_t *local = frame->local;
- if (put_one_call(local)) {
- if (local->update_disk_file_size) {
- int32_t ret;
- /*
- * update file size, unlock the file and unwind
- */
- ret = dict_set(local->xattr, FSIZE_XATTR_PREFIX,
- data_from_uint64(local->cur_file_size));
- if (ret) {
- gf_log("crypt", GF_LOG_WARNING,
- "can not set key to update file size");
- crypt_writev_done(frame, NULL, this, 0, 0, NULL);
- return;
- }
- gf_log("crypt", GF_LOG_DEBUG, "Updating disk file size to %llu",
- (unsigned long long)local->cur_file_size);
- STACK_WIND(frame, crypt_writev_done, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr, local->fd,
- local->xattr, /* CRYPTO_FORMAT_PREFIX */
- 0, NULL);
- } else
- crypt_writev_done(frame, NULL, this, 0, 0, NULL);
- }
-}
-
-static int32_t
-__crypt_ftruncate_done(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
- fd_t *local_fd = local->fd;
- dict_t *local_xdata = local->xdata;
- char *iobase = local->vec.iov_base;
-
- if (op_ret < 0) {
- gf_log(this->name, GF_LOG_WARNING, "ftruncate unlock failed (%d)",
- op_errno);
- if (local->op_ret >= 0) {
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- }
- }
- if (local->iobref_data)
- iobref_unref(local->iobref_data);
- free_avec_data(local);
- free_avec_hole(local);
-
- gf_log("crypt", GF_LOG_DEBUG,
- "ftruncate, return to user: presize=%llu, postsize=%llu",
- (unsigned long long)local->prebuf.ia_size,
- (unsigned long long)local->postbuf.ia_size);
-
- CRYPT_STACK_UNWIND(ftruncate, frame, ((local->op_ret < 0) ? -1 : 0),
- local->op_errno, &local->prebuf, &local->postbuf,
- local_xdata);
- fd_unref(local_fd);
- if (local_xdata)
- dict_unref(local_xdata);
- if (iobase)
- GF_FREE(iobase);
- return 0;
-}
-
-static int32_t
-crypt_ftruncate_done(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
- struct gf_flock lock = {
- 0,
- };
-
- dict_unref(local->xattr);
- if (op_ret < 0)
- gf_log("crypt", GF_LOG_WARNING, "can not update file size");
-
- lock.l_type = F_UNLCK;
- lock.l_whence = SEEK_SET;
- lock.l_start = 0;
- lock.l_len = 0;
- lock.l_pid = 0;
-
- STACK_WIND(frame, __crypt_ftruncate_done, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, this->name, local->fd,
- F_SETLKW, &lock, NULL);
- return 0;
-}
-
-static void
-put_one_call_ftruncate(call_frame_t *frame, xlator_t *this)
-{
- crypt_local_t *local = frame->local;
- if (put_one_call(local)) {
- if (local->update_disk_file_size) {
- int32_t ret;
- /*
- * update file size, unlock the file and unwind
- */
- ret = dict_set(local->xattr, FSIZE_XATTR_PREFIX,
- data_from_uint64(local->cur_file_size));
- if (ret) {
- gf_log("crypt", GF_LOG_WARNING,
- "can not set key to update file size");
- crypt_ftruncate_done(frame, NULL, this, 0, 0, NULL);
- return;
- }
- gf_log("crypt", GF_LOG_DEBUG, "Updating disk file size to %llu",
- (unsigned long long)local->cur_file_size);
- STACK_WIND(frame, crypt_ftruncate_done, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr, local->fd,
- local->xattr, /* CRYPTO_FORMAT_PREFIX */
- 0, NULL);
- } else
- crypt_ftruncate_done(frame, NULL, this, 0, 0, NULL);
- }
-}
-
-/*
- * load regular file size for some FOPs
- */
-static int32_t
-load_file_size(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
-{
- data_t *data;
- crypt_local_t *local = frame->local;
-
- dict_t *local_xdata = local->xdata;
- inode_t *local_inode = local->inode;
-
- if (op_ret < 0)
- goto unwind;
- /*
- * load regular file size
- */
- data = dict_get(dict, FSIZE_XATTR_PREFIX);
- if (!data) {
- if (local->xdata)
- dict_unref(local->xdata);
- gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
- op_ret = -1;
- op_errno = EIO;
- goto unwind;
- }
- local->buf.ia_size = data_to_uint64(data);
-
- gf_log(this->name, GF_LOG_DEBUG, "FOP %d: Translate regular file to %llu",
- local->fop, (unsigned long long)local->buf.ia_size);
-unwind:
- if (local->fd)
- fd_unref(local->fd);
- if (local->loc) {
- loc_wipe(local->loc);
- GF_FREE(local->loc);
- }
- switch (local->fop) {
- case GF_FOP_FSTAT:
- CRYPT_STACK_UNWIND(fstat, frame, op_ret, op_errno,
- op_ret >= 0 ? &local->buf : NULL, local->xdata);
- break;
- case GF_FOP_STAT:
- CRYPT_STACK_UNWIND(stat, frame, op_ret, op_errno,
- op_ret >= 0 ? &local->buf : NULL, local->xdata);
- break;
- case GF_FOP_LOOKUP:
- CRYPT_STACK_UNWIND(lookup, frame, op_ret, op_errno,
- op_ret >= 0 ? local->inode : NULL,
- op_ret >= 0 ? &local->buf : NULL, local->xdata,
- op_ret >= 0 ? &local->postbuf : NULL);
- break;
- case GF_FOP_READ:
- CRYPT_STACK_UNWIND(readv, frame, op_ret, op_errno, NULL, 0,
- op_ret >= 0 ? &local->buf : NULL, NULL, NULL);
- break;
- default:
- gf_log(this->name, GF_LOG_WARNING, "Improper file operation %d",
- local->fop);
- }
- if (local_xdata)
- dict_unref(local_xdata);
- if (local_inode)
- inode_unref(local_inode);
- return 0;
-}
-
-static int32_t
-crypt_stat_common_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- dict_t *xdata)
-{
- crypt_local_t *local = frame->local;
-
- if (op_ret < 0)
- goto unwind;
- if (!IA_ISREG(buf->ia_type))
- goto unwind;
-
- local->buf = *buf;
- if (xdata)
- local->xdata = dict_ref(xdata);
-
- switch (local->fop) {
- case GF_FOP_FSTAT:
- STACK_WIND(frame, load_file_size, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetxattr, local->fd,
- FSIZE_XATTR_PREFIX, NULL);
- break;
- case GF_FOP_STAT:
- STACK_WIND(frame, load_file_size, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, local->loc,
- FSIZE_XATTR_PREFIX, NULL);
- break;
- default:
- gf_log(this->name, GF_LOG_WARNING, "Improper file operation %d",
- local->fop);
- }
- return 0;
-unwind:
- if (local->fd)
- fd_unref(local->fd);
- if (local->loc) {
- loc_wipe(local->loc);
- GF_FREE(local->loc);
- }
- switch (local->fop) {
- case GF_FOP_FSTAT:
- CRYPT_STACK_UNWIND(fstat, frame, op_ret, op_errno,
- op_ret >= 0 ? buf : NULL,
- op_ret >= 0 ? xdata : NULL);
- break;
- case GF_FOP_STAT:
- CRYPT_STACK_UNWIND(stat, frame, op_ret, op_errno,
- op_ret >= 0 ? buf : NULL,
- op_ret >= 0 ? xdata : NULL);
- break;
- default:
- gf_log(this->name, GF_LOG_WARNING, "Improper file operation %d",
- local->fop);
- }
- return 0;
-}
-
-static int32_t
-crypt_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
-{
- crypt_local_t *local;
-
- local = crypt_alloc_local(frame, this, GF_FOP_FSTAT);
- if (!local)
- goto error;
- local->fd = fd_ref(fd);
- STACK_WIND(frame, crypt_stat_common_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd, xdata);
- return 0;
-error:
- CRYPT_STACK_UNWIND(fstat, frame, -1, ENOMEM, NULL, NULL);
- return 0;
-}
-
-static int32_t
-crypt_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- int32_t ret;
- crypt_local_t *local;
-
- local = crypt_alloc_local(frame, this, GF_FOP_STAT);
- if (!local)
- goto error;
- local->loc = GF_CALLOC(1, sizeof(loc_t), gf_crypt_mt_loc);
- if (!local->loc)
- goto error;
- ret = loc_copy(local->loc, loc);
- if (ret) {
- GF_FREE(local->loc);
- goto error;
- }
- STACK_WIND(frame, crypt_stat_common_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat, loc, xdata);
- return 0;
-error:
- CRYPT_STACK_UNWIND(stat, frame, -1, ENOMEM, NULL, NULL);
- return 0;
-}
-
-static int32_t
-crypt_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xdata, struct iatt *postparent)
-{
- crypt_local_t *local = frame->local;
-
- if (op_ret < 0)
- goto unwind;
- if (!IA_ISREG(buf->ia_type))
- goto unwind;
-
- local->inode = inode_ref(inode);
- local->buf = *buf;
- local->postbuf = *postparent;
- if (xdata)
- local->xdata = dict_ref(xdata);
- gf_uuid_copy(local->loc->gfid, buf->ia_gfid);
-
- STACK_WIND(frame, load_file_size, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, local->loc,
- FSIZE_XATTR_PREFIX, NULL);
- return 0;
-unwind:
- loc_wipe(local->loc);
- GF_FREE(local->loc);
- CRYPT_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata,
- postparent);
- return 0;
-}
-
-static int32_t
-crypt_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- int32_t ret;
- crypt_local_t *local;
-
- local = crypt_alloc_local(frame, this, GF_FOP_LOOKUP);
- if (!local)
- goto error;
- local->loc = GF_CALLOC(1, sizeof(loc_t), gf_crypt_mt_loc);
- if (!local->loc)
- goto error;
- ret = loc_copy(local->loc, loc);
- if (ret) {
- GF_FREE(local->loc);
- goto error;
- }
- gf_log(this->name, GF_LOG_DEBUG, "Lookup %s", loc->path);
- STACK_WIND(frame, crypt_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, xdata);
- return 0;
-error:
- CRYPT_STACK_UNWIND(lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL);
- return 0;
-}
-
-/*
- * for every regular directory entry find its real file size
- * and update stat's buf properly
- */
-static int32_t
-crypt_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
- dict_t *xdata)
-{
- gf_dirent_t *entry = NULL;
-
- if (op_ret < 0)
- goto unwind;
-
- list_for_each_entry(entry, (&entries->list), list)
- {
- data_t *data;
-
- if (!IA_ISREG(entry->d_stat.ia_type))
- continue;
- data = dict_get(entry->dict, FSIZE_XATTR_PREFIX);
- if (!data) {
- gf_log("crypt", GF_LOG_WARNING,
- "Regular file size of direntry not found");
- op_errno = EIO;
- op_ret = -1;
- break;
- }
- entry->d_stat.ia_size = data_to_uint64(data);
- }
-unwind:
- CRYPT_STACK_UNWIND(readdirp, frame, op_ret, op_errno, entries, xdata);
- return 0;
-}
-
-/*
- * ->readdirp() fills in-core inodes, so we need to set proper
- * file sizes for all directory entries of the parent @fd.
- * Actual updates take place in ->crypt_readdirp_cbk()
- */
-static int32_t
-crypt_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, dict_t *xdata)
-{
- int32_t ret = ENOMEM;
-
- if (!xdata) {
- xdata = dict_new();
- if (!xdata)
- goto error;
- } else
- dict_ref(xdata);
- /*
- * make sure that we'll have real file sizes at ->readdirp_cbk()
- */
- ret = dict_set(xdata, FSIZE_XATTR_PREFIX, data_from_uint64(0));
- if (ret) {
- dict_unref(xdata);
- ret = ENOMEM;
- goto error;
- }
- STACK_WIND(frame, crypt_readdirp_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdirp, fd, size, offset, xdata);
- dict_unref(xdata);
- return 0;
-error:
- CRYPT_STACK_UNWIND(readdirp, frame, -1, ret, NULL, NULL);
- return 0;
-}
-
-static int32_t
-crypt_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
- dict_t *xdata)
-{
- gf_log(this->name, GF_LOG_WARNING,
- "NFS mounts of encrypted volumes are unsupported");
- CRYPT_STACK_UNWIND(access, frame, -1, EPERM, NULL);
- return 0;
-}
-
-int32_t
-master_set_block_size(xlator_t *this, crypt_private_t *priv, dict_t *options)
-{
- uint64_t block_size = 0;
- struct master_cipher_info *master = get_master_cinfo(priv);
-
- if (options != NULL)
- GF_OPTION_RECONF("block-size", block_size, options, size_uint64, error);
- else
- GF_OPTION_INIT("block-size", block_size, size_uint64, error);
-
- switch (block_size) {
- case 512:
- master->m_block_bits = 9;
- break;
- case 1024:
- master->m_block_bits = 10;
- break;
- case 2048:
- master->m_block_bits = 11;
- break;
- case 4096:
- master->m_block_bits = 12;
- break;
- default:
- gf_log("crypt", GF_LOG_ERROR, "FATAL: unsupported block size %llu",
- (unsigned long long)block_size);
- goto error;
- }
- return 0;
-error:
- return -1;
-}
-
-int32_t
-master_set_alg(xlator_t *this, crypt_private_t *priv)
-{
- struct master_cipher_info *master = get_master_cinfo(priv);
- master->m_alg = AES_CIPHER_ALG;
- return 0;
-}
-
-int32_t
-master_set_mode(xlator_t *this, crypt_private_t *priv)
-{
- struct master_cipher_info *master = get_master_cinfo(priv);
- master->m_mode = XTS_CIPHER_MODE;
- return 0;
-}
-
-/*
- * set key size in bits to the master info
- * Pre-conditions: cipher mode in the master info is uptodate.
- */
-static int
-master_set_data_key_size(xlator_t *this, crypt_private_t *priv, dict_t *options)
-{
- int32_t ret;
- uint64_t key_size = 0;
- struct master_cipher_info *master = get_master_cinfo(priv);
-
- if (options != NULL)
- GF_OPTION_RECONF("data-key-size", key_size, options, uint64, error);
- else
- GF_OPTION_INIT("data-key-size", key_size, uint64, error);
-
- ret = data_cipher_algs[master->m_alg][master->m_mode].check_key(key_size);
- if (ret) {
- gf_log("crypt", GF_LOG_ERROR,
- "FATAL: wrong bin key size %llu for alg %d mode %d",
- (unsigned long long)key_size, (int)master->m_alg,
- (int)master->m_mode);
- goto error;
- }
- master->m_dkey_size = key_size;
- return 0;
-error:
- return -1;
-}
-
-static int
-is_hex(char *s)
-{
- return ('0' <= *s && *s <= '9') || ('a' <= *s && *s <= 'f');
-}
-
-static int
-parse_hex_buf(xlator_t *this, char *src, unsigned char *dst, int hex_size)
-{
- int i;
- int hex_byte = 0;
-
- for (i = 0; i < (hex_size / 2); i++) {
- if (!is_hex(src + i * 2) || !is_hex(src + i * 2 + 1)) {
- gf_log("crypt", GF_LOG_ERROR, "FATAL: not hex symbol in key");
- return -1;
- }
- if (sscanf(src + i * 2, "%2x", &hex_byte) != 1) {
- gf_log("crypt", GF_LOG_ERROR, "FATAL: can not parse hex key");
- return -1;
- }
- dst[i] = hex_byte & 0xff;
- }
- return 0;
-}
-
-/*
- * Parse options;
- * install master volume key
- */
-int32_t
-master_set_master_vol_key(xlator_t *this, crypt_private_t *priv)
-{
- int32_t ret;
- FILE *file = NULL;
-
- int32_t key_size;
- char *opt_key_file_pathname = NULL;
-
- unsigned char bin_buf[MASTER_VOL_KEY_SIZE];
- char hex_buf[2 * MASTER_VOL_KEY_SIZE];
-
- struct master_cipher_info *master = get_master_cinfo(priv);
- /*
- * extract master key passed via option
- */
- GF_OPTION_INIT("master-key", opt_key_file_pathname, path, bad_key);
-
- if (!opt_key_file_pathname) {
- gf_log(this->name, GF_LOG_ERROR, "FATAL: missing master key");
- return -1;
- }
- gf_log(this->name, GF_LOG_DEBUG, "handling file key %s",
- opt_key_file_pathname);
-
- file = fopen(opt_key_file_pathname, "r");
- if (file == NULL) {
- gf_log(this->name, GF_LOG_ERROR,
- "FATAL: can not open file with master key");
- return -1;
- }
- /*
- * extract hex key
- */
- key_size = fread(hex_buf, 1, sizeof(hex_buf), file);
- if (key_size < sizeof(hex_buf)) {
- gf_log(this->name, GF_LOG_ERROR, "FATAL: master key is too short");
- goto bad_key;
- }
- ret = parse_hex_buf(this, hex_buf, bin_buf, key_size);
- if (ret)
- goto bad_key;
- memcpy(master->m_key, bin_buf, MASTER_VOL_KEY_SIZE);
- memset(hex_buf, 0, sizeof(hex_buf));
- fclose(file);
-
- memset(bin_buf, 0, sizeof(bin_buf));
- return 0;
-bad_key:
- gf_log(this->name, GF_LOG_ERROR, "FATAL: bad master key");
- if (file)
- fclose(file);
- memset(bin_buf, 0, sizeof(bin_buf));
- return -1;
-}
-
-/*
- * Derive volume key for object-id authentication
- */
-int32_t
-master_set_nmtd_vol_key(xlator_t *this, crypt_private_t *priv)
-{
- return get_nmtd_vol_key(get_master_cinfo(priv));
-}
-
-int32_t
-crypt_init_xlator(xlator_t *this)
-{
- int32_t ret;
- crypt_private_t *priv = this->private;
-
- ret = master_set_alg(this, priv);
- if (ret)
- return ret;
- ret = master_set_mode(this, priv);
- if (ret)
- return ret;
- ret = master_set_block_size(this, priv, NULL);
- if (ret)
- return ret;
- ret = master_set_data_key_size(this, priv, NULL);
- if (ret)
- return ret;
- ret = master_set_master_vol_key(this, priv);
- if (ret)
- return ret;
- return master_set_nmtd_vol_key(this, priv);
-}
-
-static int32_t
-crypt_alloc_private(xlator_t *this)
-{
- this->private = GF_CALLOC(1, sizeof(crypt_private_t), gf_crypt_mt_priv);
- if (!this->private) {
- gf_log("crypt", GF_LOG_ERROR,
- "Can not allocate memory for private data");
- return ENOMEM;
- }
- return 0;
-}
-
-static void
-crypt_free_private(xlator_t *this)
-{
- crypt_private_t *priv = this->private;
- if (priv) {
- memset(priv, 0, sizeof(*priv));
- GF_FREE(priv);
- }
-}
-
-int32_t
-mem_acct_init(xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init(this, gf_crypt_mt_end);
-
- if (ret != 0) {
- gf_log(this->name, GF_LOG_ERROR,
- "Memory accounting init"
- "failed");
- return ret;
- }
-
- return ret;
-}
-
-int32_t
-reconfigure(xlator_t *this, dict_t *options)
-{
- int32_t ret = -1;
- crypt_private_t *priv = NULL;
-
- GF_VALIDATE_OR_GOTO("crypt", this, error);
- GF_VALIDATE_OR_GOTO(this->name, this->private, error);
- GF_VALIDATE_OR_GOTO(this->name, options, error);
-
- priv = this->private;
-
- ret = master_set_block_size(this, priv, options);
- if (ret) {
- gf_log("this->name", GF_LOG_ERROR, "Failed to reconfure block size");
- goto error;
- }
- ret = master_set_data_key_size(this, priv, options);
- if (ret) {
- gf_log("this->name", GF_LOG_ERROR, "Failed to reconfure data key size");
- goto error;
- }
- return 0;
-error:
- return ret;
-}
-
-int32_t
-init(xlator_t *this)
-{
- int32_t ret;
-
- if (!this->children || this->children->next) {
- gf_log("crypt", GF_LOG_ERROR,
- "FATAL: crypt should have exactly one child");
- return EINVAL;
- }
- if (!this->parents) {
- gf_log(this->name, GF_LOG_WARNING, "dangling volume. check volfile ");
- }
- ret = crypt_alloc_private(this);
- if (ret)
- return ret;
- ret = crypt_init_xlator(this);
- if (ret)
- goto error;
- this->local_pool = mem_pool_new(crypt_local_t, 64);
- if (!this->local_pool) {
- gf_log(this->name, GF_LOG_ERROR,
- "failed to create local_t's memory pool");
- ret = ENOMEM;
- goto error;
- }
- gf_log("crypt", GF_LOG_INFO, "crypt xlator loaded");
- return 0;
-error:
- crypt_free_private(this);
- return ret;
-}
-
-void
-fini(xlator_t *this)
-{
- crypt_free_private(this);
-}
-
-struct xlator_fops fops = {.readv = crypt_readv,
- .writev = crypt_writev,
- .truncate = crypt_truncate,
- .ftruncate = crypt_ftruncate,
- .setxattr = crypt_setxattr,
- .fsetxattr = crypt_fsetxattr,
- .link = crypt_link,
- .unlink = crypt_unlink,
- .rename = crypt_rename,
- .open = crypt_open,
- .create = crypt_create,
- .stat = crypt_stat,
- .fstat = crypt_fstat,
- .lookup = crypt_lookup,
- .readdirp = crypt_readdirp,
- .access = crypt_access};
-
-struct xlator_cbks cbks = {.forget = crypt_forget};
-
-struct volume_options options[] = {
- {.key = {"master-key"},
- .type = GF_OPTION_TYPE_PATH,
- .description =
- "Pathname of regular file which contains master volume key"},
- {
- .key = {"data-key-size"},
- .type = GF_OPTION_TYPE_SIZET,
- .description = "Data key size (bits)",
- .min = 256,
- .max = 512,
- .default_value = "256",
- },
- {.key = {"block-size"},
- .type = GF_OPTION_TYPE_SIZET,
- .description = "Atom size (bits)",
- .min = 512,
- .max = 4096,
- .default_value = "4096"},
- {.key = {NULL}},
-};
-
-/*
- Local variables:
- c-indentation-style: "K&R"
- mode-name: "LC"
- c-basic-offset: 8
- tab-width: 8
- fill-column: 80
- scroll-step: 1
- End:
-*/
diff --git a/xlators/encryption/crypt/src/crypt.h b/xlators/encryption/crypt/src/crypt.h
deleted file mode 100644
index 390eee831b1..00000000000
--- a/xlators/encryption/crypt/src/crypt.h
+++ /dev/null
@@ -1,931 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __CRYPT_H__
-#define __CRYPT_H__
-
-#include <openssl/aes.h>
-#include <openssl/evp.h>
-#include <openssl/sha.h>
-#include <openssl/hmac.h>
-#include <openssl/cmac.h>
-#include <openssl/modes.h>
-#include "crypt-mem-types.h"
-#include "compat.h"
-
-#define CRYPT_XLATOR_ID (0)
-
-#define MAX_IOVEC_BITS (3)
-#define MAX_IOVEC (1 << MAX_IOVEC_BITS)
-#define KEY_FACTOR_BITS (6)
-
-#define DEBUG_CRYPT (0)
-#define TRIVIAL_TFM (0)
-
-#define CRYPT_MIN_BLOCK_BITS (9)
-#define CRYPT_MAX_BLOCK_BITS (12)
-
-#define MASTER_VOL_KEY_SIZE (32)
-#define NMTD_VOL_KEY_SIZE (16)
-
-#if !defined(GF_LINUX_HOST_OS)
-typedef off_t loff_t;
-#endif
-
-struct crypt_key {
- uint32_t len;
- const char *label;
-};
-
-/*
- * Add new key types to the end of this
- * enumeration but before LAST_KEY_TYPE
- */
-typedef enum {
- MASTER_VOL_KEY,
- NMTD_VOL_KEY,
- NMTD_LINK_KEY,
- EMTD_FILE_KEY,
- DATA_FILE_KEY_256,
- DATA_FILE_KEY_512,
- LAST_KEY_TYPE
-} crypt_key_type;
-
-struct kderive_context {
- const unsigned char *pkey; /* parent key */
- uint32_t pkey_len; /* parent key size, bits */
- uint32_t ckey_len; /* child key size, bits */
- unsigned char *fid; /* fixed input data, NIST 800-108, 5.1 */
- uint32_t fid_len; /* fid len, bytes */
- unsigned char *out; /* contains child keying material */
- uint32_t out_len; /* out len, bytes */
-};
-
-typedef enum { DATA_ATOM, HOLE_ATOM, LAST_DATA_TYPE } atom_data_type;
-
-typedef enum {
- HEAD_ATOM,
- TAIL_ATOM,
- FULL_ATOM,
- LAST_LOCALITY_TYPE
-} atom_locality_type;
-
-typedef enum {
- MTD_CREATE,
- MTD_APPEND,
- MTD_OVERWRITE,
- MTD_CUT,
- MTD_LAST_OP
-} mtd_op_t;
-
-struct xts128_context {
- void *key1, *key2;
- block128_f block1, block2;
-};
-
-struct object_cipher_info {
- cipher_alg_t o_alg;
- cipher_mode_t o_mode;
- uint32_t o_block_bits;
- uint32_t o_dkey_size; /* raw data key size in bits */
- union {
- struct {
- unsigned char ivec[16];
- AES_KEY dkey[2];
- AES_KEY tkey; /* key used for tweaking */
- XTS128_CONTEXT xts;
- } aes_xts;
- } u;
-};
-
-struct master_cipher_info {
- /*
- * attributes inherited by newly created regular files
- */
- cipher_alg_t m_alg;
- cipher_mode_t m_mode;
- uint32_t m_block_bits;
- uint32_t m_dkey_size; /* raw key size in bits */
- /*
- * master key
- */
- unsigned char m_key[MASTER_VOL_KEY_SIZE];
- /*
- * volume key for oid authentication
- */
- unsigned char m_nmtd_key[NMTD_VOL_KEY_SIZE];
-};
-
-/*
- * This info is not changed during file's life
- */
-struct crypt_inode_info {
-#if DEBUG_CRYPT
- loc_t *loc; /* pathname that the file has been
- opened, or created with */
-#endif
- uint16_t nr_minor;
- uuid_t oid;
- struct object_cipher_info cinfo;
-};
-
-/*
- * this should locate in secure memory
- */
-typedef struct {
- struct master_cipher_info master;
-} crypt_private_t;
-
-static inline struct master_cipher_info *
-get_master_cinfo(crypt_private_t *priv)
-{
- return &priv->master;
-}
-
-static inline struct object_cipher_info *
-get_object_cinfo(struct crypt_inode_info *info)
-{
- return &info->cinfo;
-}
-
-/*
- * this describes layouts and properties
- * of atoms in an aligned vector
- */
-struct avec_config {
- uint32_t atom_size;
- atom_data_type type;
- size_t orig_size;
- off_t orig_offset;
- size_t expanded_size;
- off_t aligned_offset;
-
- uint32_t off_in_head;
- uint32_t off_in_tail;
- uint32_t gap_in_tail;
- uint32_t nr_full_blocks;
-
- struct iovec *avec; /* aligned vector */
- uint32_t acount; /* number of avec components. The same
- * as number of occupied logical blocks */
- char **pool;
- uint32_t blocks_in_pool;
- uint32_t cursor; /* makes sense only for ordered writes,
- * so there is no races on this counter.
- *
- * Cursor is per-config object, we don't
- * reset cursor for atoms of different
- * localities (head, tail, full)
- */
-};
-
-typedef struct {
- glusterfs_fop_t fop; /* code of FOP this local info built for */
- fd_t *fd;
- inode_t *inode;
- loc_t *loc;
- int32_t mac_idx;
- loc_t *newloc;
- int32_t flags;
- int32_t wbflags;
- struct crypt_inode_info *info;
- struct iobref *iobref;
- struct iobref *iobref_data;
- off_t offset;
-
- uint64_t old_file_size; /* per FOP, retrieved under lock held */
- uint64_t cur_file_size; /* per iteration, before issuing IOs */
- uint64_t new_file_size; /* per iteration, after issuing IOs */
-
- uint64_t io_offset; /* offset of IOs issued per iteration */
- uint64_t io_offset_nopad; /* offset of user's data in the atom */
- uint32_t io_size; /* size of IOs issued per iteration */
- uint32_t io_size_nopad; /* size of user's data in the IOs */
- uint32_t eof_padding_size; /* size od EOF padding in the IOs */
-
- gf_lock_t call_lock; /* protect nr_calls from many cbks */
- int32_t nr_calls;
-
- atom_data_type active_setup; /* which setup (hole or date)
- is currently active */
- /* data setup */
- struct avec_config data_conf;
-
- /* hole setup */
- int hole_conv_in_proggress;
- gf_lock_t hole_lock; /* protect hole config from many cbks */
- int hole_handled;
- struct avec_config hole_conf;
- struct iatt buf;
- struct iatt prebuf;
- struct iatt postbuf;
- struct iatt *prenewparent;
- struct iatt *postnewparent;
- int32_t op_ret;
- int32_t op_errno;
- int32_t rw_count; /* total read or written */
- gf_lock_t rw_count_lock; /* protect the counter above */
- unsigned char *format; /* for create, update format string */
- uint32_t format_size;
- uint32_t msgflags; /* messages for crypt_open() */
- dict_t *xdata;
- dict_t *xattr;
- struct iovec vec; /* contains last file's atom for
- read-prune-write sequence */
- gf_boolean_t custom_mtd;
- /*
- * the next 3 fields are used by readdir and friends
- */
- gf_dirent_t *de; /* directory entry */
- char *de_path; /* pathname of directory entry */
- uint32_t de_prefix_len; /* length of the parent's pathname */
- gf_dirent_t *entries;
-
- uint32_t update_disk_file_size : 1;
-} crypt_local_t;
-
-/* This represents a (read)modify-write atom */
-struct rmw_atom {
- atom_locality_type locality;
- /*
- * read-modify-write sequence of the atom
- */
- int32_t (*rmw)(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vec,
- int32_t count, struct iatt *stbuf, struct iobref *iobref,
- dict_t *xdata);
- /*
- * offset of the logical block in a file
- */
- loff_t (*offset_at)(call_frame_t *frame, struct object_cipher_info *object);
- /*
- * IO offset in an atom
- */
- uint32_t (*offset_in)(call_frame_t *frame,
- struct object_cipher_info *object);
- /*
- * number of bytes of plain text of this atom that user
- * wants to read/write.
- * It can be smaller than atom_size in the case of head
- * or tail atoms.
- */
- uint32_t (*io_size_nopad)(call_frame_t *frame,
- struct object_cipher_info *object);
- /*
- * which iovec represents the atom
- */
- struct iovec *(*get_iovec)(call_frame_t *frame, uint32_t count);
- /*
- * how many bytes of partial block should be uptodated by
- * reading from disk.
- * This is used to perform a read component of RMW (read-modify-write).
- */
- uint32_t (*count_to_uptodate)(call_frame_t *frame,
- struct object_cipher_info *object);
- struct avec_config *(*get_config)(call_frame_t *frame);
-};
-
-struct data_cipher_alg {
- gf_boolean_t atomic; /* true means that algorithm requires
- to pad data before cipher transform */
- gf_boolean_t should_pad; /* true means that algorithm requires
- to pad the end of file with extra-data */
- uint32_t blkbits; /* blksize = 1 << blkbits */
- /*
- * any preliminary sanity checks goes here
- */
- int32_t (*init)(void);
- /*
- * set alg-mode specific inode info
- */
- int32_t (*set_private)(struct crypt_inode_info *info,
- struct master_cipher_info *master);
- /*
- * check alg-mode specific data key
- */
- int32_t (*check_key)(uint32_t key_size);
- void (*set_iv)(off_t offset, struct object_cipher_info *object);
- int32_t (*encrypt)(const unsigned char *from, unsigned char *to,
- size_t length, off_t offset, const int enc,
- struct object_cipher_info *object);
-};
-
-/*
- * version-dependent metadata loader
- */
-struct crypt_mtd_loader {
- /*
- * return core format size
- */
- size_t (*format_size)(mtd_op_t op, size_t old_size);
- /*
- * pack version-specific metadata of an object
- * at ->create()
- */
- int32_t (*create_format)(unsigned char *wire, loc_t *loc,
- struct crypt_inode_info *info,
- struct master_cipher_info *master);
- /*
- * extract version-specific metadata of an object
- * at ->open() time
- */
- int32_t (*open_format)(unsigned char *wire, int32_t len, loc_t *loc,
- struct crypt_inode_info *info,
- struct master_cipher_info *master,
- crypt_local_t *local, gf_boolean_t load_info);
- int32_t (*update_format)(unsigned char *new, unsigned char *old,
- size_t old_len, int32_t mac_idx, mtd_op_t op,
- loc_t *loc, struct crypt_inode_info *info,
- struct master_cipher_info *master,
- crypt_local_t *local);
-};
-
-typedef int32_t (*end_writeback_handler_t)(call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata);
-typedef void (*linkop_wind_handler_t)(call_frame_t *frame, xlator_t *this);
-typedef void (*linkop_unwind_handler_t)(call_frame_t *frame);
-
-/* Declarations */
-
-/* keys.c */
-extern struct crypt_key crypt_keys[LAST_KEY_TYPE];
-int32_t
-get_nmtd_vol_key(struct master_cipher_info *master);
-int32_t
-get_nmtd_link_key(loc_t *loc, struct master_cipher_info *master,
- unsigned char *result);
-int32_t
-get_emtd_file_key(struct crypt_inode_info *info,
- struct master_cipher_info *master, unsigned char *result);
-int32_t
-get_data_file_key(struct crypt_inode_info *info,
- struct master_cipher_info *master, uint32_t keysize,
- unsigned char *key);
-/* data.c */
-extern struct data_cipher_alg data_cipher_algs[LAST_CIPHER_ALG]
- [LAST_CIPHER_MODE];
-void
-encrypt_aligned_iov(struct object_cipher_info *object, struct iovec *vec,
- int count, off_t off);
-void
-decrypt_aligned_iov(struct object_cipher_info *object, struct iovec *vec,
- int count, off_t off);
-int32_t
-align_iov_by_atoms(xlator_t *this, crypt_local_t *local,
- struct object_cipher_info *object,
- struct iovec *vec /* input vector */,
- int32_t count /* number of vec components */,
- struct iovec *avec /* aligned vector */,
- char **blocks /* pool of blocks */,
- uint32_t *blocks_allocated, struct avec_config *conf);
-int32_t
-set_config_avec_data(xlator_t *this, crypt_local_t *local,
- struct avec_config *conf,
- struct object_cipher_info *object, struct iovec *vec,
- int32_t vec_count);
-int32_t
-set_config_avec_hole(xlator_t *this, crypt_local_t *local,
- struct avec_config *conf,
- struct object_cipher_info *object, glusterfs_fop_t fop);
-void
-set_gap_at_end(call_frame_t *frame, struct object_cipher_info *object,
- struct avec_config *conf, atom_data_type dtype);
-void
-set_config_offsets(call_frame_t *frame, xlator_t *this, uint64_t offset,
- uint64_t count, atom_data_type dtype,
- int32_t setup_gap_in_tail);
-
-/* metadata.c */
-extern struct crypt_mtd_loader mtd_loaders[LAST_MTD_LOADER];
-
-int32_t
-alloc_format(crypt_local_t *local, size_t size);
-int32_t
-alloc_format_create(crypt_local_t *local);
-void
-free_format(crypt_local_t *local);
-size_t
-format_size(mtd_op_t op, size_t old_size);
-size_t
-new_format_size(void);
-int32_t
-open_format(unsigned char *str, int32_t len, loc_t *loc,
- struct crypt_inode_info *info, struct master_cipher_info *master,
- crypt_local_t *local, gf_boolean_t load_info);
-int32_t
-update_format(unsigned char *new, unsigned char *old, size_t old_len,
- int32_t mac_idx, mtd_op_t op, loc_t *loc,
- struct crypt_inode_info *info, struct master_cipher_info *master,
- crypt_local_t *local);
-int32_t
-create_format(unsigned char *wire, loc_t *loc, struct crypt_inode_info *info,
- struct master_cipher_info *master);
-
-/* atom.c */
-struct rmw_atom *
-atom_by_types(atom_data_type data, atom_locality_type locality);
-void
-submit_partial(call_frame_t *frame, xlator_t *this, fd_t *fd,
- atom_locality_type ltype);
-void
-submit_full(call_frame_t *frame, xlator_t *this);
-
-/* crypt.c */
-
-end_writeback_handler_t
-dispatch_end_writeback(glusterfs_fop_t fop);
-void
-set_local_io_params_writev(call_frame_t *frame,
- struct object_cipher_info *object,
- struct rmw_atom *atom, off_t io_offset,
- uint32_t io_size);
-void
-link_wind(call_frame_t *frame, xlator_t *this);
-void
-unlink_wind(call_frame_t *frame, xlator_t *this);
-void
-link_unwind(call_frame_t *frame);
-void
-unlink_unwind(call_frame_t *frame);
-void
-rename_wind(call_frame_t *frame, xlator_t *this);
-void
-rename_unwind(call_frame_t *frame);
-
-/* Inline functions */
-
-static inline int32_t
-crypt_xlator_id(void)
-{
- return CRYPT_XLATOR_ID;
-}
-
-static inline mtd_loader_id
-current_mtd_loader(void)
-{
- return MTD_LOADER_V1;
-}
-
-static inline uint32_t
-master_key_size(void)
-{
- return crypt_keys[MASTER_VOL_KEY].len >> 3;
-}
-
-static inline uint32_t
-nmtd_vol_key_size(void)
-{
- return crypt_keys[NMTD_VOL_KEY].len >> 3;
-}
-
-static inline uint32_t
-alg_mode_blkbits(cipher_alg_t alg, cipher_mode_t mode)
-{
- return data_cipher_algs[alg][mode].blkbits;
-}
-
-static inline uint32_t
-alg_mode_blksize(cipher_alg_t alg, cipher_mode_t mode)
-{
- return 1 << alg_mode_blkbits(alg, mode);
-}
-
-static inline gf_boolean_t
-alg_mode_atomic(cipher_alg_t alg, cipher_mode_t mode)
-{
- return data_cipher_algs[alg][mode].atomic;
-}
-
-static inline gf_boolean_t
-alg_mode_should_pad(cipher_alg_t alg, cipher_mode_t mode)
-{
- return data_cipher_algs[alg][mode].should_pad;
-}
-
-static inline uint32_t
-master_alg_blksize(struct master_cipher_info *mr)
-{
- return alg_mode_blksize(mr->m_alg, mr->m_mode);
-}
-
-static inline uint32_t
-master_alg_blkbits(struct master_cipher_info *mr)
-{
- return alg_mode_blkbits(mr->m_alg, mr->m_mode);
-}
-
-static inline gf_boolean_t
-master_alg_atomic(struct master_cipher_info *mr)
-{
- return alg_mode_atomic(mr->m_alg, mr->m_mode);
-}
-
-static inline gf_boolean_t
-master_alg_should_pad(struct master_cipher_info *mr)
-{
- return alg_mode_should_pad(mr->m_alg, mr->m_mode);
-}
-
-static inline uint32_t
-object_alg_blksize(struct object_cipher_info *ob)
-{
- return alg_mode_blksize(ob->o_alg, ob->o_mode);
-}
-
-static inline uint32_t
-object_alg_blkbits(struct object_cipher_info *ob)
-{
- return alg_mode_blkbits(ob->o_alg, ob->o_mode);
-}
-
-static inline gf_boolean_t
-object_alg_atomic(struct object_cipher_info *ob)
-{
- return alg_mode_atomic(ob->o_alg, ob->o_mode);
-}
-
-static inline gf_boolean_t
-object_alg_should_pad(struct object_cipher_info *ob)
-{
- return alg_mode_should_pad(ob->o_alg, ob->o_mode);
-}
-
-static inline uint32_t
-aes_raw_key_size(struct master_cipher_info *master)
-{
- return master->m_dkey_size >> 3;
-}
-
-static inline struct avec_config *
-get_hole_conf(call_frame_t *frame)
-{
- return &(((crypt_local_t *)frame->local)->hole_conf);
-}
-
-static inline struct avec_config *
-get_data_conf(call_frame_t *frame)
-{
- return &(((crypt_local_t *)frame->local)->data_conf);
-}
-
-static inline int32_t
-get_atom_bits(struct object_cipher_info *object)
-{
- return object->o_block_bits;
-}
-
-static inline int32_t
-get_atom_size(struct object_cipher_info *object)
-{
- return 1 << get_atom_bits(object);
-}
-
-static inline int32_t
-has_head_block(struct avec_config *conf)
-{
- return conf->off_in_head || (conf->acount == 1 && conf->off_in_tail);
-}
-
-static inline int32_t
-has_tail_block(struct avec_config *conf)
-{
- return conf->off_in_tail && conf->acount > 1;
-}
-
-static inline int32_t
-has_full_blocks(struct avec_config *conf)
-{
- return conf->nr_full_blocks;
-}
-
-static inline int32_t
-should_submit_head_block(struct avec_config *conf)
-{
- return has_head_block(conf) && (conf->cursor == 0);
-}
-
-static inline int32_t
-should_submit_tail_block(struct avec_config *conf)
-{
- return has_tail_block(conf) && (conf->cursor == conf->acount - 1);
-}
-
-static inline int32_t
-should_submit_full_block(struct avec_config *conf)
-{
- uint32_t start = has_head_block(conf) ? 1 : 0;
-
- return has_full_blocks(conf) && conf->cursor >= start &&
- conf->cursor < start + conf->nr_full_blocks;
-}
-
-#if DEBUG_CRYPT
-static inline void
-crypt_check_input_len(size_t len, struct object_cipher_info *object)
-{
- if (object_alg_should_pad(object) &&
- (len & (object_alg_blksize(object) - 1)))
- gf_log("crypt", GF_LOG_DEBUG, "bad input len: %d", (int)len);
-}
-
-static inline void
-check_head_block(struct avec_config *conf)
-{
- if (!has_head_block(conf))
- gf_log("crypt", GF_LOG_DEBUG, "not a head atom");
-}
-
-static inline void
-check_tail_block(struct avec_config *conf)
-{
- if (!has_tail_block(conf))
- gf_log("crypt", GF_LOG_DEBUG, "not a tail atom");
-}
-
-static inline void
-check_full_block(struct avec_config *conf)
-{
- if (!has_full_blocks(conf))
- gf_log("crypt", GF_LOG_DEBUG, "not a full atom");
-}
-
-static inline void
-check_cursor_head(struct avec_config *conf)
-{
- if (!has_head_block(conf))
- gf_log("crypt", GF_LOG_DEBUG, "Illegal call of head atom method");
- else if (conf->cursor != 0)
- gf_log("crypt", GF_LOG_DEBUG, "Cursor (%d) is not at head atom",
- conf->cursor);
-}
-
-static inline void
-check_cursor_full(struct avec_config *conf)
-{
- if (!has_full_blocks(conf))
- gf_log("crypt", GF_LOG_DEBUG, "Illegal call of full atom method");
- if (has_head_block(conf) && (conf->cursor == 0))
- gf_log("crypt", GF_LOG_DEBUG, "Cursor is not at full atom");
-}
-
-/*
- * FIXME: use avec->iov_len to check setup
- */
-static inline int
-data_local_invariant(crypt_local_t *local)
-{
- return 0;
-}
-
-#else
-#define crypt_check_input_len(len, object) noop
-#define check_head_block(conf) noop
-#define check_tail_block(conf) noop
-#define check_full_block(conf) noop
-#define check_cursor_head(conf) noop
-#define check_cursor_full(conf) noop
-
-#endif /* DEBUG_CRYPT */
-
-static inline struct avec_config *
-conf_by_type(call_frame_t *frame, atom_data_type dtype)
-{
- struct avec_config *conf = NULL;
-
- switch (dtype) {
- case HOLE_ATOM:
- conf = get_hole_conf(frame);
- break;
- case DATA_ATOM:
- conf = get_data_conf(frame);
- break;
- default:
- gf_log("crypt", GF_LOG_DEBUG, "bad atom type");
- }
- return conf;
-}
-
-static inline uint32_t
-nr_calls_head(struct avec_config *conf)
-{
- return has_head_block(conf) ? 1 : 0;
-}
-
-static inline uint32_t
-nr_calls_tail(struct avec_config *conf)
-{
- return has_tail_block(conf) ? 1 : 0;
-}
-
-static inline uint32_t
-nr_calls_full(struct avec_config *conf)
-{
- switch (conf->type) {
- case HOLE_ATOM:
- return has_full_blocks(conf);
- case DATA_ATOM:
- return has_full_blocks(conf)
- ? logical_blocks_occupied(0, conf->nr_full_blocks,
- MAX_IOVEC_BITS)
- : 0;
- default:
- gf_log("crypt", GF_LOG_DEBUG, "bad atom data type");
- return 0;
- }
-}
-
-static inline uint32_t
-nr_calls(struct avec_config *conf)
-{
- return nr_calls_head(conf) + nr_calls_tail(conf) + nr_calls_full(conf);
-}
-
-static inline uint32_t
-nr_calls_data(call_frame_t *frame)
-{
- return nr_calls(get_data_conf(frame));
-}
-
-static inline uint32_t
-nr_calls_hole(call_frame_t *frame)
-{
- return nr_calls(get_hole_conf(frame));
-}
-
-static inline void
-get_one_call_nolock(call_frame_t *frame)
-{
- crypt_local_t *local = frame->local;
-
- ++local->nr_calls;
-
- // gf_log("crypt", GF_LOG_DEBUG, "get %d calls", 1);
-}
-
-static inline void
-get_one_call(call_frame_t *frame)
-{
- crypt_local_t *local = frame->local;
-
- LOCK(&local->call_lock);
- get_one_call_nolock(frame);
- UNLOCK(&local->call_lock);
-}
-
-static inline void
-get_nr_calls_nolock(call_frame_t *frame, int32_t nr)
-{
- crypt_local_t *local = frame->local;
-
- local->nr_calls += nr;
-
- // gf_log("crypt", GF_LOG_DEBUG, "get %d calls", nr);
-}
-
-static inline void
-get_nr_calls(call_frame_t *frame, int32_t nr)
-{
- crypt_local_t *local = frame->local;
-
- LOCK(&local->call_lock);
- get_nr_calls_nolock(frame, nr);
- UNLOCK(&local->call_lock);
-}
-
-static inline int
-put_one_call(crypt_local_t *local)
-{
- uint32_t last = 0;
-
- LOCK(&local->call_lock);
- if (--local->nr_calls == 0)
- last = 1;
-
- // gf_log("crypt", GF_LOG_DEBUG, "put %d calls", 1);
-
- UNLOCK(&local->call_lock);
- return last;
-}
-
-static inline int
-is_appended_write(call_frame_t *frame)
-{
- crypt_local_t *local = frame->local;
- struct avec_config *conf = get_data_conf(frame);
-
- return conf->orig_offset + conf->orig_size > local->old_file_size;
-}
-
-static inline int
-is_ordered_mode(call_frame_t *frame)
-{
-#if 0
- crypt_local_t *local = frame->local;
- return local->fop == GF_FOP_FTRUNCATE ||
- (local->fop == GF_FOP_WRITE && is_appended_write(frame));
-#endif
- return 1;
-}
-
-static inline int32_t
-hole_conv_completed(crypt_local_t *local)
-{
- struct avec_config *conf = &local->hole_conf;
- return conf->cursor == conf->acount;
-}
-
-static inline int32_t
-data_write_in_progress(crypt_local_t *local)
-{
- return local->active_setup == DATA_ATOM;
-}
-
-static inline int32_t
-parent_is_crypt_xlator(call_frame_t *frame, xlator_t *this)
-{
- return frame->parent->this == this;
-}
-
-static inline linkop_wind_handler_t
-linkop_wind_dispatch(glusterfs_fop_t fop)
-{
- switch (fop) {
- case GF_FOP_LINK:
- return link_wind;
- case GF_FOP_UNLINK:
- return unlink_wind;
- case GF_FOP_RENAME:
- return rename_wind;
- default:
- gf_log("crypt", GF_LOG_ERROR, "Bad link operation %d", fop);
- return NULL;
- }
-}
-
-static inline linkop_unwind_handler_t
-linkop_unwind_dispatch(glusterfs_fop_t fop)
-{
- switch (fop) {
- case GF_FOP_LINK:
- return link_unwind;
- case GF_FOP_UNLINK:
- return unlink_unwind;
- case GF_FOP_RENAME:
- return rename_unwind;
- default:
- gf_log("crypt", GF_LOG_ERROR, "Bad link operation %d", fop);
- return NULL;
- }
-}
-
-static inline mtd_op_t
-linkop_mtdop_dispatch(glusterfs_fop_t fop)
-{
- switch (fop) {
- case GF_FOP_LINK:
- return MTD_APPEND;
- case GF_FOP_UNLINK:
- return MTD_CUT;
- case GF_FOP_RENAME:
- return MTD_OVERWRITE;
- default:
- gf_log("crypt", GF_LOG_WARNING, "Bad link operation %d", fop);
- return MTD_LAST_OP;
- }
-}
-
-#define CRYPT_STACK_UNWIND(fop, frame, params...) \
- do { \
- crypt_local_t *__local = NULL; \
- if (frame) { \
- __local = frame->local; \
- frame->local = NULL; \
- } \
- STACK_UNWIND_STRICT(fop, frame, params); \
- if (__local) { \
- GF_FREE(__local); \
- } \
- } while (0)
-
-#endif /* __CRYPT_H__ */
-
-/*
- Local variables:
- c-indentation-style: "K&R"
- mode-name: "LC"
- c-basic-offset: 8
- tab-width: 8
- fill-column: 80
- scroll-step: 1
- End:
-*/
diff --git a/xlators/encryption/crypt/src/data.c b/xlators/encryption/crypt/src/data.c
deleted file mode 100644
index 8e8701b6bf2..00000000000
--- a/xlators/encryption/crypt/src/data.c
+++ /dev/null
@@ -1,715 +0,0 @@
-/*
- Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include "defaults.h"
-#include "crypt-common.h"
-#include "crypt.h"
-
-static void
-set_iv_aes_xts(off_t offset, struct object_cipher_info *object)
-{
- unsigned char *ivec;
-
- ivec = object->u.aes_xts.ivec;
-
- /* convert the tweak into a little-endian byte
- * array (IEEE P1619/D16, May 2007, section 5.1)
- */
-
- *((uint64_t *)ivec) = htole64(offset);
-
- /* ivec is padded with zeroes */
-}
-
-static int32_t
-aes_set_keys_common(unsigned char *raw_key, uint32_t key_size, AES_KEY *keys)
-{
- int32_t ret;
-
- ret = AES_set_encrypt_key(raw_key, key_size, &keys[AES_ENCRYPT]);
- if (ret) {
- gf_log("crypt", GF_LOG_ERROR, "Set encrypt key failed");
- return ret;
- }
- ret = AES_set_decrypt_key(raw_key, key_size, &keys[AES_DECRYPT]);
- if (ret) {
- gf_log("crypt", GF_LOG_ERROR, "Set decrypt key failed");
- return ret;
- }
- return 0;
-}
-
-/*
- * set private cipher info for xts mode
- */
-static int32_t
-set_private_aes_xts(struct crypt_inode_info *info,
- struct master_cipher_info *master)
-{
- int ret;
- struct object_cipher_info *object = get_object_cinfo(info);
- unsigned char *data_key;
- uint32_t subkey_size;
-
- /* init tweak value */
- memset(object->u.aes_xts.ivec, 0, 16);
-
- data_key = GF_CALLOC(1, object->o_dkey_size, gf_crypt_mt_key);
- if (!data_key)
- return ENOMEM;
-
- /*
- * retrieve data keying material
- */
- ret = get_data_file_key(info, master, object->o_dkey_size, data_key);
- if (ret) {
- gf_log("crypt", GF_LOG_ERROR, "Failed to retrieve data key");
- GF_FREE(data_key);
- return ret;
- }
- /*
- * parse compound xts key
- */
- subkey_size = object->o_dkey_size >> 4; /* (xts-key-size-in-bytes / 2) */
- /*
- * install key for data encryption
- */
- ret = aes_set_keys_common(data_key, subkey_size << 3,
- object->u.aes_xts.dkey);
- if (ret) {
- GF_FREE(data_key);
- return ret;
- }
- /*
- * set up key used to encrypt tweaks
- */
- ret = AES_set_encrypt_key(data_key + subkey_size, object->o_dkey_size / 2,
- &object->u.aes_xts.tkey);
- if (ret < 0)
- gf_log("crypt", GF_LOG_ERROR, "Set tweak key failed");
-
- GF_FREE(data_key);
- return ret;
-}
-
-static int32_t
-aes_xts_init(void)
-{
- cassert(AES_BLOCK_SIZE == (1 << AES_BLOCK_BITS));
- return 0;
-}
-
-static int32_t
-check_key_aes_xts(uint32_t keysize)
-{
- switch (keysize) {
- case 256:
- case 512:
- return 0;
- default:
- break;
- }
- return -1;
-}
-
-static int32_t
-encrypt_aes_xts(const unsigned char *from, unsigned char *to, size_t length,
- off_t offset, const int enc, struct object_cipher_info *object)
-{
- XTS128_CONTEXT ctx;
- if (enc) {
- ctx.key1 = &object->u.aes_xts.dkey[AES_ENCRYPT];
- ctx.block1 = (block128_f)AES_encrypt;
- } else {
- ctx.key1 = &object->u.aes_xts.dkey[AES_DECRYPT];
- ctx.block1 = (block128_f)AES_decrypt;
- }
- ctx.key2 = &object->u.aes_xts.tkey;
- ctx.block2 = (block128_f)AES_encrypt;
-
- return CRYPTO_xts128_encrypt(&ctx, object->u.aes_xts.ivec, from, to, length,
- enc);
-}
-
-/*
- * Cipher input chunk @from of length @len;
- * @to: result of cipher transform;
- * @off: offset in a file (must be cblock-aligned);
- */
-static void
-cipher_data(struct object_cipher_info *object, char *from, char *to, off_t off,
- size_t len, const int enc)
-{
- crypt_check_input_len(len, object);
-
-#if TRIVIAL_TFM && DEBUG_CRYPT
- return;
-#endif
- data_cipher_algs[object->o_alg][object->o_mode].set_iv(off, object);
- data_cipher_algs[object->o_alg][object->o_mode].encrypt(
- (const unsigned char *)from, (unsigned char *)to, len, off, enc,
- object);
-}
-
-#define MAX_CIPHER_CHUNK (1 << 30)
-
-/*
- * Do cipher (encryption/decryption) transform of a
- * continuous region of memory.
- *
- * @len: a number of bytes to transform;
- * @buf: data to transform;
- * @off: offset in a file, should be block-aligned
- * for atomic cipher modes and ksize-aligned
- * for other modes).
- * @dir: direction of transform (encrypt/decrypt).
- */
-static void
-cipher_region(struct object_cipher_info *object, char *from, char *to,
- off_t off, size_t len, int dir)
-{
- while (len > 0) {
- size_t to_cipher;
-
- to_cipher = len;
- if (to_cipher > MAX_CIPHER_CHUNK)
- to_cipher = MAX_CIPHER_CHUNK;
-
- /* this will reset IV */
- cipher_data(object, from, to, off, to_cipher, dir);
- from += to_cipher;
- to += to_cipher;
- off += to_cipher;
- len -= to_cipher;
- }
-}
-
-/*
- * Do cipher transform (encryption/decryption) of
- * plaintext/ciphertext represented by @vec.
- *
- * Pre-conditions: @vec represents a continuous piece
- * of data in a file at offset @off to be ciphered
- * (encrypted/decrypted).
- * @count is the number of vec's components. All the
- * components must be block-aligned, the caller is
- * responsible for this. @dir is "direction" of
- * transform (encrypt/decrypt).
- */
-static void
-cipher_aligned_iov(struct object_cipher_info *object, struct iovec *vec,
- int count, off_t off, int32_t dir)
-{
- int i;
- int len = 0;
-
- for (i = 0; i < count; i++) {
- cipher_region(object, vec[i].iov_base, vec[i].iov_base, off + len,
- vec[i].iov_len, dir);
- len += vec[i].iov_len;
- }
-}
-
-void
-encrypt_aligned_iov(struct object_cipher_info *object, struct iovec *vec,
- int count, off_t off)
-{
- cipher_aligned_iov(object, vec, count, off, 1);
-}
-
-void
-decrypt_aligned_iov(struct object_cipher_info *object, struct iovec *vec,
- int count, off_t off)
-{
- cipher_aligned_iov(object, vec, count, off, 0);
-}
-
-#if DEBUG_CRYPT
-static void
-compound_stream(struct iovec *vec, int count, char *buf, off_t skip)
-{
- int i;
- int off = 0;
- for (i = 0; i < count; i++) {
- memcpy(buf + off, vec[i].iov_base + skip, vec[i].iov_len - skip);
-
- off += (vec[i].iov_len - skip);
- skip = 0;
- }
-}
-
-static void
-check_iovecs(struct iovec *vec, int cnt, struct iovec *avec, int acnt,
- uint32_t off_in_head)
-{
- char *s1, *s2;
- uint32_t size, asize;
-
- size = iov_length(vec, cnt);
- asize = iov_length(avec, acnt) - off_in_head;
- if (size != asize) {
- gf_log("crypt", GF_LOG_DEBUG, "size %d is not eq asize %d", size,
- asize);
- return;
- }
- s1 = GF_CALLOC(1, size, gf_crypt_mt_data);
- if (!s1) {
- gf_log("crypt", GF_LOG_DEBUG, "Can not allocate stream ");
- return;
- }
- s2 = GF_CALLOC(1, asize, gf_crypt_mt_data);
- if (!s2) {
- GF_FREE(s1);
- gf_log("crypt", GF_LOG_DEBUG, "Can not allocate stream ");
- return;
- }
- compound_stream(vec, cnt, s1, 0);
- compound_stream(avec, acnt, s2, off_in_head);
- if (memcmp(s1, s2, size))
- gf_log("crypt", GF_LOG_DEBUG, "chunks of different data");
- GF_FREE(s1);
- GF_FREE(s2);
-}
-
-#else
-#define check_iovecs(vec, count, avec, avecn, off) noop
-#endif /* DEBUG_CRYPT */
-
-static char *
-data_alloc_block(xlator_t *this, crypt_local_t *local, int32_t block_size)
-{
- struct iobuf *iobuf = NULL;
-
- iobuf = iobuf_get2(this->ctx->iobuf_pool, block_size);
- if (!iobuf) {
- gf_log("crypt", GF_LOG_ERROR, "Failed to get iobuf");
- return NULL;
- }
- if (!local->iobref_data) {
- local->iobref_data = iobref_new();
- if (!local->iobref_data) {
- gf_log("crypt", GF_LOG_ERROR, "Failed to get iobref");
- iobuf_unref(iobuf);
- return NULL;
- }
- }
- iobref_add(local->iobref_data, iobuf);
- return iobuf->ptr;
-}
-
-/*
- * Compound @avec, which represent the same data
- * chunk as @vec, but has aligned components of
- * specified block size. Alloc blocks, if needed.
- * In particular, incomplete head and tail blocks
- * must be allocated.
- * Put number of allocated blocks to @num_blocks.
- *
- * Example:
- *
- * input: data chunk represented by 4 components
- * [AB],[BC],[CD],[DE];
- * output: 5 logical blocks (0, 1, 2, 3, 4).
- *
- * A B C D E
- * *-----*+------*-+---*----+--------+-*
- * | || | | | | | |
- * *-+-----+*------+-*---+----*--------*-+------*
- * 0 1 2 3 4
- *
- * 0 - incomplete compound (head);
- * 1, 2 - full compound;
- * 3 - full non-compound (the case of reuse);
- * 4 - incomplete non-compound (tail).
- */
-int32_t
-align_iov_by_atoms(xlator_t *this, crypt_local_t *local,
- struct object_cipher_info *object,
- struct iovec *vec /* input vector */,
- int32_t count /* number of vec components */,
- struct iovec *avec /* aligned vector */,
- char **blocks /* pool of blocks */,
- uint32_t *blocks_allocated, struct avec_config *conf)
-{
- int vecn = 0; /* number of the current component in vec */
- int avecn = 0; /* number of the current component in avec */
- off_t vec_off = 0; /* offset in the current vec component,
- * i.e. the number of bytes have already
- * been copied */
- int32_t block_size = get_atom_size(object);
- size_t to_process; /* number of vec's bytes to copy and(or) re-use */
- int32_t off_in_head = conf->off_in_head;
-
- to_process = iov_length(vec, count);
-
- while (to_process > 0) {
- if (off_in_head || vec[vecn].iov_len - vec_off < block_size) {
- /*
- * less than block_size:
- * the case of incomplete (head or tail),
- * or compound block
- */
- size_t copied = 0;
- /*
- * populate the pool with a new block
- */
- blocks[*blocks_allocated] = data_alloc_block(this, local,
- block_size);
- if (!blocks[*blocks_allocated])
- return -ENOMEM;
- memset(blocks[*blocks_allocated], 0, off_in_head);
- /*
- * fill the block with vec components
- */
- do {
- size_t to_copy;
-
- to_copy = vec[vecn].iov_len - vec_off;
- if (to_copy > block_size - off_in_head)
- to_copy = block_size - off_in_head;
-
- memcpy(blocks[*blocks_allocated] + off_in_head + copied,
- vec[vecn].iov_base + vec_off, to_copy);
-
- copied += to_copy;
- to_process -= to_copy;
-
- vec_off += to_copy;
- if (vec_off == vec[vecn].iov_len) {
- /* finished with this vecn */
- vec_off = 0;
- vecn++;
- }
- } while (copied < (block_size - off_in_head) && to_process > 0);
- /*
- * update avec
- */
- avec[avecn].iov_len = off_in_head + copied;
- avec[avecn].iov_base = blocks[*blocks_allocated];
-
- (*blocks_allocated)++;
- off_in_head = 0;
- } else {
- /*
- * the rest of the current vec component
- * is not less than block_size, so reuse
- * the memory buffer of the component.
- */
- size_t to_reuse;
- to_reuse = (to_process > block_size ? block_size : to_process);
- avec[avecn].iov_len = to_reuse;
- avec[avecn].iov_base = vec[vecn].iov_base + vec_off;
-
- vec_off += to_reuse;
- if (vec_off == vec[vecn].iov_len) {
- /* finished with this vecn */
- vec_off = 0;
- vecn++;
- }
- to_process -= to_reuse;
- }
- avecn++;
- }
- check_iovecs(vec, count, avec, avecn, conf->off_in_head);
- return 0;
-}
-
-/*
- * allocate and setup aligned vector for data submission
- * Pre-condition: @conf is set.
- */
-int32_t
-set_config_avec_data(xlator_t *this, crypt_local_t *local,
- struct avec_config *conf,
- struct object_cipher_info *object, struct iovec *vec,
- int32_t vec_count)
-{
- int32_t ret = ENOMEM;
- struct iovec *avec;
- char **pool;
- uint32_t blocks_in_pool = 0;
-
- conf->type = DATA_ATOM;
-
- avec = GF_CALLOC(conf->acount, sizeof(*avec), gf_crypt_mt_iovec);
- if (!avec)
- return ret;
- pool = GF_CALLOC(conf->acount, sizeof(*pool), gf_crypt_mt_char);
- if (!pool) {
- GF_FREE(avec);
- return ret;
- }
- if (!vec) {
- /*
- * degenerated case: no data
- */
- pool[0] = data_alloc_block(this, local, get_atom_size(object));
- if (!pool[0])
- goto free;
- blocks_in_pool = 1;
- avec->iov_base = pool[0];
- avec->iov_len = conf->off_in_tail;
- } else {
- ret = align_iov_by_atoms(this, local, object, vec, vec_count, avec,
- pool, &blocks_in_pool, conf);
- if (ret)
- goto free;
- }
- conf->avec = avec;
- conf->pool = pool;
- conf->blocks_in_pool = blocks_in_pool;
- return 0;
-free:
- GF_FREE(avec);
- GF_FREE(pool);
- return ret;
-}
-
-/*
- * allocate and setup aligned vector for hole submission
- */
-int32_t
-set_config_avec_hole(xlator_t *this, crypt_local_t *local,
- struct avec_config *conf,
- struct object_cipher_info *object, glusterfs_fop_t fop)
-{
- uint32_t i, idx;
- struct iovec *avec;
- char **pool;
- uint32_t num_blocks;
- uint32_t blocks_in_pool = 0;
-
- conf->type = HOLE_ATOM;
-
- num_blocks = conf->acount -
- (conf->nr_full_blocks ? conf->nr_full_blocks - 1 : 0);
-
- switch (fop) {
- case GF_FOP_WRITE:
- /*
- * hole goes before data
- */
- if (num_blocks == 1 && conf->off_in_tail != 0)
- /*
- * we won't submit a hole which fits into
- * a data atom: this part of hole will be
- * submitted with data write
- */
- return 0;
- break;
- case GF_FOP_FTRUNCATE:
- /*
- * expanding truncate, hole goes after data,
- * and will be submitted in any case.
- */
- break;
- default:
- gf_log("crypt", GF_LOG_WARNING, "bad file operation %d", fop);
- return 0;
- }
- avec = GF_CALLOC(num_blocks, sizeof(*avec), gf_crypt_mt_iovec);
- if (!avec)
- return ENOMEM;
- pool = GF_CALLOC(num_blocks, sizeof(*pool), gf_crypt_mt_char);
- if (!pool) {
- GF_FREE(avec);
- return ENOMEM;
- }
- for (i = 0; i < num_blocks; i++) {
- pool[i] = data_alloc_block(this, local, get_atom_size(object));
- if (pool[i] == NULL)
- goto free;
- blocks_in_pool++;
- }
- if (has_head_block(conf)) {
- /* set head block */
- idx = 0;
- avec[idx].iov_base = pool[idx];
- avec[idx].iov_len = get_atom_size(object);
- memset(avec[idx].iov_base + conf->off_in_head, 0,
- get_atom_size(object) - conf->off_in_head);
- }
- if (has_tail_block(conf)) {
- /* set tail block */
- idx = num_blocks - 1;
- avec[idx].iov_base = pool[idx];
- avec[idx].iov_len = get_atom_size(object);
- memset(avec[idx].iov_base, 0, conf->off_in_tail);
- }
- if (has_full_blocks(conf)) {
- /* set full block */
- idx = conf->off_in_head ? 1 : 0;
- avec[idx].iov_base = pool[idx];
- avec[idx].iov_len = get_atom_size(object);
- /*
- * since we re-use the buffer,
- * zeroes will be set every time
- * before encryption, see submit_full()
- */
- }
- conf->avec = avec;
- conf->pool = pool;
- conf->blocks_in_pool = blocks_in_pool;
- return 0;
-free:
- GF_FREE(avec);
- GF_FREE(pool);
- return ENOMEM;
-}
-
-/* A helper for setting up config of partial atoms (which
- * participate in read-modify-write sequence).
- *
- * Calculate and setup precise amount of "extra-bytes"
- * that should be uptodated at the end of partial (not
- * necessarily tail!) block.
- *
- * Pre-condition: local->old_file_size is valid!
- * @conf contains setup, which is enough for correct calculation
- * of has_tail_block(), ->get_offset().
- */
-void
-set_gap_at_end(call_frame_t *frame, struct object_cipher_info *object,
- struct avec_config *conf, atom_data_type dtype)
-{
- uint32_t to_block;
- crypt_local_t *local = frame->local;
- uint64_t old_file_size = local->old_file_size;
- struct rmw_atom *partial = atom_by_types(
- dtype, has_tail_block(conf) ? TAIL_ATOM : HEAD_ATOM);
-
- if (old_file_size <= partial->offset_at(frame, object))
- to_block = 0;
- else {
- to_block = old_file_size - partial->offset_at(frame, object);
- if (to_block > get_atom_size(object))
- to_block = get_atom_size(object);
- }
- if (to_block > conf->off_in_tail)
- conf->gap_in_tail = to_block - conf->off_in_tail;
- else
- /*
- * nothing to uptodate
- */
- conf->gap_in_tail = 0;
-}
-
-/*
- * fill struct avec_config with offsets layouts
- */
-void
-set_config_offsets(call_frame_t *frame, xlator_t *this, uint64_t offset,
- uint64_t count, atom_data_type dtype, int32_t set_gap)
-{
- crypt_local_t *local;
- struct object_cipher_info *object;
- struct avec_config *conf;
- uint32_t resid;
-
- uint32_t atom_size;
- uint32_t atom_bits;
-
- size_t orig_size;
- off_t orig_offset;
- size_t expanded_size;
- off_t aligned_offset;
-
- uint32_t off_in_head = 0;
- uint32_t off_in_tail = 0;
- uint32_t nr_full_blocks;
- int32_t size_full_blocks;
-
- uint32_t acount; /* number of aligned components to write.
- * The same as number of occupied logical
- * blocks (atoms)
- */
- local = frame->local;
- object = &local->info->cinfo;
- conf = (dtype == DATA_ATOM ? get_data_conf(frame) : get_hole_conf(frame));
-
- orig_offset = offset;
- orig_size = count;
-
- atom_size = get_atom_size(object);
- atom_bits = get_atom_bits(object);
-
- /*
- * Round-down the start,
- * round-up the end.
- */
- resid = offset & (uint64_t)(atom_size - 1);
-
- if (resid)
- off_in_head = resid;
- aligned_offset = offset - off_in_head;
- expanded_size = orig_size + off_in_head;
-
- /* calculate tail,
- expand size forward */
- resid = (offset + orig_size) & (uint64_t)(atom_size - 1);
-
- if (resid) {
- off_in_tail = resid;
- expanded_size += (atom_size - off_in_tail);
- }
- /*
- * calculate number of occupied blocks
- */
- acount = expanded_size >> atom_bits;
- /*
- * calculate number of full blocks
- */
- size_full_blocks = expanded_size;
- if (off_in_head)
- size_full_blocks -= atom_size;
- if (off_in_tail && size_full_blocks > 0)
- size_full_blocks -= atom_size;
- nr_full_blocks = size_full_blocks >> atom_bits;
-
- conf->atom_size = atom_size;
- conf->orig_size = orig_size;
- conf->orig_offset = orig_offset;
- conf->expanded_size = expanded_size;
- conf->aligned_offset = aligned_offset;
-
- conf->off_in_head = off_in_head;
- conf->off_in_tail = off_in_tail;
- conf->nr_full_blocks = nr_full_blocks;
- conf->acount = acount;
- /*
- * Finally, calculate precise amount of
- * "extra-bytes" that should be uptodated
- * at the end.
- * Only if RMW is expected.
- */
- if (off_in_tail && set_gap)
- set_gap_at_end(frame, object, conf, dtype);
-}
-
-struct data_cipher_alg data_cipher_algs[LAST_CIPHER_ALG][LAST_CIPHER_MODE] = {
- [AES_CIPHER_ALG][XTS_CIPHER_MODE] = {.atomic = _gf_true,
- .should_pad = _gf_true,
- .blkbits = AES_BLOCK_BITS,
- .init = aes_xts_init,
- .set_private = set_private_aes_xts,
- .check_key = check_key_aes_xts,
- .set_iv = set_iv_aes_xts,
- .encrypt = encrypt_aes_xts}};
-
-/*
- Local variables:
- c-indentation-style: "K&R"
- mode-name: "LC"
- c-basic-offset: 8
- tab-width: 8
- fill-column: 80
- scroll-step: 1
- End:
-*/
diff --git a/xlators/encryption/crypt/src/keys.c b/xlators/encryption/crypt/src/keys.c
deleted file mode 100644
index a9357005a36..00000000000
--- a/xlators/encryption/crypt/src/keys.c
+++ /dev/null
@@ -1,284 +0,0 @@
-/*
- Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include "defaults.h"
-#include "crypt-common.h"
-#include "crypt.h"
-
-/* Key hierarchy
-
- +----------------+
- | MASTER_VOL_KEY |
- +-------+--------+
- |
- |
- +----------------+----------------+
- | | |
- | | |
- +-------+------+ +-------+-------+ +------+--------+
- | NMTD_VOL_KEY | | EMTD_FILE_KEY | | DATA_FILE_KEY |
- +-------+------+ +---------------+ +---------------+
- |
- |
- +-------+-------+
- | NMTD_LINK_KEY |
- +---------------+
-
- */
-
-#if DEBUG_CRYPT
-static void
-check_prf_iters(uint32_t num_iters)
-{
- if (num_iters == 0)
- gf_log("crypt", GF_LOG_DEBUG, "bad number of prf iterations : %d",
- num_iters);
-}
-#else
-#define check_prf_iters(num_iters) noop
-#endif /* DEBUG_CRYPT */
-
-unsigned char crypt_fake_oid[16] = {0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0};
-
-/*
- * derive key in the counter mode using
- * sha256-based HMAC as PRF, see
- * NIST Special Publication 800-108, 5.1)
- */
-
-#define PRF_OUTPUT_SIZE SHA256_DIGEST_LENGTH
-
-static int32_t
-kderive_init(struct kderive_context *ctx,
- const unsigned char *pkey, /* parent key */
- uint32_t pkey_size, /* parent key size */
- const unsigned char *idctx, /* id-context */
- uint32_t idctx_size, crypt_key_type type /* type of child key */)
-{
- unsigned char *pos;
- uint32_t llen = strlen(crypt_keys[type].label);
- /*
- * Compoud the fixed input data for KDF:
- * [i]_2 || Label || 0x00 || Id-Context || [L]_2),
- * NIST SP 800-108, 5.1
- */
- ctx->fid_len = sizeof(uint32_t) + llen + 1 + idctx_size + sizeof(uint32_t);
-
- ctx->fid = GF_CALLOC(ctx->fid_len, 1, gf_crypt_mt_key);
- if (!ctx->fid)
- return ENOMEM;
- ctx->out_len = round_up(crypt_keys[type].len >> 3, PRF_OUTPUT_SIZE);
- ctx->out = GF_CALLOC(ctx->out_len, 1, gf_crypt_mt_key);
- if (!ctx->out) {
- GF_FREE(ctx->fid);
- return ENOMEM;
- }
- ctx->pkey = pkey;
- ctx->pkey_len = pkey_size;
- ctx->ckey_len = crypt_keys[type].len;
-
- pos = ctx->fid;
-
- /* counter will be set up in kderive_rfn() */
- pos += sizeof(uint32_t);
-
- memcpy(pos, crypt_keys[type].label, llen);
- pos += llen;
-
- /* set up zero octet */
- *pos = 0;
- pos += 1;
-
- memcpy(pos, idctx, idctx_size);
- pos += idctx_size;
-
- *((uint32_t *)pos) = htobe32(ctx->ckey_len);
-
- return 0;
-}
-
-static void
-kderive_update(struct kderive_context *ctx)
-{
- uint32_t i;
-#if (OPENSSL_VERSION_NUMBER < 0x1010002f)
- HMAC_CTX hctx;
-#endif
- HMAC_CTX *phctx = NULL;
- unsigned char *pos = ctx->out;
- uint32_t *p_iter = (uint32_t *)ctx->fid;
- uint32_t num_iters = ctx->out_len / PRF_OUTPUT_SIZE;
-
- check_prf_iters(num_iters);
-
-#if (OPENSSL_VERSION_NUMBER < 0x1010002f)
- HMAC_CTX_init(&hctx);
- phctx = &hctx;
-#else
- phctx = HMAC_CTX_new();
- /* I guess we presume it was successful? */
-#endif
- for (i = 0; i < num_iters; i++) {
- /*
- * update the iteration number in the fid
- */
- *p_iter = htobe32(i);
- HMAC_Init_ex(phctx, ctx->pkey, ctx->pkey_len >> 3, EVP_sha256(), NULL);
- HMAC_Update(phctx, ctx->fid, ctx->fid_len);
- HMAC_Final(phctx, pos, NULL);
-
- pos += PRF_OUTPUT_SIZE;
- }
-#if (OPENSSL_VERSION_NUMBER < 0x1010002f)
- HMAC_CTX_cleanup(phctx);
-#else
- HMAC_CTX_free(phctx);
-#endif
-}
-
-static void
-kderive_final(struct kderive_context *ctx, unsigned char *child)
-{
- memcpy(child, ctx->out, ctx->ckey_len >> 3);
- GF_FREE(ctx->fid);
- GF_FREE(ctx->out);
- memset(ctx, 0, sizeof(*ctx));
-}
-
-/*
- * derive per-volume key for object ids aithentication
- */
-int32_t
-get_nmtd_vol_key(struct master_cipher_info *master)
-{
- int32_t ret;
- struct kderive_context ctx;
-
- ret = kderive_init(&ctx, master->m_key, master_key_size(), crypt_fake_oid,
- sizeof(uuid_t), NMTD_VOL_KEY);
- if (ret)
- return ret;
- kderive_update(&ctx);
- kderive_final(&ctx, master->m_nmtd_key);
- return 0;
-}
-
-/*
- * derive per-link key for aithentication of non-encrypted
- * meta-data (nmtd)
- */
-int32_t
-get_nmtd_link_key(loc_t *loc, struct master_cipher_info *master,
- unsigned char *result)
-{
- int32_t ret;
- struct kderive_context ctx;
-
- ret = kderive_init(&ctx, master->m_nmtd_key, nmtd_vol_key_size(),
- (const unsigned char *)loc->path, strlen(loc->path),
- NMTD_LINK_KEY);
- if (ret)
- return ret;
- kderive_update(&ctx);
- kderive_final(&ctx, result);
- return 0;
-}
-
-/*
- * derive per-file key for encryption and authentication
- * of encrypted part of metadata (emtd)
- */
-int32_t
-get_emtd_file_key(struct crypt_inode_info *info,
- struct master_cipher_info *master, unsigned char *result)
-{
- int32_t ret;
- struct kderive_context ctx;
-
- ret = kderive_init(&ctx, master->m_key, master_key_size(), info->oid,
- sizeof(uuid_t), EMTD_FILE_KEY);
- if (ret)
- return ret;
- kderive_update(&ctx);
- kderive_final(&ctx, result);
- return 0;
-}
-
-static int32_t
-data_key_type_by_size(uint32_t keysize, crypt_key_type *type)
-{
- int32_t ret = 0;
- switch (keysize) {
- case 256:
- *type = DATA_FILE_KEY_256;
- break;
- case 512:
- *type = DATA_FILE_KEY_512;
- break;
- default:
- gf_log("crypt", GF_LOG_ERROR, "Unsupported data key size %d",
- keysize);
- ret = ENOTSUP;
- break;
- }
- return ret;
-}
-
-/*
- * derive per-file key for data encryption
- */
-int32_t
-get_data_file_key(struct crypt_inode_info *info,
- struct master_cipher_info *master, uint32_t keysize,
- unsigned char *key)
-{
- int32_t ret;
- struct kderive_context ctx;
- crypt_key_type type;
-
- ret = data_key_type_by_size(keysize, &type);
- if (ret)
- return ret;
- ret = kderive_init(&ctx, master->m_key, master_key_size(), info->oid,
- sizeof(uuid_t), type);
- if (ret)
- return ret;
- kderive_update(&ctx);
- kderive_final(&ctx, key);
- return 0;
-}
-
-/*
- * NOTE: Don't change existing keys: it will break compatibility;
- */
-struct crypt_key crypt_keys[LAST_KEY_TYPE] = {
- [MASTER_VOL_KEY] =
- {
- .len = MASTER_VOL_KEY_SIZE << 3,
- .label = "volume-master",
- },
- [NMTD_VOL_KEY] = {.len = NMTD_VOL_KEY_SIZE << 3,
- .label = "volume-nmtd-key-generation"},
- [NMTD_LINK_KEY] = {.len = 128, .label = "link-nmtd-authentication"},
- [EMTD_FILE_KEY] = {.len = 128, .label = "file-emtd-encryption-and-auth"},
- [DATA_FILE_KEY_256] = {.len = 256, .label = "file-data-encryption-256"},
- [DATA_FILE_KEY_512] = {.len = 512, .label = "file-data-encryption-512"}};
-
-/*
- Local variables:
- c-indentation-style: "K&R"
- mode-name: "LC"
- c-basic-offset: 8
- tab-width: 8
- fill-column: 80
- scroll-step: 1
- End:
-*/
diff --git a/xlators/encryption/crypt/src/metadata.c b/xlators/encryption/crypt/src/metadata.c
deleted file mode 100644
index 90c53a9f721..00000000000
--- a/xlators/encryption/crypt/src/metadata.c
+++ /dev/null
@@ -1,575 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include "defaults.h"
-#include "crypt-common.h"
-#include "crypt.h"
-#include "metadata.h"
-
-int32_t
-alloc_format(crypt_local_t *local, size_t size)
-{
- if (size > 0) {
- local->format = GF_CALLOC(1, size, gf_crypt_mt_mtd);
- if (!local->format)
- return ENOMEM;
- }
- local->format_size = size;
- return 0;
-}
-
-int32_t
-alloc_format_create(crypt_local_t *local)
-{
- return alloc_format(local, new_format_size());
-}
-
-void
-free_format(crypt_local_t *local)
-{
- GF_FREE(local->format);
-}
-
-/*
- * Check compatibility with extracted metadata
- */
-static int32_t
-check_file_metadata(struct crypt_inode_info *info)
-{
- struct object_cipher_info *object = &info->cinfo;
-
- if (info->nr_minor != CRYPT_XLATOR_ID) {
- gf_log("crypt", GF_LOG_WARNING, "unsupported minor subversion %d",
- info->nr_minor);
- return EINVAL;
- }
- if (object->o_alg > LAST_CIPHER_ALG) {
- gf_log("crypt", GF_LOG_WARNING, "unsupported cipher algorithm %d",
- object->o_alg);
- return EINVAL;
- }
- if (object->o_mode > LAST_CIPHER_MODE) {
- gf_log("crypt", GF_LOG_WARNING, "unsupported cipher mode %d",
- object->o_mode);
- return EINVAL;
- }
- if (object->o_block_bits < CRYPT_MIN_BLOCK_BITS ||
- object->o_block_bits > CRYPT_MAX_BLOCK_BITS) {
- gf_log("crypt", GF_LOG_WARNING, "unsupported block bits %d",
- object->o_block_bits);
- return EINVAL;
- }
- /* TBD: check data key size */
- return 0;
-}
-
-static size_t
-format_size_v1(mtd_op_t op, size_t old_size)
-{
- switch (op) {
- case MTD_CREATE:
- return sizeof(struct mtd_format_v1);
- case MTD_OVERWRITE:
- return old_size;
- case MTD_APPEND:
- return old_size + NMTD_8_MAC_SIZE;
- case MTD_CUT:
- if (old_size > sizeof(struct mtd_format_v1))
- return old_size - NMTD_8_MAC_SIZE;
- else
- return 0;
- default:
- gf_log("crypt", GF_LOG_WARNING, "Bad mtd operation");
- return 0;
- }
-}
-
-/*
- * Calculate size of the updated format string.
- * Returned zero means that we don't need to update the format string.
- */
-size_t
-format_size(mtd_op_t op, size_t old_size)
-{
- size_t versioned;
-
- versioned = mtd_loaders[current_mtd_loader()].format_size(
- op, old_size - sizeof(struct crypt_format));
- if (versioned != 0)
- return versioned + sizeof(struct crypt_format);
- return 0;
-}
-
-/*
- * size of the format string of newly created file (nr_links = 1)
- */
-size_t
-new_format_size(void)
-{
- return format_size(MTD_CREATE, 0);
-}
-
-/*
- * Calculate per-link MAC by pathname
- */
-static int32_t
-calc_link_mac_v1(struct mtd_format_v1 *fmt, loc_t *loc, unsigned char *result,
- struct crypt_inode_info *info,
- struct master_cipher_info *master)
-{
- int32_t ret;
- unsigned char nmtd_link_key[16];
- CMAC_CTX *cctx;
- size_t len;
-
- ret = get_nmtd_link_key(loc, master, nmtd_link_key);
- if (ret) {
- gf_log("crypt", GF_LOG_ERROR, "Can not get nmtd link key");
- return -1;
- }
- cctx = CMAC_CTX_new();
- if (!cctx) {
- gf_log("crypt", GF_LOG_ERROR, "CMAC_CTX_new failed");
- return -1;
- }
- ret = CMAC_Init(cctx, nmtd_link_key, sizeof(nmtd_link_key),
- EVP_aes_128_cbc(), 0);
- if (!ret) {
- gf_log("crypt", GF_LOG_ERROR, "CMAC_Init failed");
- CMAC_CTX_free(cctx);
- return -1;
- }
- ret = CMAC_Update(cctx, get_NMTD_V1(info), SIZE_OF_NMTD_V1);
- if (!ret) {
- gf_log("crypt", GF_LOG_ERROR, "CMAC_Update failed");
- CMAC_CTX_free(cctx);
- return -1;
- }
- ret = CMAC_Final(cctx, result, &len);
- CMAC_CTX_free(cctx);
- if (!ret) {
- gf_log("crypt", GF_LOG_ERROR, "CMAC_Final failed");
- return -1;
- }
- return 0;
-}
-
-/*
- * Create per-link MAC of index @idx by pathname
- */
-static int32_t
-create_link_mac_v1(struct mtd_format_v1 *fmt, uint32_t idx, loc_t *loc,
- struct crypt_inode_info *info,
- struct master_cipher_info *master)
-{
- int32_t ret;
- unsigned char *mac;
- unsigned char cmac[16];
-
- mac = get_NMTD_V1_MAC(fmt) + idx * SIZE_OF_NMTD_V1_MAC;
-
- ret = calc_link_mac_v1(fmt, loc, cmac, info, master);
- if (ret)
- return -1;
- memcpy(mac, cmac, SIZE_OF_NMTD_V1_MAC);
- return 0;
-}
-
-static int32_t
-create_format_v1(unsigned char *wire, loc_t *loc, struct crypt_inode_info *info,
- struct master_cipher_info *master)
-{
- int32_t ret;
- struct mtd_format_v1 *fmt;
- unsigned char mtd_key[16];
- AES_KEY EMTD_KEY;
- unsigned char nmtd_link_key[16];
- uint32_t ad;
- GCM128_CONTEXT *gctx;
-
- fmt = (struct mtd_format_v1 *)wire;
-
- fmt->minor_id = info->nr_minor;
- fmt->alg_id = AES_CIPHER_ALG;
- fmt->dkey_factor = master->m_dkey_size >> KEY_FACTOR_BITS;
- fmt->block_bits = master->m_block_bits;
- fmt->mode_id = master->m_mode;
- /*
- * retrieve keys for the parts of metadata
- */
- ret = get_emtd_file_key(info, master, mtd_key);
- if (ret)
- return ret;
- ret = get_nmtd_link_key(loc, master, nmtd_link_key);
- if (ret)
- return ret;
-
- AES_set_encrypt_key(mtd_key, sizeof(mtd_key) * 8, &EMTD_KEY);
-
- gctx = CRYPTO_gcm128_new(&EMTD_KEY, (block128_f)AES_encrypt);
-
- /* TBD: Check return values */
-
- CRYPTO_gcm128_setiv(gctx, info->oid, sizeof(uuid_t));
-
- ad = htole32(MTD_LOADER_V1);
- ret = CRYPTO_gcm128_aad(gctx, (const unsigned char *)&ad, sizeof(ad));
- if (ret) {
- gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_aad failed");
- CRYPTO_gcm128_release(gctx);
- return ret;
- }
- ret = CRYPTO_gcm128_encrypt(gctx, get_EMTD_V1(fmt), get_EMTD_V1(fmt),
- SIZE_OF_EMTD_V1);
- if (ret) {
- gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_encrypt failed");
- CRYPTO_gcm128_release(gctx);
- return ret;
- }
- /*
- * set MAC of encrypted part of metadata
- */
- CRYPTO_gcm128_tag(gctx, get_EMTD_V1_MAC(fmt), SIZE_OF_EMTD_V1_MAC);
- CRYPTO_gcm128_release(gctx);
- /*
- * set the first MAC of non-encrypted part of metadata
- */
- return create_link_mac_v1(fmt, 0, loc, info, master);
-}
-
-/*
- * Called by fops:
- * ->create();
- * ->link();
- *
- * Pack common and version-specific parts of file's metadata
- * Pre-conditions: @info contains valid object-id.
- */
-int32_t
-create_format(unsigned char *wire, loc_t *loc, struct crypt_inode_info *info,
- struct master_cipher_info *master)
-{
- struct crypt_format *fmt = (struct crypt_format *)wire;
-
- fmt->loader_id = current_mtd_loader();
-
- wire += sizeof(struct crypt_format);
- return mtd_loaders[current_mtd_loader()].create_format(wire, loc, info,
- master);
-}
-
-/*
- * Append or overwrite per-link mac of @mac_idx index
- * in accordance with the new pathname
- */
-int32_t
-appov_link_mac_v1(unsigned char *new, unsigned char *old, uint32_t old_size,
- int32_t mac_idx, loc_t *loc, struct crypt_inode_info *info,
- struct master_cipher_info *master, crypt_local_t *local)
-{
- memcpy(new, old, old_size);
- return create_link_mac_v1((struct mtd_format_v1 *)new, mac_idx, loc, info,
- master);
-}
-
-/*
- * Cut per-link mac of @mac_idx index
- */
-static int32_t
-cut_link_mac_v1(unsigned char *new, unsigned char *old, uint32_t old_size,
- int32_t mac_idx, loc_t *loc, struct crypt_inode_info *info,
- struct master_cipher_info *master, crypt_local_t *local)
-{
- memcpy(new, old,
- sizeof(struct mtd_format_v1) + NMTD_8_MAC_SIZE * (mac_idx - 1));
-
- memcpy(
- new + sizeof(struct mtd_format_v1) + NMTD_8_MAC_SIZE *(mac_idx - 1),
- old + sizeof(struct mtd_format_v1) + NMTD_8_MAC_SIZE * mac_idx,
- old_size - (sizeof(struct mtd_format_v1) + NMTD_8_MAC_SIZE * mac_idx));
- return 0;
-}
-
-int32_t
-update_format_v1(unsigned char *new, unsigned char *old, size_t old_len,
- int32_t mac_idx, /* of old name */
- mtd_op_t op, loc_t *loc, struct crypt_inode_info *info,
- struct master_cipher_info *master, crypt_local_t *local)
-{
- switch (op) {
- case MTD_APPEND:
- mac_idx = 1 + (old_len - sizeof(struct mtd_format_v1)) / 8;
- case MTD_OVERWRITE:
- return appov_link_mac_v1(new, old, old_len, mac_idx, loc, info,
- master, local);
- case MTD_CUT:
- return cut_link_mac_v1(new, old, old_len, mac_idx, loc, info,
- master, local);
- default:
- gf_log("crypt", GF_LOG_ERROR, "Bad mtd operation %d", op);
- return -1;
- }
-}
-
-/*
- * Called by fops:
- *
- * ->link()
- * ->unlink()
- * ->rename()
- *
- */
-int32_t
-update_format(unsigned char *new, unsigned char *old, size_t old_len,
- int32_t mac_idx, mtd_op_t op, loc_t *loc,
- struct crypt_inode_info *info, struct master_cipher_info *master,
- crypt_local_t *local)
-{
- if (!new)
- return 0;
- memcpy(new, old, sizeof(struct crypt_format));
-
- old += sizeof(struct crypt_format);
- new += sizeof(struct crypt_format);
- old_len -= sizeof(struct crypt_format);
-
- return mtd_loaders[current_mtd_loader()].update_format(
- new, old, old_len, mac_idx, op, loc, info, master, local);
-}
-
-/*
- * Perform preliminary checks of found metadata
- * Return < 0 on errors;
- * Return number of object-id MACs (>= 1) on success
- */
-int32_t
-check_format_v1(uint32_t len, unsigned char *wire)
-{
- uint32_t nr_links;
-
- if (len < sizeof(struct mtd_format_v1)) {
- gf_log("crypt", GF_LOG_ERROR, "v1-loader: bad metadata size %d", len);
- goto error;
- }
- len -= sizeof(struct mtd_format_v1);
- if (len % sizeof(nmtd_8_mac_t)) {
- gf_log("crypt", GF_LOG_ERROR, "v1-loader: bad metadata format");
- goto error;
- }
- nr_links = 1 + len / sizeof(nmtd_8_mac_t);
- if (nr_links > _POSIX_LINK_MAX)
- goto error;
- return nr_links;
-error:
- return EIO;
-}
-
-/*
- * Verify per-link MAC specified by index @idx
- *
- * return:
- * -1 on errors;
- * 0 on failed verification;
- * 1 on successful verification
- */
-static int32_t
-verify_link_mac_v1(struct mtd_format_v1 *fmt,
- uint32_t idx /* index of the mac to verify */, loc_t *loc,
- struct crypt_inode_info *info,
- struct master_cipher_info *master)
-{
- int32_t ret;
- unsigned char *mac;
- unsigned char cmac[16];
-
- mac = get_NMTD_V1_MAC(fmt) + idx * SIZE_OF_NMTD_V1_MAC;
-
- ret = calc_link_mac_v1(fmt, loc, cmac, info, master);
- if (ret)
- return -1;
- if (memcmp(cmac, mac, SIZE_OF_NMTD_V1_MAC))
- return 0;
- return 1;
-}
-
-/*
- * Lookup per-link MAC by pathname.
- *
- * return index of the MAC, if it was found;
- * return < 0 on errors, or if the MAC wasn't found
- */
-static int32_t
-lookup_link_mac_v1(struct mtd_format_v1 *fmt, uint32_t nr_macs, loc_t *loc,
- struct crypt_inode_info *info,
- struct master_cipher_info *master)
-{
- int32_t ret;
- uint32_t idx;
-
- for (idx = 0; idx < nr_macs; idx++) {
- ret = verify_link_mac_v1(fmt, idx, loc, info, master);
- if (ret < 0)
- return ret;
- if (ret > 0)
- return idx;
- }
- return -ENOENT;
-}
-
-/*
- * Extract version-specific part of metadata
- */
-static int32_t
-open_format_v1(unsigned char *wire, int32_t len, loc_t *loc,
- struct crypt_inode_info *info, struct master_cipher_info *master,
- crypt_local_t *local, gf_boolean_t load_info)
-{
- int32_t ret;
- int32_t num_nmtd_macs;
- struct mtd_format_v1 *fmt;
- unsigned char mtd_key[16];
- AES_KEY EMTD_KEY;
- GCM128_CONTEXT *gctx;
- uint32_t ad;
- emtd_8_mac_t gmac;
- struct object_cipher_info *object;
-
- num_nmtd_macs = check_format_v1(len, wire);
- if (num_nmtd_macs <= 0)
- return EIO;
-
- ret = lookup_link_mac_v1((struct mtd_format_v1 *)wire, num_nmtd_macs, loc,
- info, master);
- if (ret < 0) {
- gf_log("crypt", GF_LOG_ERROR, "NMTD verification failed");
- return EINVAL;
- }
-
- local->mac_idx = ret;
- if (load_info == _gf_false)
- /* the case of partial open */
- return 0;
-
- fmt = GF_MALLOC(len, gf_crypt_mt_mtd);
- if (!fmt)
- return ENOMEM;
- memcpy(fmt, wire, len);
-
- object = &info->cinfo;
-
- ret = get_emtd_file_key(info, master, mtd_key);
- if (ret) {
- gf_log("crypt", GF_LOG_ERROR, "Can not retrieve metadata key");
- goto out;
- }
- /*
- * decrypt encrypted meta-data
- */
- ret = AES_set_encrypt_key(mtd_key, sizeof(mtd_key) * 8, &EMTD_KEY);
- if (ret < 0) {
- gf_log("crypt", GF_LOG_ERROR, "Can not set encrypt key");
- ret = EIO;
- goto out;
- }
- gctx = CRYPTO_gcm128_new(&EMTD_KEY, (block128_f)AES_encrypt);
- if (!gctx) {
- gf_log("crypt", GF_LOG_ERROR, "Can not alloc gcm context");
- ret = ENOMEM;
- goto out;
- }
- CRYPTO_gcm128_setiv(gctx, info->oid, sizeof(uuid_t));
-
- ad = htole32(MTD_LOADER_V1);
- ret = CRYPTO_gcm128_aad(gctx, (const unsigned char *)&ad, sizeof(ad));
- if (ret) {
- gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_aad failed");
- CRYPTO_gcm128_release(gctx);
- ret = EIO;
- goto out;
- }
- ret = CRYPTO_gcm128_decrypt(gctx, get_EMTD_V1(fmt), get_EMTD_V1(fmt),
- SIZE_OF_EMTD_V1);
- if (ret) {
- gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_decrypt failed");
- CRYPTO_gcm128_release(gctx);
- ret = EIO;
- goto out;
- }
- /*
- * verify metadata
- */
- CRYPTO_gcm128_tag(gctx, gmac, sizeof(gmac));
- CRYPTO_gcm128_release(gctx);
- if (memcmp(gmac, get_EMTD_V1_MAC(fmt), SIZE_OF_EMTD_V1_MAC)) {
- gf_log("crypt", GF_LOG_ERROR, "EMTD verification failed");
- ret = EINVAL;
- goto out;
- }
- /*
- * load verified metadata to the private part of inode
- */
- info->nr_minor = fmt->minor_id;
-
- object->o_alg = fmt->alg_id;
- object->o_dkey_size = fmt->dkey_factor << KEY_FACTOR_BITS;
- object->o_block_bits = fmt->block_bits;
- object->o_mode = fmt->mode_id;
-
- ret = check_file_metadata(info);
-out:
- GF_FREE(fmt);
- return ret;
-}
-
-/*
- * perform metadata authentication against @loc->path;
- * extract crypt-specific attribute and populate @info
- * with them (optional)
- */
-int32_t
-open_format(unsigned char *str, int32_t len, loc_t *loc,
- struct crypt_inode_info *info, struct master_cipher_info *master,
- crypt_local_t *local, gf_boolean_t load_info)
-{
- struct crypt_format *fmt;
- if (len < sizeof(*fmt)) {
- gf_log("crypt", GF_LOG_ERROR, "Bad core format");
- return EIO;
- }
- fmt = (struct crypt_format *)str;
-
- if (fmt->loader_id >= LAST_MTD_LOADER) {
- gf_log("crypt", GF_LOG_ERROR, "Unsupported loader id %d",
- fmt->loader_id);
- return EINVAL;
- }
- str += sizeof(*fmt);
- len -= sizeof(*fmt);
-
- return mtd_loaders[fmt->loader_id].open_format(str, len, loc, info, master,
- local, load_info);
-}
-
-struct crypt_mtd_loader mtd_loaders[LAST_MTD_LOADER] = {
- [MTD_LOADER_V1] = {.format_size = format_size_v1,
- .create_format = create_format_v1,
- .open_format = open_format_v1,
- .update_format = update_format_v1}};
-
-/*
- Local variables:
- c-indentation-style: "K&R"
- mode-name: "LC"
- c-basic-offset: 8
- tab-width: 8
- fill-column: 80
- scroll-step: 1
- End:
-*/
diff --git a/xlators/encryption/crypt/src/metadata.h b/xlators/encryption/crypt/src/metadata.h
deleted file mode 100644
index 0bcee1b18c8..00000000000
--- a/xlators/encryption/crypt/src/metadata.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __METADATA_H__
-#define __METADATA_H__
-
-#define NMTD_8_MAC_SIZE (8)
-#define EMTD_8_MAC_SIZE (8)
-
-typedef uint8_t nmtd_8_mac_t[NMTD_8_MAC_SIZE];
-typedef uint8_t emtd_8_mac_t[EMTD_8_MAC_SIZE];
-
-/*
- * Version "v1" of file's metadata.
- * Metadata of this version has 4 components:
- *
- * 1) EMTD (Encrypted part of MeTaData);
- * 2) NMTD (Non-encrypted part of MeTaData);
- * 3) EMTD_MAC; (EMTD Message Authentication Code);
- * 4) Array of per-link NMTD MACs (for every (hard)link it includes
- * exactly one MAC)
- */
-struct mtd_format_v1 {
- /* EMTD, encrypted part of meta-data */
- uint8_t alg_id; /* cipher algorithm id (only AES for now) */
- uint8_t mode_id; /* cipher mode id; (only XTS for now) */
- uint8_t block_bits; /* encoded block size */
- uint8_t minor_id; /* client translator id */
- uint8_t dkey_factor; /* encoded size of the data key */
- /* MACs */
- emtd_8_mac_t gmac; /* MAC of the encrypted meta-data, 8 bytes */
- nmtd_8_mac_t omac; /* per-link MACs of the non-encrypted
- * meta-data: at least one such MAC is always
- * present */
-} __attribute__((packed));
-
-/*
- * NMTD, the non-encrypted part of metadata of version "v1"
- * is file's gfid, which is generated on trusted machines.
- */
-#define SIZE_OF_NMTD_V1 (sizeof(uuid_t))
-#define SIZE_OF_EMTD_V1 \
- (offsetof(struct mtd_format_v1, gmac) - \
- offsetof(struct mtd_format_v1, alg_id))
-#define SIZE_OF_NMTD_V1_MAC (NMTD_8_MAC_SIZE)
-#define SIZE_OF_EMTD_V1_MAC (EMTD_8_MAC_SIZE)
-
-static inline unsigned char *
-get_EMTD_V1(struct mtd_format_v1 *format)
-{
- return &format->alg_id;
-}
-
-static inline unsigned char *
-get_NMTD_V1(struct crypt_inode_info *info)
-{
- return info->oid;
-}
-
-static inline unsigned char *
-get_EMTD_V1_MAC(struct mtd_format_v1 *format)
-{
- return format->gmac;
-}
-
-static inline unsigned char *
-get_NMTD_V1_MAC(struct mtd_format_v1 *format)
-{
- return format->omac;
-}
-
-#endif /* __METADATA_H__ */
diff --git a/xlators/encryption/rot-13/Makefile.am b/xlators/encryption/rot-13/Makefile.am
deleted file mode 100644
index d471a3f9243..00000000000
--- a/xlators/encryption/rot-13/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = src
-
-CLEANFILES =
diff --git a/xlators/experimental/Makefile.am b/xlators/experimental/Makefile.am
deleted file mode 100644
index a530845c4c0..00000000000
--- a/xlators/experimental/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = jbr-client jbr-server fdl dht2 posix2
-
-CLEANFILES =
diff --git a/xlators/experimental/README.md b/xlators/experimental/README.md
deleted file mode 100644
index b00f24e114b..00000000000
--- a/xlators/experimental/README.md
+++ /dev/null
@@ -1,107 +0,0 @@
-# Purpose of this directory
-
-This directory is created to host experimental gluster translators. A new
-translator that is *experimental* in nature, would need to create its own
-subdirectory under this directory, to host/publish its work.
-
-Example:
- The first commit should include the following changes
- 1. xlators/experimental/Makefile.am
- NOTE: Add foobar to the list of SUBDIRS here
- 2. xlators/experimental/foobar
- 3. xlators/experimental/foobar/Makefle.am
- NOTE: Can be empty initially in the first commit
- 4. configure.ac
- NOTE: Include your experimental Makefile under AC_CONFIG_FILES
- 5. xlators/experimental/foobar/README.md
- NOTE: The readme should cover details as required for the translator to be
- accepted as experimental, primarily including a link to the specification
- under the gluster-specs repository [1]. Later the readme should suffice
- as an entry point for developers and users alike, who wish to experiment
- with the xlator under development
- 6. xlators/experimental/foobar/TODO.md
- NOTE: This is a list of TODO's identified during the development process
- that needs addressing over time. These include exceptions granted during
- the review process, for things not addressed when commits are merged into
- the repository
-
-# Why is it provided
-
-Quite often translator development that happens out of tree, does not get
-enough eyeballs early in its development phase, has not undergone CI
-(regression/continuous integration testing), and at times is not well integrated
-with the rest of gluster stack.
-
-Also, when such out of tree translators are submitted for acceptance, it is a
-bulk commit that makes review difficult and inefficient. Such submissions also
-have to be merged forward, and depending on the time spent in developing the
-translator the master branch could have moved far ahead, making this a painful
-activity.
-
-Experimental is born out of such needs, to provide xlator developers,
- - Early access to CI
- - Ability to adapt to ongoing changes in other parts of gluster
- - More eye balls on the code and design aspects of the translator
- - TBD: What else?
-
-and for maintainers,
- - Ability to look at smaller change sets in the review process
- - Ability to verify/check implementation against the specification provided
-
-# General rules
-
-1. If a new translator is added under here it should, at the very least, pass
-compilation.
-
-2. All translators under the experimental directory are shipped as a part of
-gluster-experimental RPMs.
-TBD: Spec file and other artifacts for the gluster-experimental RPM needs to be
-fleshed out.
-
-3. Experimental translators can leverage the CI framework as needed. Tests need
-to be hosted under xlators/experimental/tests initially, and later moved to the
-appropriate tests/ directory as the xlator matures. It is encouraged to provide
-tests for each commit or series of commits, so that code and tests can be
-inspected together.
-
-4. If any experimental translator breaks CI, it is quarantined till demonstrable
-proof towards the contrary is provided. This is applicable as tests are moved
-out of experimental tests directory to the CI framework directory, as otherwise
-experimental tests are not a part of regular CI regression runs.
-
-5. An experimental translator need not function at all, as a result commits can
-be merged pretty much at will as long as other rules as stated are not violated.
-
-6. Experimental submissions will be assigned a existing maintainer, to aid
-merging commits and ensure aspects of gluster code submissions are respected.
-When an experimental xlator is proposed and the first commit posted
-a mail to gluster-devel@gluster.org requesting attention, will assign the
-maintainer buddy for the submission.
-NOTE: As we scale, this may change.
-
-6. More?
-
-# Getting out of the experimental jail
-
-So you now think your xlator is ready to leave experimental and become part of
-mainline!
-- TBD: guidelines pending.
-
-# FAQs
-
-1. How do I submit/commit experimental framework changes outside of my
-experimental xlator?
- - Provide such framework changes as a separate commit
- - Conditionally ensure these are built or activated only when the experimental
- feature is activated, so as to prevent normal gluster workflow to function as
- before
- - TBD: guidelines and/or examples pending.
-
-2. Ask your question either on gluster-devel@gluster.org or as a change request
-to this file in gluster gerrit [2] for an answer that will be assimilated into
-this readme.
-
-# Links
-[1] http://review.gluster.org/#/q/project:glusterfs-specs
-
-[2] http://review.gluster.org/#/q/project:glusterfs
diff --git a/xlators/experimental/dht2/Makefile.am b/xlators/experimental/dht2/Makefile.am
deleted file mode 100644
index 9d910a66056..00000000000
--- a/xlators/experimental/dht2/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = dht2-client dht2-server
-
-CLEANFILES =
diff --git a/xlators/experimental/dht2/README.md b/xlators/experimental/dht2/README.md
deleted file mode 100644
index 8f249a83673..00000000000
--- a/xlators/experimental/dht2/README.md
+++ /dev/null
@@ -1,47 +0,0 @@
-# DHT2 Experimental README
-
-DHT2 is the new distribution scheme being developed for Gluster, that
-aims to remove the subdirectory spread across all DHT subvolumes.
-
-As a result of this work, the Gluster backend file layouts and on disk
-representation of directories and files are modified, thus making DHT2
-volumes incompatible to existing DHT based Gluster deployments.
-
-This document presents interested users with relevant data to play around
-with DHT2 volumes and provide feedback towards the same.
-
-REMOVEME: Design details currently under review here,
- - http://review.gluster.org/#/c/13395/
-
-TODO: Add more information as relevant code is pulled in
-
-# Directory strucutre elaborated
-
-## dht2-server
-This directory contains code for the server side DHT2 xlator. This xlator is
-intended to run on the brick graph, and is responsible for FOP synchronization,
-redirection, transactions, and journal replays.
-
-NOTE: The server side code also handles changes to volume/cluster map and
-also any rebalance activities.
-
-## dht2-client
-This directory contains code for the client side DHT2 xlator. This xlator is
-intended to run on the client/access protocol/mount graph, and is responsible
-for FOP routing to the right DHT2 subvolume. It uses a volume/cluster wide map
-of the routing (layout), to achieve the same.
-
-## dht2-common
-This directory contains code that is used in common across other parts of DHT2.
-For example, FOP routing store/consult abstractions that are common across the
-client and server side of DHT2.
-
-## Issue: How to build dht2-common?
- 1. Build a shared object
- - We cannot ship this as a part of both the client xlator RPM
- 2. Build an archive
- - Symbol clashes? when both the client and server xlators are loaded as a
- part of the same graph
- 3. Compile with other parts of the code that needs it
- - Not a very different from (2) above
- - This is what is chosen at present, and maybe would be revised later
diff --git a/xlators/experimental/dht2/TODO.md b/xlators/experimental/dht2/TODO.md
deleted file mode 100644
index 1e2c53c5b36..00000000000
--- a/xlators/experimental/dht2/TODO.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# DHT2 TODO list
-
-<Items will be added as code is pulled into the repository>
diff --git a/xlators/experimental/dht2/dht2-client/src/Makefile.am b/xlators/experimental/dht2/dht2-client/src/Makefile.am
deleted file mode 100644
index 3a13a2a3986..00000000000
--- a/xlators/experimental/dht2/dht2-client/src/Makefile.am
+++ /dev/null
@@ -1,21 +0,0 @@
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental
-xlator_LTLIBRARIES = dht2c.la
-
-dht2c_sources = dht2-client-main.c
-
-dht2common_sources = $(top_srcdir)/xlators/experimental/dht2/dht2-common/src/dht2-common-map.c
-
-dht2c_la_SOURCES = $(dht2c_sources) $(dht2common_sources)
-dht2c_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-dht2c_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-AM_CFLAGS = -Wall $(GF_CFLAGS)
-
-AM_CPPFLAGS = $(GF_CPPFLAGS)
-AM_CPPFLAGS += -I$(top_srcdir)/xlators/experimental/dht2/dht2-common/src/
-AM_CPPFLAGS += -I$(top_srcdir)/libglusterfs/src
-AM_CPPFLAGS += -I$(top_srcdir)/rpc/xdr/src
-AM_CPPFLAGS += -I$(top_builddir)/rpc/xdr/src
-AM_CPPFLAGS += -I$(top_srcdir)/xlators/lib/src
-
-CLEANFILES =
diff --git a/xlators/experimental/dht2/dht2-client/src/dht2-client-main.c b/xlators/experimental/dht2/dht2-client/src/dht2-client-main.c
deleted file mode 100644
index 556385724a4..00000000000
--- a/xlators/experimental/dht2/dht2-client/src/dht2-client-main.c
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-/* File: dht2-client-main.c
- * This file contains the xlator loading functions, FOP entry points
- * and options.
- * The entire functionality including comments is TODO.
- */
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "statedump.h"
-
-int32_t
-dht2_client_init(xlator_t *this)
-{
- if (!this->children) {
- gf_log(this->name, GF_LOG_ERROR,
- "Missing children in volume graph, this (%s) is"
- " not a leaf translator",
- this->name);
- return -1;
- }
-
- return 0;
-}
-
-void
-dht2_client_fini(xlator_t *this)
-{
- return;
-}
-
-class_methods_t class_methods = {
- .init = dht2_client_init,
- .fini = dht2_client_fini,
-};
-
-struct xlator_fops fops = {};
-
-struct xlator_cbks cbks = {};
-
-/*
-struct xlator_dumpops dumpops = {
-};
-*/
-
-struct volume_options options[] = {
- {.key = {NULL}},
-};
diff --git a/xlators/experimental/dht2/dht2-common/src/dht2-common-map.c b/xlators/experimental/dht2/dht2-common/src/dht2-common-map.c
deleted file mode 100644
index d959483b8a4..00000000000
--- a/xlators/experimental/dht2/dht2-common/src/dht2-common-map.c
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-/* File: dht2-common-map.c
- * This file contains helper routines to store, consult, the volume map
- * for subvolume to GFID relations.
- * The entire functionality including comments is TODO.
- */
-
-#include "glusterfs.h"
-#include "logging.h"
-#include "statedump.h"
diff --git a/xlators/experimental/dht2/dht2-server/src/Makefile.am b/xlators/experimental/dht2/dht2-server/src/Makefile.am
deleted file mode 100644
index c76fab0ca74..00000000000
--- a/xlators/experimental/dht2/dht2-server/src/Makefile.am
+++ /dev/null
@@ -1,23 +0,0 @@
-if WITH_SERVER
-xlator_LTLIBRARIES = dht2s.la
-endif
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental
-
-dht2s_sources = dht2-server-main.c
-
-dht2common_sources = $(top_srcdir)/xlators/experimental/dht2/dht2-common/src/dht2-common-map.c
-
-dht2s_la_SOURCES = $(dht2s_sources) $(dht2common_sources)
-dht2s_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-dht2s_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-AM_CFLAGS = -Wall $(GF_CFLAGS)
-
-AM_CPPFLAGS = $(GF_CPPFLAGS)
-AM_CPPFLAGS += -I$(top_srcdir)/xlators/experimental/dht2/dht2-common/src/
-AM_CPPFLAGS += -I$(top_srcdir)/libglusterfs/src
-AM_CPPFLAGS += -I$(top_srcdir)/rpc/xdr/src
-AM_CPPFLAGS += -I$(top_builddir)/rpc/xdr/src
-AM_CPPFLAGS += -I$(top_srcdir)/xlators/lib/src
-
-CLEANFILES =
diff --git a/xlators/experimental/dht2/dht2-server/src/dht2-server-main.c b/xlators/experimental/dht2/dht2-server/src/dht2-server-main.c
deleted file mode 100644
index f051a44e99f..00000000000
--- a/xlators/experimental/dht2/dht2-server/src/dht2-server-main.c
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-/* File: dht2-server-main.c
- * This file contains the xlator loading functions, FOP entry points
- * and options.
- * The entire functionality including comments is TODO.
- */
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "statedump.h"
-
-int32_t
-dht2_server_init(xlator_t *this)
-{
- if (!this->children) {
- gf_log(this->name, GF_LOG_ERROR,
- "Missing children in volume graph, this (%s) is"
- " not a leaf translator",
- this->name);
- return -1;
- }
-
- return 0;
-}
-
-void
-dht2_server_fini(xlator_t *this)
-{
- return;
-}
-
-class_methods_t class_methods = {
- .init = dht2_server_init,
- .fini = dht2_server_fini,
-};
-
-struct xlator_fops fops = {};
-
-struct xlator_cbks cbks = {};
-
-/*
-struct xlator_dumpops dumpops = {
-};
-*/
-
-struct volume_options options[] = {
- {.key = {NULL}},
-};
diff --git a/xlators/experimental/fdl/src/Makefile.am b/xlators/experimental/fdl/src/Makefile.am
deleted file mode 100644
index bdcaaf6c38d..00000000000
--- a/xlators/experimental/fdl/src/Makefile.am
+++ /dev/null
@@ -1,48 +0,0 @@
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental
-if WITH_SERVER
-xlator_LTLIBRARIES = fdl.la
-endif
-
-noinst_HEADERS = fdl.h
-
-nodist_fdl_la_SOURCES = fdl.c
-fdl_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-fdl_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-if WITH_SERVER
-sbin_PROGRAMS = gf_logdump gf_recon
-endif
-gf_logdump_SOURCES = logdump.c
-nodist_gf_logdump_SOURCES = libfdl.c
-gf_logdump_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
- $(top_builddir)/api/src/libgfapi.la $(GFAPI_LIBS) $(UUID_LIBS)
-
-# Eventually recon(ciliation) code will move elsewhere, but for now it's
-# easier to have it next to the similar logdump code.
-gf_recon_SOURCES = recon.c
-nodist_gf_recon_SOURCES = librecon.c
-gf_recon_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
- $(top_builddir)/api/src/libgfapi.la $(GFAPI_LIBS) $(UUID_LIBS)
-
-AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
- -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
- -I$(top_srcdir)/api/src -fPIC \
- -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS) \
- -DDATADIR=\"$(localstatedir)\"
-
-AM_CFLAGS = -Wall $(GF_CFLAGS)
-
-noinst_PYTHON = gen_fdl.py gen_dumper.py gen_recon.py
-EXTRA_DIST = fdl-tmpl.c.in dump-tmpl.c.in recon-tmpl.c.in
-
-CLEANFILES = $(nodist_fdl_la_SOURCES) $(nodist_gf_logdump_SOURCES) \
- $(nodist_gf_recon_SOURCES)
-
-fdl.c: fdl-tmpl.c.in gen_fdl.py
- $(PYTHON) $(srcdir)/gen_fdl.py $(srcdir)/fdl-tmpl.c.in > $@
-
-libfdl.c: dump-tmpl.c.in gen_dumper.py
- $(PYTHON) $(srcdir)/gen_dumper.py $(srcdir)/dump-tmpl.c.in > $@
-
-librecon.c: recon-tmpl.c.in gen_recon.py
- $(PYTHON) $(srcdir)/gen_recon.py $(srcdir)/recon-tmpl.c.in > $@
diff --git a/xlators/experimental/fdl/src/dump-tmpl.c.in b/xlators/experimental/fdl/src/dump-tmpl.c.in
deleted file mode 100644
index 97249ac3e71..00000000000
--- a/xlators/experimental/fdl/src/dump-tmpl.c.in
+++ /dev/null
@@ -1,177 +0,0 @@
-#pragma fragment PROLOG
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#include <ctype.h>
-#endif
-
-#include "glfs.h"
-#include "iatt.h"
-#include "xlator.h"
-#include "fdl.h"
-
-/*
- * Returns 0 if the string is ASCII printable *
- * and -1 if it's not ASCII printable *
- */
-int
-str_isprint(char *s)
-{
- int ret = -1;
-
- if (!s)
- goto out;
-
- while (s[0] != '\0') {
- if (!isprint(s[0]))
- goto out;
- else
- s++;
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-#pragma fragment DICT
-{
- int key_len, data_len;
- char *key_ptr;
- char *key_val;
- printf("@ARGNAME@ = dict {\n");
- for (;;) {
- key_len = *((int *)new_meta);
- new_meta += sizeof(int);
- if (!key_len) {
- break;
- }
- key_ptr = new_meta;
- new_meta += key_len;
- data_len = *((int *)new_meta);
- key_val = new_meta + sizeof(int);
- new_meta += sizeof(int) + data_len;
- if (str_isprint(key_val))
- printf(" %s = <%d bytes>\n", key_ptr, data_len);
- else
- printf(" %s = %s <%d bytes>\n", key_ptr, key_val, data_len);
- }
- printf("}\n");
-}
-
-#pragma fragment DOUBLE
-printf("@ARGNAME@ = @FORMAT@\n", *((uint64_t *)new_meta),
- *((uint64_t *)new_meta));
-new_meta += sizeof(uint64_t);
-
-#pragma fragment GFID
-printf("@ARGNAME@ = <gfid %s>\n", uuid_utoa(*((uuid_t *)new_meta)));
-new_meta += 16;
-
-#pragma fragment INTEGER
-printf("@ARGNAME@ = @FORMAT@\n", *((uint32_t *)new_meta),
- *((uint32_t *)new_meta));
-new_meta += sizeof(uint32_t);
-
-#pragma fragment LOC
-printf("@ARGNAME@ = loc {\n");
-printf(" gfid = %s\n", uuid_utoa(*((uuid_t *)new_meta)));
-new_meta += 16;
-printf(" pargfid = %s\n", uuid_utoa(*((uuid_t *)new_meta)));
-new_meta += 16;
-if (*(new_meta++)) {
- printf(" name = %s\n", new_meta);
- new_meta += (strlen(new_meta) + 1);
-}
-printf("}\n");
-
-#pragma fragment STRING
-if (*(new_meta++)) {
- printf("@ARGNAME@ = %s\n", new_meta);
- new_meta += (strlen(new_meta) + 1);
-}
-
-#pragma fragment VECTOR
-{
- size_t len = *((size_t *)new_meta);
- new_meta += sizeof(len);
- printf("@ARGNAME@ = <%zu bytes>\n", len);
- new_data += len;
-}
-
-#pragma fragment IATT
-{
- ia_prot_t *myprot = ((ia_prot_t *)new_meta);
- printf("@ARGNAME@ = iatt {\n");
- printf(" ia_prot = %c%c%c", myprot->suid ? 'S' : '-',
- myprot->sgid ? 'S' : '-', myprot->sticky ? 'T' : '-');
- printf("%c%c%c", myprot->owner.read ? 'r' : '-',
- myprot->owner.write ? 'w' : '-', myprot->owner.exec ? 'x' : '-');
- printf("%c%c%c", myprot->group.read ? 'r' : '-',
- myprot->group.write ? 'w' : '-', myprot->group.exec ? 'x' : '-');
- printf("%c%c%c\n", myprot->other.read ? 'r' : '-',
- myprot->other.write ? 'w' : '-', myprot->other.exec ? 'x' : '-');
- new_meta += sizeof(ia_prot_t);
- uint32_t *myints = (uint32_t *)new_meta;
- printf(" ia_uid = %u\n", myints[0]);
- printf(" ia_gid = %u\n", myints[1]);
- printf(" ia_atime = %u.%09u\n", myints[2], myints[3]);
- printf(" ia_mtime = %u.%09u\n", myints[4], myints[5]);
- new_meta += sizeof(*myints) * 6;
-}
-
-#pragma fragment FOP
-void fdl_dump_@NAME@(char **old_meta, char **old_data)
-{
- char *new_meta = *old_meta;
- char *new_data = *old_data;
-
- /* TBD: word size/endianness */
- @FUNCTION_BODY@
-
- *old_meta = new_meta;
- *old_data = new_data;
-}
-
-#pragma fragment CASE
-case GF_FOP_@UPNAME@:
- printf("=== GF_FOP_@UPNAME@\n");
- fdl_dump_@NAME@(&new_meta, &new_data);
- break;
-
-#pragma fragment EPILOG
- int
- fdl_dump(char **old_meta, char **old_data)
- {
- char *new_meta = *old_meta;
- char *new_data = *old_data;
- static glfs_t *fs = NULL;
- int recognized = 1;
- event_header_t *eh;
-
- /*
- * We don't really call anything else in GFAPI, but this is the most
- * convenient way to satisfy all of the spurious dependencies on how it
- * or glusterfsd initialize (e.g. setting up THIS).
- */
- if (!fs) {
- fs = glfs_new("dummy");
- }
-
- eh = (event_header_t *)new_meta;
- new_meta += sizeof(*eh);
-
- /* TBD: check event_type instead of assuming NEW_REQUEST */
-
- switch (eh->fop_type) {
- @SWITCH_BODY@
-
- default :
- printf("unknown fop %u\n", eh->fop_type);
- recognized = 0;
- }
-
- *old_meta = new_meta;
- *old_data = new_data;
- return recognized;
- }
diff --git a/xlators/experimental/fdl/src/fdl-tmpl.c.in b/xlators/experimental/fdl/src/fdl-tmpl.c.in
deleted file mode 100644
index c99157be957..00000000000
--- a/xlators/experimental/fdl/src/fdl-tmpl.c.in
+++ /dev/null
@@ -1,513 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <fcntl.h>
-#include <unistd.h>
-#include <sys/mman.h>
-#include "call-stub.h"
-#include "iatt.h"
-#include "defaults.h"
-#include "syscall.h"
-#include "xlator.h"
-#include "fdl.h"
-
-/* TBD: make tunable */
-#define META_FILE_SIZE (1 << 20)
-#define DATA_FILE_SIZE (1 << 24)
-
-enum gf_fdl { gf_fdl_mt_fdl_private_t = gf_common_mt_end + 1, gf_fdl_mt_end };
-
-typedef struct {
- char *type;
- off_t size;
- char *path;
- int fd;
- void *ptr;
- off_t max_offset;
-} log_obj_t;
-
-typedef struct {
- struct list_head reqs;
- pthread_mutex_t req_lock;
- pthread_cond_t req_cond;
- char *log_dir;
- pthread_t worker;
- gf_boolean_t should_stop;
- gf_boolean_t change_term;
- log_obj_t meta_log;
- log_obj_t data_log;
- int term;
- int first_term;
-} fdl_private_t;
-
-int32_t
-fdl_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata);
-
-void
-fdl_enqueue(xlator_t *this, call_stub_t *stub)
-{
- fdl_private_t *priv = this->private;
-
- pthread_mutex_lock(&priv->req_lock);
- list_add_tail(&stub->list, &priv->reqs);
- pthread_mutex_unlock(&priv->req_lock);
-
- pthread_cond_signal(&priv->req_cond);
-}
-
-#pragma generate
-
-char *
-fdl_open_term_log(xlator_t *this, log_obj_t *obj, int term)
-{
- fdl_private_t *priv = this->private;
- int ret;
- char *ptr = NULL;
-
- /*
- * Use .jnl instead of .log so that we don't get test info (mistakenly)
- * appended to our journal files.
- */
- if (this->ctx->cmd_args.log_ident) {
- ret = gf_asprintf(&obj->path, "%s/%s-%s-%d.jnl", priv->log_dir,
- this->ctx->cmd_args.log_ident, obj->type, term);
- } else {
- ret = gf_asprintf(&obj->path, "%s/fubar-%s-%d.jnl", priv->log_dir,
- obj->type, term);
- }
- if ((ret <= 0) || !obj->path) {
- gf_log(this->name, GF_LOG_ERROR, "failed to construct log-file path");
- goto err;
- }
-
- gf_log(this->name, GF_LOG_INFO, "opening %s (size %" PRId64 ")", obj->path,
- obj->size);
-
- obj->fd = open(obj->path, O_RDWR | O_CREAT | O_TRUNC, 0666);
- if (obj->fd < 0) {
- gf_log(this->name, GF_LOG_ERROR, "failed to open log file (%s)",
- strerror(errno));
- goto err;
- }
-
-#if !defined(GF_BSD_HOST_OS)
- /*
- * NetBSD can just go die in a fire. Even though it claims to support
- * fallocate/posix_fallocate they don't actually *do* anything so the
- * file size remains zero. Then mmap succeeds anyway, but any access
- * to the mmap'ed region will segfault. It would be acceptable for
- * fallocate to do what it says, for mmap to fail, or for access to
- * extend the file. NetBSD managed to hit the trifecta of Getting
- * Everything Wrong, and debugging in that environment to get this far
- * has already been painful enough (systems I worked on in 1990 were
- * better that way). We'll fall through to the lseek/write method, and
- * performance will be worse, and TOO BAD.
- */
- if (sys_fallocate(obj->fd, 0, 0, obj->size) < 0)
-#endif
- {
- gf_log(this->name, GF_LOG_WARNING,
- "failed to fallocate space for log file");
- /* Have to do this the ugly page-faulty way. */
- (void)sys_lseek(obj->fd, obj->size - 1, SEEK_SET);
- (void)sys_write(obj->fd, "", 1);
- }
-
- ptr = mmap(NULL, obj->size, PROT_WRITE, MAP_SHARED, obj->fd, 0);
- if (ptr == MAP_FAILED) {
- gf_log(this->name, GF_LOG_ERROR, "failed to mmap log (%s)",
- strerror(errno));
- goto err;
- }
-
- obj->ptr = ptr;
- obj->max_offset = 0;
- return ptr;
-
-err:
- if (obj->fd >= 0) {
- sys_close(obj->fd);
- obj->fd = (-1);
- }
- if (obj->path) {
- GF_FREE(obj->path);
- obj->path = NULL;
- }
- return ptr;
-}
-
-void
-fdl_close_term_log(xlator_t *this, log_obj_t *obj)
-{
- fdl_private_t *priv = this->private;
-
- if (obj->ptr) {
- (void)munmap(obj->ptr, obj->size);
- obj->ptr = NULL;
- }
-
- if (obj->fd >= 0) {
- gf_log(this->name, GF_LOG_INFO,
- "truncating term %d %s journal to %" PRId64,
- priv->term, obj->type, obj->max_offset);
- if (sys_ftruncate(obj->fd, obj->max_offset) < 0) {
- gf_log(this->name, GF_LOG_WARNING,
- "failed to truncate journal (%s)", strerror(errno));
- }
- sys_close(obj->fd);
- obj->fd = (-1);
- }
-
- if (obj->path) {
- GF_FREE(obj->path);
- obj->path = NULL;
- }
-}
-
-gf_boolean_t
-fdl_change_term(xlator_t *this, char **meta_ptr, char **data_ptr)
-{
- fdl_private_t *priv = this->private;
-
- fdl_close_term_log(this, &priv->meta_log);
- fdl_close_term_log(this, &priv->data_log);
-
- ++(priv->term);
-
- *meta_ptr = fdl_open_term_log(this, &priv->meta_log, priv->term);
- if (!*meta_ptr) {
- return _gf_false;
- }
-
- *data_ptr = fdl_open_term_log(this, &priv->data_log, priv->term);
- if (!*data_ptr) {
- return _gf_false;
- }
-
- return _gf_true;
-}
-
-void *
-fdl_worker(void *arg)
-{
- xlator_t *this = arg;
- fdl_private_t *priv = this->private;
- call_stub_t *stub;
- char *meta_ptr = NULL;
- off_t *meta_offset = &priv->meta_log.max_offset;
- char *data_ptr = NULL;
- off_t *data_offset = &priv->data_log.max_offset;
- unsigned long base_as_ul;
- void *msync_ptr;
- size_t msync_len;
- gf_boolean_t recycle;
- void *err_label = &&err_unlocked;
-
- priv->meta_log.type = "meta";
- priv->meta_log.size = META_FILE_SIZE;
- priv->meta_log.path = NULL;
- priv->meta_log.fd = (-1);
- priv->meta_log.ptr = NULL;
-
- priv->data_log.type = "data";
- priv->data_log.size = DATA_FILE_SIZE;
- priv->data_log.path = NULL;
- priv->data_log.fd = (-1);
- priv->data_log.ptr = NULL;
-
- /* TBD: initial term should come from persistent storage (e.g. etcd) */
- priv->first_term = ++(priv->term);
- meta_ptr = fdl_open_term_log(this, &priv->meta_log, priv->term);
- if (!meta_ptr) {
- goto *err_label;
- }
- data_ptr = fdl_open_term_log(this, &priv->data_log, priv->term);
- if (!data_ptr) {
- fdl_close_term_log(this, &priv->meta_log);
- goto *err_label;
- }
-
- for (;;) {
- pthread_mutex_lock(&priv->req_lock);
- err_label = &&err_locked;
- while (list_empty(&priv->reqs)) {
- pthread_cond_wait(&priv->req_cond, &priv->req_lock);
- if (priv->should_stop) {
- goto *err_label;
- }
- if (priv->change_term) {
- if (!fdl_change_term(this, &meta_ptr, &data_ptr)) {
- goto *err_label;
- }
- priv->change_term = _gf_false;
- continue;
- }
- }
- stub = list_entry(priv->reqs.next, call_stub_t, list);
- list_del_init(&stub->list);
- pthread_mutex_unlock(&priv->req_lock);
- err_label = &&err_unlocked;
- /*
- * TBD: batch requests
- *
- * What we should do here is gather up *all* of the requests
- * that have accumulated since we were last at this point,
- * blast them all out in one big writev, and then dispatch them
- * all before coming back for more. That maximizes throughput,
- * at some cost to latency (due to queuing effects at the log
- * stage). Note that we're likely to be above io-threads, so
- * the dispatch itself will be parallelized (at further cost to
- * latency). For now, we just do the simplest thing and handle
- * one request all the way through before fetching the next.
- *
- * So, why mmap/msync instead of writev/fdatasync? Because it's
- * faster. Much faster. So much faster that I half-suspect
- * cheating, but it's more convenient for now than having to
- * ensure that everything's page-aligned for O_DIRECT (the only
- * alternative that still might avoid ridiculous levels of
- * local-FS overhead).
- *
- * TBD: check that msync really does get our data to disk.
- */
- gf_log(this->name, GF_LOG_DEBUG, "logging %u+%u bytes for op %d",
- stub->jnl_meta_len, stub->jnl_data_len, stub->fop);
- recycle = _gf_false;
- if ((*meta_offset + stub->jnl_meta_len) > priv->meta_log.size) {
- recycle = _gf_true;
- }
- if ((*data_offset + stub->jnl_data_len) > priv->data_log.size) {
- recycle = _gf_true;
- }
- if (recycle && !fdl_change_term(this, &meta_ptr, &data_ptr)) {
- goto *err_label;
- }
- meta_ptr = priv->meta_log.ptr;
- data_ptr = priv->data_log.ptr;
- gf_log(this->name, GF_LOG_DEBUG, "serializing to %p/%p",
- meta_ptr + *meta_offset, data_ptr + *data_offset);
- stub->serialize(stub, meta_ptr + *meta_offset, data_ptr + *data_offset);
- if (stub->jnl_meta_len > 0) {
- base_as_ul = (unsigned long)(meta_ptr + *meta_offset);
- msync_ptr = (void *)(base_as_ul & ~0x0fff);
- msync_len = (size_t)(base_as_ul & 0x0fff);
- if (msync(msync_ptr, msync_len + stub->jnl_meta_len, MS_SYNC) < 0) {
- gf_log(this->name, GF_LOG_WARNING,
- "failed to log request meta (%s)", strerror(errno));
- }
- *meta_offset += stub->jnl_meta_len;
- }
- if (stub->jnl_data_len > 0) {
- base_as_ul = (unsigned long)(data_ptr + *data_offset);
- msync_ptr = (void *)(base_as_ul & ~0x0fff);
- msync_len = (size_t)(base_as_ul & 0x0fff);
- if (msync(msync_ptr, msync_len + stub->jnl_data_len, MS_SYNC) < 0) {
- gf_log(this->name, GF_LOG_WARNING,
- "failed to log request data (%s)", strerror(errno));
- }
- *data_offset += stub->jnl_data_len;
- }
- call_resume(stub);
- }
-
-err_locked:
- pthread_mutex_unlock(&priv->req_lock);
-err_unlocked:
- fdl_close_term_log(this, &priv->meta_log);
- fdl_close_term_log(this, &priv->data_log);
- return NULL;
-}
-
-int32_t
-fdl_ipc_continue(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
-{
- /*
- * Nothing to be done here. Just Unwind. *
- */
- STACK_UNWIND_STRICT(ipc, frame, 0, 0, xdata);
-
- return 0;
-}
-
-int32_t
-fdl_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
-{
- call_stub_t *stub;
- fdl_private_t *priv = this->private;
- dict_t *tdict;
- int32_t gt_err = EIO;
-
- switch (op) {
- case FDL_IPC_CHANGE_TERM:
- gf_log(this->name, GF_LOG_INFO, "got CHANGE_TERM op");
- priv->change_term = _gf_true;
- pthread_cond_signal(&priv->req_cond);
- STACK_UNWIND_STRICT(ipc, frame, 0, 0, NULL);
- break;
-
- case FDL_IPC_GET_TERMS:
- gf_log(this->name, GF_LOG_INFO, "got GET_TERMS op");
- tdict = dict_new();
- if (!tdict) {
- gt_err = ENOMEM;
- goto gt_done;
- }
- if (dict_set_int32(tdict, "first", priv->first_term) != 0) {
- goto gt_done;
- }
- if (dict_set_int32(tdict, "last", priv->term) != 0) {
- goto gt_done;
- }
- gt_err = 0;
- gt_done:
- if (gt_err) {
- STACK_UNWIND_STRICT(ipc, frame, -1, gt_err, NULL);
- } else {
- STACK_UNWIND_STRICT(ipc, frame, 0, 0, tdict);
- }
- if (tdict) {
- dict_unref(tdict);
- }
- break;
-
- case FDL_IPC_JBR_SERVER_ROLLBACK:
- /*
- * In case of a rollback from jbr-server, dump *
- * the term and index number in the journal, *
- * which will later be used to rollback the fop *
- */
- stub = fop_ipc_stub(frame, fdl_ipc_continue, op, xdata);
- fdl_len_ipc(stub);
- stub->serialize = fdl_serialize_ipc;
- fdl_enqueue(this, stub);
-
- break;
-
- default:
- STACK_WIND_TAIL(frame, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ipc, op, xdata);
- }
-
- return 0;
-}
-
-int
-fdl_init(xlator_t *this)
-{
- fdl_private_t *priv = NULL;
-
- priv = GF_CALLOC(1, sizeof(*priv), gf_fdl_mt_fdl_private_t);
- if (!priv) {
- gf_log(this->name, GF_LOG_ERROR, "failed to allocate fdl_private");
- goto err;
- }
-
- INIT_LIST_HEAD(&priv->reqs);
- if (pthread_mutex_init(&priv->req_lock, NULL) != 0) {
- gf_log(this->name, GF_LOG_ERROR, "failed to initialize req_lock");
- goto err;
- }
- if (pthread_cond_init(&priv->req_cond, NULL) != 0) {
- gf_log(this->name, GF_LOG_ERROR, "failed to initialize req_cond");
- goto err;
- }
-
- GF_OPTION_INIT("log-path", priv->log_dir, path, err);
-
- this->private = priv;
- /*
- * The rest of the fop table is automatically generated, so this is a
- * bit cleaner than messing with the generation to add a hand-written
- * exception.
- */
-
- if (gf_thread_create(&priv->worker, NULL, fdl_worker, this, "fdlwrker") !=
- 0) {
- gf_log(this->name, GF_LOG_ERROR, "failed to start fdl_worker");
- goto err;
- }
-
- return 0;
-
-err:
- if (priv) {
- GF_FREE(priv);
- }
- return -1;
-}
-
-void
-fdl_fini(xlator_t *this)
-{
- fdl_private_t *priv = this->private;
-
- if (priv) {
- priv->should_stop = _gf_true;
- pthread_cond_signal(&priv->req_cond);
- pthread_join(priv->worker, NULL);
- GF_FREE(priv);
- }
-}
-
-int
-fdl_reconfigure(xlator_t *this, dict_t *options)
-{
- fdl_private_t *priv = this->private;
-
- GF_OPTION_RECONF("log_dir", priv->log_dir, options, path, out);
- /* TBD: react if it changed */
-
-out:
- return 0;
-}
-
-int32_t
-mem_acct_init(xlator_t *this)
-{
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO("fdl", this, out);
-
- ret = xlator_mem_acct_init(this, gf_fdl_mt_end + 1);
-
- if (ret != 0) {
- gf_log(this->name, GF_LOG_ERROR,
- "Memory accounting init"
- "failed");
- return ret;
- }
-out:
- return ret;
-}
-
-class_methods_t class_methods = {
- .init = fdl_init,
- .fini = fdl_fini,
- .reconfigure = fdl_reconfigure,
- .notify = default_notify,
-};
-
-struct volume_options options[] = {
- {.key = {"log-path"},
- .type = GF_OPTION_TYPE_PATH,
- .default_value = DEFAULT_LOG_FILE_DIRECTORY,
- .description = "Directory for FDL files."},
- {.key = {NULL}},
-};
-
-struct xlator_cbks cbks = {
- .release = default_release,
- .releasedir = default_releasedir,
- .forget = default_forget,
-};
diff --git a/xlators/experimental/fdl/src/fdl.h b/xlators/experimental/fdl/src/fdl.h
deleted file mode 100644
index 827db9f1246..00000000000
--- a/xlators/experimental/fdl/src/fdl.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _FDL_H_
-#define _FDL_H_
-
-#define NEW_REQUEST (uint8_t)'N'
-
-typedef struct {
- uint8_t event_type; /* e.g. NEW_REQUEST */
- uint8_t fop_type; /* e.g. GF_FOP_SETATTR */
- uint16_t request_id;
- uint32_t ext_length;
-} event_header_t;
-
-enum {
- FDL_IPC_BASE = 0xfeedbee5, /* ... and they make honey */
- FDL_IPC_CHANGE_TERM,
- FDL_IPC_GET_TERMS,
- FDL_IPC_JBR_SERVER_ROLLBACK
-};
-
-#endif /* _FDL_H_ */
diff --git a/xlators/experimental/fdl/src/gen_dumper.py b/xlators/experimental/fdl/src/gen_dumper.py
deleted file mode 100755
index 630b54492f7..00000000000
--- a/xlators/experimental/fdl/src/gen_dumper.py
+++ /dev/null
@@ -1,117 +0,0 @@
-#!/usr/bin/python3
-
-from __future__ import print_function
-import os
-import re
-import sys
-
-curdir = os.path.dirname (sys.argv[0])
-gendir = os.path.join (curdir, '../../../../libglusterfs/src')
-sys.path.append (gendir)
-from generator import ops, fop_subs, cbk_subs, generate
-
-# See the big header comment at the start of gen_fdl.py to see how the stages
-# fit together. The big difference here is that *all* of the C code is in the
-# template file as labelled fragments, instead of as Python strings. That
-# makes it much easier to edit in one place, with proper syntax highlighting
-# and indentation.
-#
-# Stage 1 uses type-specific fragments to generate FUNCTION_BODY, instead of
-# LEN_*_TEMPLATE and SERLZ_*_TEMPLATE to generate LEN_CODE and SER_CODE.
-#
-# Stage 2 uses the FOP and CASE fragments instead of RECON_TEMPLATE and
-# FOP_TEMPLATE. The expanded FOP code (including FUNCTION_BODY substitution
-# in the middle of each function) is emitted immediately; the expanded CASE
-# code is saved for the next stage.
-#
-# Stage 3 uses the PROLOG and EPILOG fragments, with the expanded CASE code
-# in the middle of EPILOG, to generate the whole output file.
-#
-# Another way of looking at it is to consider how the fragments appear in
-# the final output:
-#
-# PROLOG
-# FOP (expanded for CREATE)
-# FOP before FUNCTION_BODY
-# LOC, INTEGER, GFID, etc. (one per arg, by type)
-# FOP after FUNCTION_BODY
-# FOP (expanded for WRITEV)
-# FOP before FUNCTION_BODY
-# GFID, VECTOR, etc. (on per arg, by type)
-# FOP after FUNCTION_BODY
-# (more FOPs)
-# EPILOG
-# EPILOG before CASE
-# CASE statements (one per fop)
-# EPILOG after CASE
-
-typemap = {
- 'dict_t *': ( "DICT", ""),
- 'fd_t *': ( "GFID", ""),
- 'dev_t': ( "DOUBLE", "%\"PRId64\" (0x%\"PRIx64\")"),
- 'gf_xattrop_flags_t': ( "INTEGER", "%d (0x%x)"),
- 'int32_t': ( "INTEGER", "%d (0x%x)"),
- 'mode_t': ( "INTEGER", "%d (0x%x)"),
- 'off_t': ( "DOUBLE", "%\"PRId64\" (0x%\"PRIx64\")"),
- 'size_t': ( "DOUBLE", "%\"PRId64\" (0x%\"PRIx64\")"),
- 'uint32_t': ( "INTEGER", "%d (0x%x)"),
- 'loc_t *': ( "LOC", ""),
- 'const char *': ( "STRING", ""),
- 'struct iovec *': ( "VECTOR", ""),
- 'struct iatt *': ( "IATT", ""),
-}
-
-def get_special_subs (args):
- code = ""
- for arg in args:
- if (arg[0] != 'fop-arg') or (len(arg) < 4):
- continue
- recon_type, recon_fmt = typemap[arg[2]]
- code += fragments[recon_type].replace("@ARGNAME@", arg[3]) \
- .replace("@FORMAT@", recon_fmt)
- return code
-
-def gen_functions ():
- code = ""
- for name, value in ops.items():
- if "journal" not in [ x[0] for x in value ]:
- continue
- fop_subs[name]["@FUNCTION_BODY@"] = get_special_subs(value)
- # Print the FOP fragment with @FUNCTION_BODY@ in the middle.
- code += generate(fragments["FOP"], name, fop_subs)
- return code
-
-def gen_cases ():
- code = ""
- for name, value in ops.items():
- if "journal" not in [ x[0] for x in value ]:
- continue
- # Add the CASE fragment for this fop.
- code += generate(fragments["CASE"], name, fop_subs)
- return code
-
-def load_fragments (path="recon-tmpl.c"):
- pragma_re = re.compile('pragma fragment (.*)')
- cur_symbol = None
- cur_value = ""
- result = {}
- for line in open(path, "r").readlines():
- m = pragma_re.search(line)
- if m:
- if cur_symbol:
- result[cur_symbol] = cur_value
- cur_symbol = m.group(1)
- cur_value = ""
- else:
- cur_value += line
- if cur_symbol:
- result[cur_symbol] = cur_value
- return result
-
-if __name__ == "__main__":
- fragments = load_fragments(sys.argv[1])
- print("/* BEGIN GENERATED CODE - DO NOT MODIFY */")
- print(fragments["PROLOG"])
- print(gen_functions())
- print(fragments["EPILOG"].replace("@SWITCH_BODY@", gen_cases()))
- print("/* END GENERATED CODE */")
diff --git a/xlators/experimental/fdl/src/gen_fdl.py b/xlators/experimental/fdl/src/gen_fdl.py
deleted file mode 100755
index 467ec8927b7..00000000000
--- a/xlators/experimental/fdl/src/gen_fdl.py
+++ /dev/null
@@ -1,354 +0,0 @@
-#!/usr/bin/python3
-
-from __future__ import print_function
-import os
-import sys
-
-curdir = os.path.dirname (sys.argv[0])
-gendir = os.path.join (curdir, '../../../../libglusterfs/src')
-sys.path.append (gendir)
-from generator import ops, fop_subs, cbk_subs, generate
-
-# Generation occurs in three stages. In this case, it actually makes more
-# sense to discuss them in the *opposite* order of that in which they
-# actually happen.
-#
-# Stage 3 is to insert all of the generated code into a file, replacing the
-# "#pragma generate" that's already there. The file can thus contain all
-# sorts of stuff that's not specific to one fop, either before or after the
-# generated code as appropriate.
-#
-# Stage 2 is to generate all of the code *for a particular fop*, using a
-# string-valued template plus a table of substitution values. Most of these
-# are built in to the generator itself. However, we also add a couple that
-# are specific to this particular translator - LEN_CODE and SER_CODE. These
-# are per-fop functions to get the length or the contents (respectively) of
-# what we'll put in the log. As with stage 3 allowing per-file boilerplate
-# before and after generated code, this allows per-fop boilerplate before and
-# after generated code.
-#
-# Stage 1, therefore, is to create the LEN_CODE and SER_CODE substitutions for
-# each fop, and put them in the same table where e.g. NAME and SHORT_ARGS
-# already are. We do this by looking at the fop-description table in the
-# generator module, then doing out own template substitution to plug each
-# specific argument name into another string-valued template.
-#
-# So, what does this leave us with in terms of variables and files?
-#
-# For stage 1, we have a series of LEN_*_TEMPLATE and SERLZ_*_TEMPLATE
-# strings, which are used to generate the length and serialization code for
-# each argument type.
-#
-# For stage 2, we have a bunch of *_TEMPLATE strings (no LEN_ or SERLZ_
-# prefix), which are used (along with the output from stage 1) to generate
-# whole functions.
-#
-# For stage 3, we have a whole separate file (fdl_tmpl.c) into which we insert
-# the collection of all functions defined in stage 2.
-
-
-LEN_TEMPLATE = """
-void
-fdl_len_@NAME@ (call_stub_t *stub)
-{
- uint32_t meta_len = sizeof (event_header_t);
- uint32_t data_len = 0;
-
- /* TBD: global stuff, e.g. uid/gid */
-@LEN_CODE@
-
- /* TBD: pad extension length */
- stub->jnl_meta_len = meta_len;
- stub->jnl_data_len = data_len;
-}
-"""
-
-SER_TEMPLATE = """
-void
-fdl_serialize_@NAME@ (call_stub_t *stub, char *meta_buf, char *data_buf)
-{
- event_header_t *eh;
- unsigned long offset = 0;
-
- /* TBD: word size/endianness */
- eh = (event_header_t *)meta_buf;
- eh->event_type = NEW_REQUEST;
- eh->fop_type = GF_FOP_@UPNAME@;
- eh->request_id = 0; // TBD
- meta_buf += sizeof (*eh);
-@SER_CODE@
- /* TBD: pad extension length */
- eh->ext_length = offset;
-}
-"""
-
-CBK_TEMPLATE = """
-int32_t
-fdl_@NAME@_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- @LONG_ARGS@)
-{
- STACK_UNWIND_STRICT (@NAME@, frame, op_ret, op_errno,
- @SHORT_ARGS@);
- return 0;
-}
-"""
-
-CONTINUE_TEMPLATE = """
-int32_t
-fdl_@NAME@_continue (call_frame_t *frame, xlator_t *this,
- @LONG_ARGS@)
-{
- STACK_WIND (frame, fdl_@NAME@_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@,
- @SHORT_ARGS@);
- return 0;
-}
-
-"""
-
-FOP_TEMPLATE = """
-int32_t
-fdl_@NAME@ (call_frame_t *frame, xlator_t *this,
- @LONG_ARGS@)
-{
- call_stub_t *stub;
-
- stub = fop_@NAME@_stub (frame, default_@NAME@,
- @SHORT_ARGS@);
- fdl_len_@NAME@ (stub);
- stub->serialize = fdl_serialize_@NAME@;
- fdl_enqueue (this, stub);
-
- return 0;
-}
-"""
-
-LEN_DICT_TEMPLATE = """
- if (@SRC@) {
- data_pair_t *memb;
- for (memb = @SRC@->members_list; memb; memb = memb->next) {
- meta_len += sizeof(int);
- meta_len += strlen(memb->key) + 1;
- meta_len += sizeof(int);
- meta_len += memb->value->len;
- }
- }
- meta_len += sizeof(int);
-"""
-
-LEN_GFID_TEMPLATE = """
- meta_len += 16;
-"""
-
-LEN_INTEGER_TEMPLATE = """
- meta_len += sizeof (@SRC@);
-"""
-
-# 16 for gfid, 16 for pargfid, 1 for flag, 0/1 for terminating NUL
-LEN_LOC_TEMPLATE = """
- if (@SRC@.name) {
- meta_len += (strlen (@SRC@.name) + 34);
- } else {
- meta_len += 33;
- }
-"""
-
-LEN_STRING_TEMPLATE = """
- if (@SRC@) {
- meta_len += (strlen (@SRC@) + 1);
- } else {
- meta_len += 1;
- }
-"""
-
-LEN_VECTOR_TEMPLATE = """
- meta_len += sizeof(size_t);
- data_len += iov_length (@VEC@, @CNT@);
-"""
-
-LEN_IATT_TEMPLATE = """
- meta_len += sizeof(@SRC@.ia_prot);
- meta_len += sizeof(@SRC@.ia_uid);
- meta_len += sizeof(@SRC@.ia_gid);
- meta_len += sizeof(@SRC@.ia_atime);
- meta_len += sizeof(@SRC@.ia_atime_nsec);
- meta_len += sizeof(@SRC@.ia_mtime);
- meta_len += sizeof(@SRC@.ia_mtime_nsec);
-"""
-
-SERLZ_DICT_TEMPLATE = """
- if (@SRC@) {
- data_pair_t *memb;
- for (memb = @SRC@->members_list; memb; memb = memb->next) {
- *((int *)(meta_buf+offset)) = strlen(memb->key) + 1;
- offset += sizeof(int);
- strcpy (meta_buf+offset, memb->key);
- offset += strlen(memb->key) + 1;
- *((int *)(meta_buf+offset)) = memb->value->len;
- offset += sizeof(int);
- memcpy (meta_buf+offset, memb->value->data, memb->value->len);
- offset += memb->value->len;
- }
- }
- *((int *)(meta_buf+offset)) = 0;
- offset += sizeof(int);
-"""
-
-SERLZ_GFID_TEMPLATE = """
- memcpy (meta_buf+offset, @SRC@->inode->gfid, 16);
- offset += 16;
-"""
-
-SERLZ_INTEGER_TEMPLATE = """
- memcpy (meta_buf+offset, &@SRC@, sizeof(@SRC@));
- offset += sizeof(@SRC@);
-"""
-
-SERLZ_LOC_TEMPLATE = """
- memcpy (meta_buf+offset, @SRC@.gfid, 16);
- offset += 16;
- memcpy (meta_buf+offset, @SRC@.pargfid, 16);
- offset += 16;
- if (@SRC@.name) {
- *(meta_buf+offset) = 1;
- ++offset;
- strcpy (meta_buf+offset, @SRC@.name);
- offset += (strlen (@SRC@.name) + 1);
- } else {
- *(meta_buf+offset) = 0;
- ++offset;
- }
-"""
-
-SERLZ_STRING_TEMPLATE = """
- if (@SRC@) {
- *(meta_buf+offset) = 1;
- ++offset;
- strcpy (meta_buf+offset, @SRC@);
- offset += strlen(@SRC@);
- } else {
- *(meta_buf+offset) = 0;
- ++offset;
- }
-"""
-
-SERLZ_VECTOR_TEMPLATE = """
- *((size_t *)(meta_buf+offset)) = iov_length (@VEC@, @CNT@);
- offset += sizeof(size_t);
- int32_t i;
- for (i = 0; i < @CNT@; ++i) {
- memcpy (data_buf, @VEC@[i].iov_base, @VEC@[i].iov_len);
- data_buf += @VEC@[i].iov_len;
- }
-"""
-
-# We don't need to save all of the fields - only those affected by chown,
-# chgrp, chmod, and utime.
-SERLZ_IATT_TEMPLATE = """
- *((ia_prot_t *)(meta_buf+offset)) = @SRC@.ia_prot;
- offset += sizeof(@SRC@.ia_prot);
- *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_uid;
- offset += sizeof(@SRC@.ia_uid);
- *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_gid;
- offset += sizeof(@SRC@.ia_gid);
- *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_atime;
- offset += sizeof(@SRC@.ia_atime);
- *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_atime_nsec;
- offset += sizeof(@SRC@.ia_atime_nsec);
- *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_mtime;
- offset += sizeof(@SRC@.ia_mtime);
- *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_mtime_nsec;
- offset += sizeof(@SRC@.ia_mtime_nsec);
-"""
-
-typemap = {
- 'dict_t *': ( LEN_DICT_TEMPLATE, SERLZ_DICT_TEMPLATE),
- 'fd_t *': ( LEN_GFID_TEMPLATE, SERLZ_GFID_TEMPLATE),
- 'dev_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE),
- 'gf_xattrop_flags_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE),
- 'int32_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE),
- 'mode_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE),
- 'off_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE),
- 'size_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE),
- 'uint32_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE),
- 'loc_t *': ( LEN_LOC_TEMPLATE, SERLZ_LOC_TEMPLATE),
- 'const char *': ( LEN_STRING_TEMPLATE, SERLZ_STRING_TEMPLATE),
- 'struct iatt *': ( LEN_IATT_TEMPLATE, SERLZ_IATT_TEMPLATE),
-}
-
-def get_special_subs (args):
- len_code = ""
- ser_code = ""
- for arg in args:
- if (arg[0] != 'fop-arg') or (len(arg) < 4):
- continue
- # Let this throw an exception if we get an unknown field name. The
- # broken build will remind whoever messed with the stub code that a
- # corresponding update is needed here.
- if arg[3] == "vector":
- # Make it as obvious as possible that this is a special case.
- len_code += LEN_VECTOR_TEMPLATE \
- .replace("@VEC@", "stub->args.vector") \
- .replace("@CNT@", "stub->args.count")
- ser_code += SERLZ_VECTOR_TEMPLATE \
- .replace("@VEC@", "stub->args.vector") \
- .replace("@CNT@", "stub->args.count")
- else:
- len_tmpl, ser_tmpl = typemap[arg[2]]
- src = "stub->args.%s" % arg[3]
- len_code += len_tmpl.replace("@SRC@", src)
- ser_code += ser_tmpl.replace("@SRC@", src)
- return len_code, ser_code
-
-# Mention those fops in the selective_generate table, for which
-# only a few common functions will be generated, and mention those
-# functions. Rest of the functions can be customized
-selective_generate = {
- "ipc": "len,serialize",
- }
-
-def gen_fdl ():
- entrypoints = []
- for name, value in ops.items():
- if "journal" not in [ x[0] for x in value ]:
- continue
-
- # generate all functions for all the fops
- # except for the ones in selective_generate for which
- # generate only the functions mentioned in the
- # selective_generate table
- gen_funcs = "len,serialize,callback,continue,fop"
- if name in selective_generate:
- gen_funcs = selective_generate[name].split(",")
-
- len_code, ser_code = get_special_subs(value)
- fop_subs[name]["@LEN_CODE@"] = len_code[:-1]
- fop_subs[name]["@SER_CODE@"] = ser_code[:-1]
- if 'len' in gen_funcs:
- print(generate(LEN_TEMPLATE, name, fop_subs))
- if 'serialize' in gen_funcs:
- print(generate(SER_TEMPLATE, name, fop_subs))
- if name == 'writev':
- print("#define DESTAGE_ASYNC")
- if 'callback' in gen_funcs:
- print(generate(CBK_TEMPLATE, name, cbk_subs))
- if 'continue' in gen_funcs:
- print(generate(CONTINUE_TEMPLATE, name, fop_subs))
- if 'fop' in gen_funcs:
- print(generate(FOP_TEMPLATE, name, fop_subs))
- if name == 'writev':
- print("#undef DESTAGE_ASYNC")
- entrypoints.append(name)
- print("struct xlator_fops fops = {")
- for ep in entrypoints:
- print("\t.%s = fdl_%s," % (ep, ep))
- print("};")
-
-for l in open(sys.argv[1], 'r').readlines():
- if l.find('#pragma generate') != -1:
- print("/* BEGIN GENERATED CODE - DO NOT MODIFY */")
- gen_fdl()
- print("/* END GENERATED CODE */")
- else:
- print(l[:-1])
diff --git a/xlators/experimental/fdl/src/gen_recon.py b/xlators/experimental/fdl/src/gen_recon.py
deleted file mode 100755
index 0766f61320a..00000000000
--- a/xlators/experimental/fdl/src/gen_recon.py
+++ /dev/null
@@ -1,218 +0,0 @@
-#!/usr/bin/python3
-
-from __future__ import print_function
-import os
-import re
-import string
-import sys
-
-curdir = os.path.dirname (sys.argv[0])
-gendir = os.path.join (curdir, '../../../../libglusterfs/src')
-sys.path.append (gendir)
-from generator import ops, fop_subs, cbk_subs, generate
-
-# See the big header comment at the start of gen_fdl.py to see how the stages
-# fit together. The big difference here is that *all* of the C code is in the
-# template file as labelled fragments, instead of as Python strings. That
-# makes it much easier to edit in one place, with proper syntax highlighting
-# and indentation.
-#
-# Stage 1 uses type-specific fragments to generate FUNCTION_BODY, instead of
-# LEN_*_TEMPLATE and SERLZ_*_TEMPLATE to generate LEN_CODE and SER_CODE.
-#
-# Stage 2 uses the FOP and CASE fragments instead of RECON_TEMPLATE and
-# FOP_TEMPLATE. The expanded FOP code (including FUNCTION_BODY substitution
-# in the middle of each function) is emitted immediately; the expanded CASE
-# code is saved for the next stage.
-#
-# Stage 3 uses the PROLOG and EPILOG fragments, with the expanded CASE code
-# in the middle of EPILOG, to generate the whole output file.
-#
-# Another way of looking at it is to consider how the fragments appear in
-# the final output:
-#
-# PROLOG
-# FOP (expanded for CREATE)
-# FOP before FUNCTION_BODY
-# LOC, INTEGER, GFID, etc. (one per arg, by type)
-# FOP after FUNCTION_BODY
-# FOP (expanded for WRITEV)
-# FOP before FUNCTION_BODY
-# GFID, VECTOR, etc. (one per arg, by type)
-# FOP after FUNCTION_BODY
-# (more FOPs)
-# EPILOG
-# EPILOG before CASE
-# CASE statements (one per fop)
-# EPILOG after CASE
-
-typemap = {
- 'dict_t *': "DICT",
- 'fd_t *': "FD",
- 'dev_t': "DOUBLE",
- 'gf_xattrop_flags_t': "INTEGER",
- 'int32_t': "INTEGER",
- 'mode_t': "INTEGER",
- 'off_t': "DOUBLE",
- 'size_t': "DOUBLE",
- 'uint32_t': "INTEGER",
- 'loc_t *': "LOC",
- 'const char *': "STRING",
- 'struct iovec *': "VECTOR",
- 'struct iatt *': "IATT",
- 'struct iobref *': "IOBREF",
-}
-
-def get_special_subs (name, args, fop_type):
- code = ""
- cleanups = ""
- links = ""
- s_args = []
- for arg in args:
- if arg[0] == 'extra':
- code += "\t%s %s;\n\n" % (arg[2], arg[1])
- s_args.append(arg[3])
- continue
- if arg[0] == 'link':
- links += fragments["LINK"].replace("@INODE_ARG@", arg[1]) \
- .replace("@IATT_ARG@", arg[2])
- continue
- if arg[0] != 'fop-arg':
- continue
- if (name, arg[1]) == ('writev', 'count'):
- # Special case: just skip this. We can't mark it as 'nosync'
- # because of the way the translator and dumper generators look for
- # that after 'stub-name' which we don't define. Instead of adding a
- # bunch of generic infrastructure for this one case, just pound it
- # here.
- continue
- recon_type = typemap[arg[2]]
- # print "/* %s.%s => %s (%s)*/" % (name, arg[1], recon_type, fop_type)
- if (name == "create") and (arg[1] == "fd"):
- # Special case: fd for create is new, not looked up.
- # print "/* change to NEW_FD */"
- recon_type = "NEW_FD"
- elif (recon_type == "LOC") and (fop_type == "entry-op"):
- # Need to treat this differently for inode vs. entry ops.
- # Special case: link source is treated like inode-op.
- if (name != "link") or (arg[1] != "oldloc"):
- # print "/* change to PARENT_LOC */"
- recon_type = "PARENT_LOC"
- code += fragments[recon_type].replace("@ARGNAME@", arg[1]) \
- .replace("@ARGTYPE@", arg[2])
- cleanup_key = recon_type + "_CLEANUP"
- if cleanup_key in fragments:
- new_frag = fragments[cleanup_key].replace("@ARGNAME@", arg[1])
- # Make sure these get added in *reverse* order. Otherwise, a
- # failure for an earlier argument might goto a label that falls
- # through to the cleanup code for a variable associated with a
- # later argument, but that variable might not even have been
- # *declared* (let alone initialized) yet. Consider the following
- # case.
- #
- # process argument A (on failure goto cleanup_A)
- # set error label to cleanup_A
- #
- # declare pointer variable for argument B
- # process argument B (on failure goto cleanup_B)
- #
- # cleanup_A:
- # /* whatever */
- # cleanup_B:
- # free pointer variable <= "USED BUT NOT SET" error here
- #
- # By adding these in reverse order, we ensure that cleanup_B is
- # actually *before* cleanup_A, and nothing will try to do the free
- # until we've actually attempted processing of B.
- cleanups = new_frag + cleanups
- if 'nosync' in arg[4:]:
- code += "\t(void)%s;\n" % arg[1];
- continue
- if arg[2] in ("loc_t *", "struct iatt *"):
- # These are passed as pointers to the syncop, but they're actual
- # structures in the generated code.
- s_args.append("&"+arg[1]);
- else:
- s_args.append(arg[1])
- # We have to handle a couple of special cases here, because some n00b
- # defined the syncops with a different argument order than the fops they're
- # based on.
- if name == 'writev':
- # Swap 'flags' and 'iobref'. Also, we need to add the iov count, which
- # is not stored in or read from the journal. There are other ways to
- # do that, but this is the only place we need anything similar and we
- # already have to treat it as a special case so this is simplest.
- s_args_str = 'fd, &vector, 1, off, iobref, flags, &preop, &postop, xdata'
- elif name == 'symlink':
- # Swap 'linkpath' and 'loc'.
- s_args_str = '&loc, linkpath, &iatt, xdata'
- elif name == 'xattrop':
- s_args_str = '&loc, flags, dict, xdata, NULL'
- elif name == 'fxattrop':
- s_args_str = 'fd, flags, dict, xdata, NULL'
- else:
- s_args_str = ', '.join(s_args)
- return code, links, s_args_str, cleanups
-
-# TBD: probably need to generate type-specific cleanup code as well - e.g.
-# fd_unref for an fd_t, loc_wipe for a loc_t, and so on. All of these
-# generated CLEANUP fragments will go at the end of the function, with goto
-# labels. Meanwhile, the error-checking part of each type-specific fragment
-# (e.g. LOC or FD) will need to update the indirect label that we jump to when
-# an error is detected. This will probably get messy.
-def gen_functions ():
- code = ""
- for name, value in ops.items():
- fop_type = [ x[1] for x in value if x[0] == "journal" ]
- if not fop_type:
- continue
- body, links, syncop_args, cleanups = get_special_subs (name, value,
- fop_type[0])
- fop_subs[name]["@FUNCTION_BODY@"] = body
- fop_subs[name]["@LINKS@"] = links
- fop_subs[name]["@SYNCOP_ARGS@"] = syncop_args
- fop_subs[name]["@CLEANUPS@"] = cleanups
- if name == "writev":
- # Take advantage of the fact that, *during reconciliation*, the
- # vector is always a single element. In normal I/O it's not.
- fop_subs[name]["@SUCCESS_VALUE@"] = "vector.iov_len"
- else:
- fop_subs[name]["@SUCCESS_VALUE@"] = "GFAPI_SUCCESS"
- # Print the FOP fragment with @FUNCTION_BODY@ in the middle.
- code += generate(fragments["FOP"], name, fop_subs)
- return code
-
-def gen_cases ():
- code = ""
- for name, value in ops.items():
- if "journal" not in [ x[0] for x in value ]:
- continue
- # Add the CASE fragment for this fop.
- code += generate(fragments["CASE"], name, fop_subs)
- return code
-
-def load_fragments (path="recon-tmpl.c"):
- pragma_re = re.compile('pragma fragment (.*)')
- cur_symbol = None
- cur_value = ""
- result = {}
- for line in open(path, "r").readlines():
- m = pragma_re.search(line)
- if m:
- if cur_symbol:
- result[cur_symbol] = cur_value
- cur_symbol = m.group(1)
- cur_value = ""
- else:
- cur_value += line
- if cur_symbol:
- result[cur_symbol] = cur_value
- return result
-
-if __name__ == "__main__":
- fragments = load_fragments(sys.argv[1])
- print("/* BEGIN GENERATED CODE - DO NOT MODIFY */")
- print(fragments["PROLOG"])
- print(gen_functions())
- print(fragments["EPILOG"].replace("@SWITCH_BODY@", gen_cases()))
- print("/* END GENERATED CODE */")
diff --git a/xlators/experimental/fdl/src/logdump.c b/xlators/experimental/fdl/src/logdump.c
deleted file mode 100644
index 6fbc5218d47..00000000000
--- a/xlators/experimental/fdl/src/logdump.c
+++ /dev/null
@@ -1,51 +0,0 @@
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/mman.h>
-
-extern int
-fdl_dump(char **, char **);
-
-int
-main(int argc, char **argv)
-{
- int meta_fd = (-1);
- char *meta_buf = NULL;
- int data_fd = (-1);
- char *data_buf = NULL;
-
- meta_fd = open(argv[1], O_RDONLY);
- if (meta_fd < 0) {
- perror("open");
- return EXIT_FAILURE;
- }
-
- /* TBD: get proper length */
- meta_buf = mmap(NULL, 1048576, PROT_READ, MAP_PRIVATE, meta_fd, 0);
- if (meta_buf == MAP_FAILED) {
- perror("mmap");
- return EXIT_FAILURE;
- }
-
- data_fd = open(argv[2], O_RDONLY);
- if (data_fd < 0) {
- perror("open");
- return EXIT_FAILURE;
- }
-
- /* TBD: get proper length */
- data_buf = mmap(NULL, 1048576, PROT_READ, MAP_PRIVATE, data_fd, 0);
- if (data_buf == MAP_FAILED) {
- perror("mmap");
- return EXIT_FAILURE;
- }
-
- for (;;) {
- if (!fdl_dump(&meta_buf, &data_buf)) {
- break;
- }
- }
-
- return EXIT_SUCCESS;
-}
diff --git a/xlators/experimental/fdl/src/recon-tmpl.c.in b/xlators/experimental/fdl/src/recon-tmpl.c.in
deleted file mode 100644
index 5115dfd5c75..00000000000
--- a/xlators/experimental/fdl/src/recon-tmpl.c.in
+++ /dev/null
@@ -1,297 +0,0 @@
-#pragma fragment PROLOG
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "iatt.h"
-#include "syncop.h"
-#include "xlator.h"
-#include "glfs-internal.h"
-
-#include "fdl.h"
-
-#define GFAPI_SUCCESS 0
-
-inode_t *
-recon_get_inode(glfs_t *fs, uuid_t gfid)
-{
- inode_t *inode;
- loc_t loc = {
- NULL,
- };
- struct iatt iatt;
- int ret;
- inode_t *newinode;
-
- inode = inode_find(fs->active_subvol->itable, gfid);
- if (inode) {
- printf("=== FOUND %s IN TABLE\n", uuid_utoa(gfid));
- return inode;
- }
-
- loc.inode = inode_new(fs->active_subvol->itable);
- if (!loc.inode) {
- return NULL;
- }
- gf_uuid_copy(loc.inode->gfid, gfid);
- gf_uuid_copy(loc.gfid, gfid);
-
- printf("=== DOING LOOKUP FOR %s\n", uuid_utoa(gfid));
-
- ret = syncop_lookup(fs->active_subvol, &loc, &iatt, NULL, NULL, NULL);
- if (ret != GFAPI_SUCCESS) {
- fprintf(stderr, "syncop_lookup failed (%d)\n", ret);
- return NULL;
- }
-
- newinode = inode_link(loc.inode, NULL, NULL, &iatt);
- if (newinode) {
- inode_lookup(newinode);
- }
-
- return newinode;
-}
-
-#pragma fragment DICT
-dict_t *@ARGNAME@;
-
-@ARGNAME@ = dict_new();
-if (!@ARGNAME@) {
- goto *err_label;
-}
-err_label = &&cleanup_@ARGNAME@;
-
-{
- int key_len, data_len;
- char *key_ptr;
- int garbage;
- for (;;) {
- key_len = *((int *)new_meta);
- new_meta += sizeof(int);
- if (!key_len) {
- break;
- }
- key_ptr = new_meta;
- new_meta += key_len;
- data_len = *((int *)new_meta);
- new_meta += sizeof(int);
- garbage = dict_set_static_bin(@ARGNAME@, key_ptr, new_meta, data_len);
- /* TBD: check error from dict_set_static_bin */
- (void)garbage;
- new_meta += data_len;
- }
-}
-
-#pragma fragment DICT_CLEANUP
-cleanup_@ARGNAME@ : dict_unref(@ARGNAME@);
-
-#pragma fragment DOUBLE
-@ARGTYPE@ @ARGNAME@ = *((@ARGTYPE@ *)new_meta);
-new_meta += sizeof(uint64_t);
-
-#pragma fragment FD
-inode_t *@ARGNAME@_ino;
-fd_t *@ARGNAME@;
-
-@ARGNAME@_ino = recon_get_inode(fs, *((uuid_t *)new_meta));
-new_meta += 16;
-if (!@ARGNAME@_ino) {
- goto *err_label;
-}
-err_label = &&cleanup_@ARGNAME@_ino;
-
-@ARGNAME@ = fd_anonymous(@ARGNAME@_ino);
-if (!@ARGNAME@) {
- goto *err_label;
-}
-err_label = &&cleanup_@ARGNAME@;
-
-#pragma fragment FD_CLEANUP
-cleanup_@ARGNAME@ : fd_unref(@ARGNAME@);
-cleanup_@ARGNAME@_ino : inode_unref(@ARGNAME@_ino);
-
-#pragma fragment NEW_FD
-/*
- * This pseudo-type is only used for create, and in that case we know
- * we'll be using loc.inode, so it's not worth generalizing to take an
- * extra argument.
- */
-fd_t *@ARGNAME@ = fd_anonymous(loc.inode);
-
-if (!fd) {
- goto *err_label;
-}
-err_label = &&cleanup_@ARGNAME@;
-new_meta += 16;
-
-#pragma fragment NEW_FD_CLEANUP
-cleanup_@ARGNAME@ : fd_unref(@ARGNAME@);
-
-#pragma fragment INTEGER
-@ARGTYPE@ @ARGNAME@ = *((@ARGTYPE@ *)new_meta);
-
-new_meta += sizeof(@ARGTYPE@);
-
-#pragma fragment LOC
-loc_t @ARGNAME@ = {
- NULL,
-};
-
-@ARGNAME@.inode = recon_get_inode(fs, *((uuid_t *)new_meta));
-if (!@ARGNAME@.inode) {
- goto *err_label;
-}
-err_label = &&cleanup_@ARGNAME@;
-gf_uuid_copy(@ARGNAME@.gfid, @ARGNAME@.inode->gfid);
-new_meta += 16;
-new_meta += 16; /* skip over pargfid */
-if (*(new_meta++)) {
- @ARGNAME@.name = new_meta;
- new_meta += strlen(new_meta) + 1;
-}
-
-#pragma fragment LOC_CLEANUP
-cleanup_@ARGNAME@ : loc_wipe(&@ARGNAME@);
-
-#pragma fragment PARENT_LOC
-loc_t @ARGNAME@ = {
- NULL,
-};
-
-new_meta += 16; /* skip over gfid */
-@ARGNAME@.parent = recon_get_inode(fs, *((uuid_t *)new_meta));
-if (!@ARGNAME@.parent) {
- goto *err_label;
-}
-err_label = &&cleanup_@ARGNAME@;
-gf_uuid_copy(@ARGNAME@.pargfid, @ARGNAME@.parent->gfid);
-new_meta += 16;
-if (!*(new_meta++)) {
- goto *err_label;
-}
-@ARGNAME@.name = new_meta;
-new_meta += strlen(new_meta) + 1;
-
-@ARGNAME@.inode = inode_new(fs->active_subvol->itable);
-if (!@ARGNAME@.inode) {
- goto *err_label;
-}
-
-#pragma fragment PARENT_LOC_CLEANUP
-cleanup_@ARGNAME@ : loc_wipe(&@ARGNAME@);
-
-#pragma fragment STRING
-char *@ARGNAME@;
-if (*(new_meta++)) {
- @ARGNAME@ = new_meta;
- new_meta += (strlen(new_meta) + 1);
-} else {
- goto *err_label;
-}
-
-#pragma fragment VECTOR
-struct iovec @ARGNAME@;
-
-@ARGNAME@.iov_len = *((size_t *)new_meta);
-new_meta += sizeof(@ARGNAME@.iov_len);
-@ARGNAME@.iov_base = new_data;
-new_data += @ARGNAME@.iov_len;
-
-#pragma fragment IATT
-struct iatt @ARGNAME@;
-{
- @ARGNAME@.ia_prot = *((ia_prot_t *)new_meta);
- new_meta += sizeof(ia_prot_t);
- uint32_t *myints = (uint32_t *)new_meta;
- @ARGNAME@.ia_uid = myints[0];
- @ARGNAME@.ia_gid = myints[1];
- @ARGNAME@.ia_atime = myints[2];
- @ARGNAME@.ia_atime_nsec = myints[3];
- @ARGNAME@.ia_mtime = myints[4];
- @ARGNAME@.ia_mtime_nsec = myints[5];
- new_meta += sizeof(*myints) * 6;
-}
-
-#pragma fragment IOBREF
-struct iobref *@ARGNAME@;
-
-@ARGNAME@ = iobref_new();
-if (!@ARGNAME@) {
- goto *err_label;
-}
-err_label = &&cleanup_@ARGNAME@;
-
-#pragma fragment IOBREF_CLEANUP
-cleanup_@ARGNAME@ : iobref_unref(@ARGNAME@);
-
-#pragma fragment LINK
-/* TBD: check error */
-inode_t *new_inode = inode_link(@INODE_ARG@, NULL, NULL, @IATT_ARG@);
-if (new_inode) {
- inode_lookup(new_inode);
-}
-
-#pragma fragment FOP
-int fdl_replay_@NAME@(glfs_t *fs, char **old_meta, char **old_data)
-{
- char *new_meta = *old_meta;
- char *new_data = *old_data;
- int ret;
- int status = 0xbad;
- void *err_label = &&done;
-
- @FUNCTION_BODY@
-
- ret = syncop_@NAME@(fs->active_subvol, @SYNCOP_ARGS@, NULL);
- if (ret !=@SUCCESS_VALUE@) {
- fprintf(stderr, "syncop_@NAME@ returned %d", ret);
- goto *err_label;
- }
-
- @LINKS@
-
- status = 0;
-
- @CLEANUPS@
-
- done : *old_meta = new_meta;
- *old_data = new_data;
- return status;
-}
-
-#pragma fragment CASE
-case GF_FOP_@UPNAME@:
- printf("=== GF_FOP_@UPNAME@\n");
- if (fdl_replay_@NAME@(fs, &new_meta, &new_data) != 0) {
- goto done;
- }
- recognized = 1;
- break;
-
-#pragma fragment EPILOG
- int
- recon_execute(glfs_t *fs, char **old_meta, char **old_data)
- {
- char *new_meta = *old_meta;
- char *new_data = *old_data;
- int recognized = 0;
- event_header_t *eh;
-
- eh = (event_header_t *)new_meta;
- new_meta += sizeof(*eh);
-
- /* TBD: check event_type instead of assuming NEW_REQUEST */
-
- switch (eh->fop_type) {
- @SWITCH_BODY@
-
- default : printf("unknown fop %u\n", eh->fop_type);
- }
-
- done:
- *old_meta = new_meta;
- *old_data = new_data;
- return recognized;
- }
diff --git a/xlators/experimental/fdl/src/recon.c b/xlators/experimental/fdl/src/recon.c
deleted file mode 100644
index ec1bf37dad9..00000000000
--- a/xlators/experimental/fdl/src/recon.c
+++ /dev/null
@@ -1,89 +0,0 @@
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/mman.h>
-
-#include "glusterfs.h"
-#include "fd.h"
-#include "syncop.h"
-#include "glfs-internal.h"
-
-#define GFAPI_SUCCESS 0
-
-extern int
-recon_execute(glfs_t *, char **, char **);
-
-int
-main(int argc, char **argv)
-{
- glfs_t *fs;
- int ret;
- int meta_fd = (-1);
- char *meta_buf = NULL;
- int data_fd = (-1);
- char *data_buf = NULL;
-
- fs = glfs_new("whocares");
- if (!fs) {
- fprintf(stderr, "glfs_new failed\n");
- return EXIT_FAILURE;
- }
-
- if (getenv("RECON_DEBUG")) {
- ret = glfs_set_logging(fs, "/dev/stderr", 7);
- } else {
- ret = glfs_set_logging(fs, "/dev/null", 0);
- }
-
- if (ret != GFAPI_SUCCESS) {
- fprintf(stderr, "glfs_set_logging failed (%d)\n", errno);
- return EXIT_FAILURE;
- }
-
- ret = glfs_set_volfile(fs, argv[1]);
- if (ret != GFAPI_SUCCESS) {
- fprintf(stderr, "glfs_set_volfile failed (%d)\n", errno);
- return EXIT_FAILURE;
- }
-
- ret = glfs_init(fs);
- if (ret != GFAPI_SUCCESS) {
- fprintf(stderr, "glfs_init failed (%d)\n", errno);
- return EXIT_FAILURE;
- }
-
- meta_fd = open(argv[2], O_RDONLY);
- if (meta_fd < 0) {
- perror("open");
- return EXIT_FAILURE;
- }
-
- /* TBD: get proper length */
- meta_buf = mmap(NULL, 1048576, PROT_READ, MAP_PRIVATE, meta_fd, 0);
- if (meta_buf == MAP_FAILED) {
- perror("mmap");
- return EXIT_FAILURE;
- }
-
- data_fd = open(argv[3], O_RDONLY);
- if (data_fd < 0) {
- perror("open");
- return EXIT_FAILURE;
- }
-
- /* TBD: get proper length */
- data_buf = mmap(NULL, 1048576, PROT_READ, MAP_PRIVATE, data_fd, 0);
- if (data_buf == MAP_FAILED) {
- perror("mmap");
- return EXIT_FAILURE;
- }
-
- for (;;) {
- if (!recon_execute(fs, &meta_buf, &data_buf)) {
- break;
- }
- }
-
- return EXIT_SUCCESS;
-}
diff --git a/xlators/experimental/jbr-client/Makefile.am b/xlators/experimental/jbr-client/Makefile.am
deleted file mode 100644
index a985f42a877..00000000000
--- a/xlators/experimental/jbr-client/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = src
-
-CLEANFILES =
diff --git a/xlators/experimental/jbr-client/src/Makefile.am b/xlators/experimental/jbr-client/src/Makefile.am
deleted file mode 100644
index c71f5ff1e83..00000000000
--- a/xlators/experimental/jbr-client/src/Makefile.am
+++ /dev/null
@@ -1,34 +0,0 @@
-if WITH_SERVER
-xlator_LTLIBRARIES = jbrc.la
-endif
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental
-
-nodist_jbrc_la_SOURCES = jbrc-cg.c
-CLEANFILES = $(nodist_jbrc_la_SOURCES)
-
-jbrc_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-jbrc_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-noinst_HEADERS = $(top_srcdir)/xlators/lib/src/libxlator.h \
- $(top_srcdir)/glusterfsd/src/glusterfsd.h \
- jbrc.h jbr-messages.h
-
-AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
- -I$(top_srcdir)/xlators/lib/src \
- -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
- -I$(top_srcdir)/rpc/rpc-lib/src
-
-AM_CFLAGS = -Wall $(GF_CFLAGS)
-
-JBRC_PREFIX = $(top_srcdir)/xlators/experimental/jbr-client/src
-JBRC_GEN_FOPS = $(JBRC_PREFIX)/gen-fops.py
-JBRC_TEMPLATES = $(JBRC_PREFIX)/fop-template.c.in
-JBRC_WRAPPER = $(JBRC_PREFIX)/jbrc.c
-noinst_PYTHON = $(JBRC_GEN_FOPS)
-EXTRA_DIST = $(JBRC_TEMPLATES) $(JBRC_WRAPPER)
-
-jbrc-cg.c: $(JBRC_GEN_FOPS) $(JBRC_TEMPLATES) $(JBRC_WRAPPER)
- $(PYTHON) $(JBRC_GEN_FOPS) $(JBRC_TEMPLATES) $(JBRC_WRAPPER) > $@
-
-uninstall-local:
- rm -f $(DESTDIR)$(xlatordir)/jbr.so
diff --git a/xlators/experimental/jbr-client/src/fop-template.c.in b/xlators/experimental/jbr-client/src/fop-template.c.in
deleted file mode 100644
index 9732badc794..00000000000
--- a/xlators/experimental/jbr-client/src/fop-template.c.in
+++ /dev/null
@@ -1,102 +0,0 @@
-/* template-name fop */
-int32_t jbrc_@NAME@(call_frame_t *frame, xlator_t *this, @LONG_ARGS@)
-{
- jbrc_local_t *local = NULL;
- xlator_t *target_xl = ACTIVE_CHILD(this);
-
- local = mem_get(this->local_pool);
- if (!local) {
- goto err;
- }
-
- local->stub = fop_@NAME@_stub(frame, jbrc_@NAME@_continue, @SHORT_ARGS@);
- if (!local->stub) {
- goto err;
- }
- local->curr_xl = target_xl;
- local->scars = 0;
-
- frame->local = local;
- STACK_WIND_COOKIE(frame, jbrc_@NAME@_cbk, target_xl, target_xl,
- target_xl->fops->@NAME@, @SHORT_ARGS@);
- return 0;
-
-err:
- if (local) {
- mem_put(local);
- }
- STACK_UNWIND_STRICT(@NAME@, frame, -1, ENOMEM, @ERROR_ARGS@);
- return 0;
-}
-
-/* template-name cbk */
-int32_t jbrc_@NAME@_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, @LONG_ARGS@)
-{
- jbrc_local_t *local = frame->local;
- xlator_t *last_xl = cookie;
- xlator_t *next_xl;
- jbrc_private_t *priv = this->private;
- struct timespec spec;
-
- if (op_ret != (-1)) {
- if (local->scars) {
- gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_RETRY_MSG,
- HILITE("retried %p OK"), frame->local);
- }
- priv->active = last_xl;
- goto unwind;
- }
- if ((op_errno != EREMOTE) && (op_errno != ENOTCONN)) {
- goto unwind;
- }
-
- /* TBD: get leader ID from xdata? */
- next_xl = next_xlator(this, last_xl);
- /*
- * We can't just give up after we've tried all bricks, because it's
- * quite likely that a new leader election just hasn't finished yet.
- * We also shouldn't retry endlessly, and especially not at a high
- * rate, but that's good enough while we work on other things.
- *
- * TBD: implement slow/finite retry via a worker thread
- */
- if (!next_xl || (local->scars >= SCAR_LIMIT)) {
- gf_msg(this->name, GF_LOG_DEBUG, 0, J_MSG_RETRY_MSG,
- HILITE("ran out of retries for %p"), frame->local);
- goto unwind;
- }
-
- local->curr_xl = next_xl;
- local->scars += 1;
- spec.tv_sec = 1;
- spec.tv_nsec = 0;
- /*
- * WARNING
- *
- * Just calling gf_timer_call_after like this leaves open the
- * possibility that writes will get reordered, if a first write is
- * rescheduled and then a second comes along to find an updated
- * priv->active before the first actually executes. We might need to
- * implement a stricter (and more complicated) queuing mechanism to
- * ensure absolute consistency in this case.
- */
- if (gf_timer_call_after(this->ctx, spec, jbrc_retry_cb, local)) {
- return 0;
- }
-
-unwind:
- call_stub_destroy(local->stub);
- STACK_UNWIND_STRICT(@NAME@, frame, op_ret, op_errno, @SHORT_ARGS@);
- return 0;
-}
-
-/* template-name cont-func */
-int32_t jbrc_@NAME@_continue(call_frame_t *frame, xlator_t *this, @LONG_ARGS@)
-{
- jbrc_local_t *local = frame->local;
-
- STACK_WIND_COOKIE(frame, jbrc_@NAME@_cbk, local->curr_xl, local->curr_xl,
- local->curr_xl->fops->@NAME@, @SHORT_ARGS@);
- return 0;
-}
diff --git a/xlators/experimental/jbr-client/src/gen-fops.py b/xlators/experimental/jbr-client/src/gen-fops.py
deleted file mode 100755
index a5ddf577fbe..00000000000
--- a/xlators/experimental/jbr-client/src/gen-fops.py
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/usr/bin/python3
-
-from __future__ import print_function
-import os
-import re
-import string
-import sys
-
-curdir = os.path.dirname(sys.argv[0])
-gendir = os.path.join(curdir, '../../../../libglusterfs/src')
-sys.path.append(gendir)
-from generator import ops, fop_subs, cbk_subs, generate
-
-# We really want the callback argument list, even when we're generating fop
-# code, so we propagate here.
-# TBD: this should probably be right in generate.py
-for k, v in cbk_subs.items():
- fop_subs[k]['@ERROR_ARGS@'] = v['@ERROR_ARGS@']
-
-# Stolen from old codegen.py
-def load_templates (path):
- templates = {}
- tmpl_re = re.compile("/\* template-name (.*) \*/")
- templates = {}
- t_name = None
- for line in open(path, "r").readlines():
- if not line:
- break
- m = tmpl_re.match(line)
- if m:
- if t_name:
- templates[t_name] = ''.join(t_contents)
- t_name = m.group(1).strip()
- t_contents = []
- elif t_name:
- t_contents.append(line)
- if t_name:
- templates[t_name] = ''.join(t_contents)
- return templates
-
-# Stolen from gen_fdl.py
-def gen_client (templates):
- for name, value in ops.items():
- if name == 'getspec':
- # It's not real if it doesn't have a stub function.
- continue
- print(generate(templates['cbk'], name, cbk_subs))
- print(generate(templates['cont-func'], name, fop_subs))
- print(generate(templates['fop'], name, fop_subs))
-
-tmpl = load_templates(sys.argv[1])
-for l in open(sys.argv[2], 'r').readlines():
- if l.find('#pragma generate') != -1:
- print("/* BEGIN GENERATED CODE - DO NOT MODIFY */")
- gen_client(tmpl)
- print("/* END GENERATED CODE */")
- else:
- print(l[:-1])
diff --git a/xlators/experimental/jbr-client/src/jbr-messages.h b/xlators/experimental/jbr-client/src/jbr-messages.h
deleted file mode 100644
index ecbf569ec13..00000000000
--- a/xlators/experimental/jbr-client/src/jbr-messages.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _JBR_MESSAGES_H_
-#define _JBR_MESSAGES_H_
-
-#include "glfs-message-id.h"
-
-/* To add new message IDs, append new identifiers at the end of the list.
- *
- * Never remove a message ID. If it's not used anymore, you can rename it or
- * leave it as it is, but not delete it. This is to prevent reutilization of
- * IDs by other messages.
- *
- * The component name must match one of the entries defined in
- * glfs-message-id.h.
- */
-
-GLFS_MSGID(JBR, J_MSG_INIT_FAIL, J_MSG_RETRY_MSG, J_MSG_MEM_ERR, J_MSG_DICT_FLR,
- J_MSG_GENERIC, J_MSG_INVALID, J_MSG_NO_DATA, J_MSG_SYS_CALL_FAILURE,
- J_MSG_QUORUM_NOT_MET, J_MSG_LOCK_FAILURE);
-
-#endif /* _JBR_MESSAGES_H_ */
diff --git a/xlators/experimental/jbr-client/src/jbrc.c b/xlators/experimental/jbr-client/src/jbrc.c
deleted file mode 100644
index 28801ecc99f..00000000000
--- a/xlators/experimental/jbr-client/src/jbrc.c
+++ /dev/null
@@ -1,311 +0,0 @@
-/*
- Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "call-stub.h"
-#include "defaults.h"
-#include "timer.h"
-#include "xlator.h"
-#include "jbr-messages.h"
-#include "jbrc.h"
-#include "statedump.h"
-
-#define SCAR_LIMIT 20
-#define HILITE(x) ("" x "")
-
-/*
- * The fops are actually generated by gen-fops.py; the rest was mostly copied
- * from defaults.c (commit cd253754 on 27 August 2013).
- */
-
-enum gf_dht_mem_types_ {
- gf_mt_jbrc_private_t = gf_common_mt_end + 1,
- gf_mt_jbrc_end
-};
-
-char *JBRC_XATTR = "user.jbr.active";
-
-static inline xlator_t *
-ACTIVE_CHILD(xlator_t *parent)
-{
- jbrc_private_t *priv = parent->private;
-
- return priv ? priv->active : FIRST_CHILD(parent);
-}
-
-xlator_t *
-next_xlator(xlator_t *this, xlator_t *prev)
-{
- xlator_list_t *trav;
-
- for (trav = this->children; trav; trav = trav->next) {
- if (trav->xlator == prev) {
- return trav->next ? trav->next->xlator : this->children->xlator;
- }
- }
-
- return NULL;
-}
-
-void
-jbrc_retry_cb(void *cb_arg)
-{
- jbrc_local_t *local = cb_arg;
-
- gf_msg(__func__, GF_LOG_INFO, 0, J_MSG_RETRY_MSG, HILITE("retrying %p"),
- local);
- call_resume_wind(local->stub);
-}
-
-#pragma generate
-
-int32_t
-jbrc_forget(xlator_t *this, inode_t *inode)
-{
- gf_msg_callingfn(this->name, GF_LOG_WARNING, 0, J_MSG_INIT_FAIL,
- "xlator does not implement forget_cbk");
- return 0;
-}
-
-int32_t
-jbrc_releasedir(xlator_t *this, fd_t *fd)
-{
- gf_msg_callingfn(this->name, GF_LOG_WARNING, 0, J_MSG_INIT_FAIL,
- "xlator does not implement releasedir_cbk");
- return 0;
-}
-
-int32_t
-jbrc_release(xlator_t *this, fd_t *fd)
-{
- gf_msg_callingfn(this->name, GF_LOG_WARNING, 0, J_MSG_INIT_FAIL,
- "xlator does not implement release_cbk");
- return 0;
-}
-
-struct xlator_fops fops = {
- .lookup = jbrc_lookup,
- .stat = jbrc_stat,
- .fstat = jbrc_fstat,
- .truncate = jbrc_truncate,
- .ftruncate = jbrc_ftruncate,
- .access = jbrc_access,
- .readlink = jbrc_readlink,
- .mknod = jbrc_mknod,
- .mkdir = jbrc_mkdir,
- .unlink = jbrc_unlink,
- .rmdir = jbrc_rmdir,
- .symlink = jbrc_symlink,
- .rename = jbrc_rename,
- .link = jbrc_link,
- .create = jbrc_create,
- .open = jbrc_open,
- .readv = jbrc_readv,
- .writev = jbrc_writev,
- .flush = jbrc_flush,
- .fsync = jbrc_fsync,
- .opendir = jbrc_opendir,
- .readdir = jbrc_readdir,
- .readdirp = jbrc_readdirp,
- .fsyncdir = jbrc_fsyncdir,
- .statfs = jbrc_statfs,
- .setxattr = jbrc_setxattr,
- .getxattr = jbrc_getxattr,
- .fsetxattr = jbrc_fsetxattr,
- .fgetxattr = jbrc_fgetxattr,
- .removexattr = jbrc_removexattr,
- .fremovexattr = jbrc_fremovexattr,
- .lk = jbrc_lk,
- .inodelk = jbrc_inodelk,
- .finodelk = jbrc_finodelk,
- .entrylk = jbrc_entrylk,
- .fentrylk = jbrc_fentrylk,
- .rchecksum = jbrc_rchecksum,
- .xattrop = jbrc_xattrop,
- .fxattrop = jbrc_fxattrop,
- .setattr = jbrc_setattr,
- .fsetattr = jbrc_fsetattr,
- .fallocate = jbrc_fallocate,
- .discard = jbrc_discard,
-};
-
-struct xlator_cbks cbks = {};
-
-int32_t
-mem_acct_init(xlator_t *this)
-{
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO("jbrc", this, out);
-
- ret = xlator_mem_acct_init(this, gf_mt_jbrc_end + 1);
-
- if (ret != 0) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, J_MSG_MEM_ERR,
- "Memory accounting init failed");
- return ret;
- }
-out:
- return ret;
-}
-
-int32_t
-jbrc_init(xlator_t *this)
-{
- jbrc_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
-
- this->local_pool = mem_pool_new(jbrc_local_t, 128);
- if (!this->local_pool) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, J_MSG_MEM_ERR,
- "failed to create jbrc_local_t pool");
- goto err;
- }
-
- priv = GF_CALLOC(1, sizeof(*priv), gf_mt_jbrc_private_t);
- if (!priv) {
- goto err;
- }
-
- for (trav = this->children; trav; trav = trav->next) {
- ++(priv->n_children);
- }
-
- priv->active = FIRST_CHILD(this);
- this->private = priv;
- return 0;
-
-err:
- if (priv) {
- GF_FREE(priv);
- }
- return -1;
-}
-
-void
-jbrc_fini(xlator_t *this)
-{
- GF_FREE(this->private);
-}
-
-int
-jbrc_get_child_index(xlator_t *this, xlator_t *kid)
-{
- xlator_list_t *trav;
- int retval = -1;
-
- for (trav = this->children; trav; trav = trav->next) {
- ++retval;
- if (trav->xlator == kid) {
- return retval;
- }
- }
-
- return -1;
-}
-
-uint8_t
-jbrc_count_up_kids(jbrc_private_t *priv)
-{
- uint8_t retval = 0;
- uint8_t i;
-
- for (i = 0; i < priv->n_children; ++i) {
- if (priv->kid_state & (1 << i)) {
- ++retval;
- }
- }
-
- return retval;
-}
-
-int32_t
-jbrc_notify(xlator_t *this, int32_t event, void *data, ...)
-{
- int32_t ret = 0;
- int32_t index = 0;
- jbrc_private_t *priv = NULL;
-
- GF_VALIDATE_OR_GOTO(THIS->name, this, out);
- priv = this->private;
- GF_VALIDATE_OR_GOTO(this->name, priv, out);
-
- switch (event) {
- case GF_EVENT_CHILD_UP:
- index = jbrc_get_child_index(this, data);
- if (index >= 0) {
- priv->kid_state |= (1 << index);
- priv->up_children = jbrc_count_up_kids(priv);
- gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_GENERIC,
- "got CHILD_UP for %s, now %u kids",
- ((xlator_t *)data)->name, priv->up_children);
- }
- ret = default_notify(this, event, data);
- break;
- case GF_EVENT_CHILD_DOWN:
- index = jbrc_get_child_index(this, data);
- if (index >= 0) {
- priv->kid_state &= ~(1 << index);
- priv->up_children = jbrc_count_up_kids(priv);
- gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_GENERIC,
- "got CHILD_DOWN for %s, now %u kids",
- ((xlator_t *)data)->name, priv->up_children);
- }
- break;
- default:
- ret = default_notify(this, event, data);
- }
-
-out:
- return ret;
-}
-
-int
-jbrc_priv_dump(xlator_t *this)
-{
- jbrc_private_t *priv = NULL;
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
- xlator_list_t *trav = NULL;
- int32_t i = -1;
-
- GF_VALIDATE_OR_GOTO(THIS->name, this, out);
- priv = this->private;
- GF_VALIDATE_OR_GOTO(this->name, priv, out);
-
- snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
- gf_proc_dump_add_section("%s", key_prefix);
-
- gf_proc_dump_write("up_children", "%u", priv->up_children);
-
- for (trav = this->children, i = 0; trav; trav = trav->next, i++) {
- snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "child_%d", i);
- gf_proc_dump_write(key_prefix, "%s", trav->xlator->name);
- }
-
-out:
- return 0;
-}
-
-struct xlator_dumpops dumpops = {
- .priv = jbrc_priv_dump,
-};
-
-class_methods_t class_methods = {
- .init = jbrc_init,
- .fini = jbrc_fini,
- .notify = jbrc_notify,
-};
-
-struct volume_options options[] = {
- {.key = {NULL}},
-};
diff --git a/xlators/experimental/jbr-client/src/jbrc.h b/xlators/experimental/jbr-client/src/jbrc.h
deleted file mode 100644
index f99178402b3..00000000000
--- a/xlators/experimental/jbr-client/src/jbrc.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _JBRC_H_
-#define _JBRC_H_
-
-typedef struct {
- xlator_t *active;
- uint8_t up_children;
- uint8_t n_children;
- uint32_t kid_state;
-} jbrc_private_t;
-
-typedef struct {
- call_stub_t *stub;
- xlator_t *curr_xl;
- uint16_t scars;
-} jbrc_local_t;
-
-#endif /* _JBRC_H_ */
diff --git a/xlators/experimental/jbr-server/Makefile.am b/xlators/experimental/jbr-server/Makefile.am
deleted file mode 100644
index a985f42a877..00000000000
--- a/xlators/experimental/jbr-server/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = src
-
-CLEANFILES =
diff --git a/xlators/experimental/jbr-server/src/Makefile.am b/xlators/experimental/jbr-server/src/Makefile.am
deleted file mode 100644
index 42d3c8a6c36..00000000000
--- a/xlators/experimental/jbr-server/src/Makefile.am
+++ /dev/null
@@ -1,39 +0,0 @@
-if WITH_SERVER
-xlator_LTLIBRARIES = jbr.la
-endif
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental
-
-nodist_jbr_la_SOURCES = jbr-cg.c
-CLEANFILES = $(nodist_jbr_la_SOURCES)
-
-jbr_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-jbr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
- $(top_builddir)/api/src/libgfapi.la
-
-noinst_HEADERS = jbr-internal.h \
- $(top_srcdir)/xlators/lib/src/libxlator.h \
- $(top_srcdir)/xlators/experimental/fdl/src/fdl.h \
- $(top_srcdir)/glusterfsd/src/glusterfsd.h
-
-AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
- -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
- -I$(top_srcdir)/xlators/lib/src -I$(top_srcdir)/rpc/rpc-lib/src \
- -I$(top_srcdir)/xlators/experimental/fdl/src/ \
- -DSBIN_DIR=\"$(sbindir)\" -I$(top_srcdir)/api/src \
- -DJBR_SCRIPT_PREFIX=\"$(jbrdir)\" \
- -I$(top_srcdir)/xlators/experimental/jbr-client/src/
-
-AM_CFLAGS = -Wall $(GF_CFLAGS)
-
-JBR_PREFIX = $(top_srcdir)/xlators/experimental/jbr-server/src
-JBR_GEN_FOPS = $(JBR_PREFIX)/gen-fops.py
-JBR_TEMPLATES = $(JBR_PREFIX)/all-templates.c.in
-JBR_WRAPPER = $(JBR_PREFIX)/jbr.c
-noinst_PYTHON = $(JBR_GEN_FOPS)
-EXTRA_DIST = $(JBR_TEMPLATES) $(JBR_WRAPPER)
-
-jbr-cg.c: $(JBR_GEN_FOPS) $(JBR_TEMPLATES) $(JBR_WRAPPER)
- $(PYTHON) $(JBR_GEN_FOPS) $(JBR_TEMPLATES) $(JBR_WRAPPER) > $@
-
-uninstall-local:
- rm -f $(DESTDIR)$(xlatordir)/jbr.so
diff --git a/xlators/experimental/jbr-server/src/all-templates.c.in b/xlators/experimental/jbr-server/src/all-templates.c.in
deleted file mode 100644
index a9d57fc646f..00000000000
--- a/xlators/experimental/jbr-server/src/all-templates.c.in
+++ /dev/null
@@ -1,501 +0,0 @@
-/*
- * You can put anything here - it doesn't even have to be a comment - and it
- * will be ignored until we reach the first template-name comment.
- */
-
-/* template-name read-fop */
-int32_t jbr_@NAME@(call_frame_t *frame, xlator_t *this, @LONG_ARGS@)
-{
- jbr_private_t *priv = NULL;
- gf_boolean_t in_recon = _gf_false;
- int32_t op_errno = 0;
- int32_t recon_term, recon_index;
-
- GF_VALIDATE_OR_GOTO("jbr", this, err);
- priv = this->private;
- GF_VALIDATE_OR_GOTO(this->name, priv, err);
- GF_VALIDATE_OR_GOTO(this->name, frame, err);
-
- op_errno = EREMOTE;
-
- /* allow reads during reconciliation *
- * TBD: allow "dirty" reads on non-leaders *
- */
- if (xdata && (dict_get_int32(xdata, RECON_TERM_XATTR, &recon_term) == 0) &&
- (dict_get_int32(xdata, RECON_INDEX_XATTR, &recon_index) == 0)) {
- in_recon = _gf_true;
- }
-
- if ((!priv->leader) && (in_recon == _gf_false)) {
- goto err;
- }
-
- STACK_WIND(frame, default_@NAME@_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@);
- return 0;
-
-err:
- STACK_UNWIND_STRICT(@NAME@, frame, -1, op_errno, @ERROR_ARGS@);
- return 0;
-}
-
-/* template-name read-perform_local_op */
-/* No "perform_local_op" function needed for @NAME@ */
-
-/* template-name read-dispatch */
-/* No "dispatch" function needed for @NAME@ */
-
-/* template-name read-call_dispatch */
-/* No "call_dispatch" function needed for @NAME@ */
-
-/* template-name read-fan-in */
-/* No "fan-in" function needed for @NAME@ */
-
-/* template-name read-continue */
-/* No "continue" function needed for @NAME@ */
-
-/* template-name read-complete */
-/* No "complete" function needed for @NAME@ */
-
-/* template-name write-fop */
-int32_t jbr_@NAME@(call_frame_t *frame, xlator_t *this, @LONG_ARGS@)
-{
- jbr_local_t *local = NULL;
- jbr_private_t *priv = NULL;
- int32_t ret = -1;
- int op_errno = ENOMEM;
-
- GF_VALIDATE_OR_GOTO("jbr", this, err);
- priv = this->private;
- GF_VALIDATE_OR_GOTO(this->name, priv, err);
- GF_VALIDATE_OR_GOTO(this->name, frame, err);
-
-#if defined(JBR_CG_NEED_FD)
- ret = jbr_leader_checks_and_init(frame, this, &op_errno, xdata, fd);
-#else
- ret = jbr_leader_checks_and_init(frame, this, &op_errno, xdata, NULL);
-#endif
- if (ret)
- goto err;
-
- local = frame->local;
-
- /*
- * If we let it through despite not being the leader, then we just want
- * to pass it on down without all of the additional xattrs, queuing, and
- * so on. However, jbr_*_complete does depend on the initialization
- * immediately above this.
- */
- if (!priv->leader) {
- STACK_WIND(frame, jbr_@NAME@_complete, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@);
- return 0;
- }
-
- ret = jbr_initialize_xdata_set_attrs(this, &xdata);
- if (ret)
- goto err;
-
- local->xdata = dict_ref(xdata);
- local->stub = fop_@NAME@_stub(frame, jbr_@NAME@_continue, @SHORT_ARGS@);
- if (!local->stub) {
- goto err;
- }
-
- /*
- * Can be used to just call_dispatch or be customised per fop to *
- * perform ops specific to that particular fop. *
- */
- ret = jbr_@NAME@_perform_local_op(frame, this, &op_errno, @SHORT_ARGS@);
- if (ret)
- goto err;
-
- return ret;
-err:
- if (local) {
- if (local->stub) {
- call_stub_destroy(local->stub);
- }
- if (local->qstub) {
- call_stub_destroy(local->qstub);
- }
- if (local->fd) {
- fd_unref(local->fd);
- }
- mem_put(local);
- }
- STACK_UNWIND_STRICT(@NAME@, frame, -1, op_errno, @ERROR_ARGS@);
- return 0;
-}
-
-/* template-name write-perform_local_op */
-int32_t jbr_@NAME@_perform_local_op(call_frame_t *frame, xlator_t *this,
- int *op_errno, @LONG_ARGS@)
-{
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO("jbr", this, out);
- GF_VALIDATE_OR_GOTO(this->name, frame, out);
- GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
-
- ret = jbr_@NAME@_call_dispatch(frame, this, op_errno, @SHORT_ARGS@);
-
-out:
- return ret;
-}
-
-/* template-name write-call_dispatch */
-int32_t jbr_@NAME@_call_dispatch(call_frame_t *frame, xlator_t *this,
- int *op_errno, @LONG_ARGS@)
-{
- jbr_local_t *local = NULL;
- jbr_private_t *priv = NULL;
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO("jbr", this, out);
- priv = this->private;
- GF_VALIDATE_OR_GOTO(this->name, priv, out);
- GF_VALIDATE_OR_GOTO(this->name, frame, out);
- local = frame->local;
- GF_VALIDATE_OR_GOTO(this->name, local, out);
- GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
-
-#if defined(JBR_CG_QUEUE)
- jbr_inode_ctx_t *ictx = jbr_get_inode_ctx(this, fd->inode);
- if (!ictx) {
- *op_errno = EIO;
- goto out;
- }
-
- LOCK(&ictx->lock);
- if (ictx->active) {
- gf_msg_debug(this->name, 0, "queuing request due to conflict");
- /*
- * TBD: enqueue only for real conflict
- *
- * Currently we just act like all writes are in
- * conflict with one another. What we should really do
- * is check the active/pending queues and defer only if
- * there's a conflict there.
- *
- * It's important to check the pending queue because we
- * might have an active request X which conflicts with
- * a pending request Y, and this request Z might
- * conflict with Y but not X. If we checked only the
- * active queue then Z could jump ahead of Y, which
- * would be incorrect.
- */
- local->qstub = fop_@NAME@_stub(frame, jbr_@NAME@_dispatch,
- @SHORT_ARGS@);
- if (!local->qstub) {
- UNLOCK(&ictx->lock);
- goto out;
- }
- list_add_tail(&local->qlinks, &ictx->pqueue);
- ++(ictx->pending);
- UNLOCK(&ictx->lock);
- ret = 0;
- goto out;
- } else {
- list_add_tail(&local->qlinks, &ictx->aqueue);
- ++(ictx->active);
- }
- UNLOCK(&ictx->lock);
-#endif
- ret = jbr_@NAME@_dispatch(frame, this, @SHORT_ARGS@);
-
-out:
- return ret;
-}
-
-/* template-name write-dispatch */
-int32_t jbr_@NAME@_dispatch(call_frame_t *frame, xlator_t *this, @LONG_ARGS@)
-{
- jbr_local_t *local = NULL;
- jbr_private_t *priv = NULL;
- int32_t ret = -1;
- xlator_list_t *trav;
-
- GF_VALIDATE_OR_GOTO("jbr", this, out);
- priv = this->private;
- GF_VALIDATE_OR_GOTO(this->name, priv, out);
- GF_VALIDATE_OR_GOTO(this->name, frame, out);
- local = frame->local;
- GF_VALIDATE_OR_GOTO(this->name, local, out);
-
- /*
- * TBD: unblock pending request(s) if we fail after this point but
- * before we get to jbr_@NAME@_complete (where that code currently
- * resides).
- */
-
- local->call_count = priv->n_children - 1;
- for (trav = this->children->next; trav; trav = trav->next) {
- STACK_WIND(frame, jbr_@NAME@_fan_in, trav->xlator,
- trav->xlator->fops->@NAME@, @SHORT_ARGS@);
- }
-
- /* TBD: variable Issue count */
- ret = 0;
-out:
- return ret;
-}
-
-/* template-name write-fan-in */
-int32_t jbr_@NAME@_fan_in(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, @LONG_ARGS@)
-{
- jbr_local_t *local = NULL;
- int32_t ret = -1;
- uint8_t call_count;
-
- GF_VALIDATE_OR_GOTO("jbr", this, out);
- GF_VALIDATE_OR_GOTO(this->name, frame, out);
- local = frame->local;
- GF_VALIDATE_OR_GOTO(this->name, local, out);
-
- gf_msg_trace(this->name, 0, "op_ret = %d, op_errno = %d\n", op_ret,
- op_errno);
-
- LOCK(&frame->lock);
- call_count = --(local->call_count);
- if (op_ret != -1) {
- /* Increment the number of successful acks *
- * received for the operation. *
- */
- (local->successful_acks)++;
- local->successful_op_ret = op_ret;
- }
- gf_msg_debug(this->name, 0, "succ_acks = %d, op_ret = %d, op_errno = %d\n",
- op_ret, op_errno, local->successful_acks);
- UNLOCK(&frame->lock);
-
- /* TBD: variable Completion count */
- if (call_count == 0) {
- call_resume(local->stub);
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-/* template-name write-continue */
-int32_t jbr_@NAME@_continue(call_frame_t *frame, xlator_t *this, @LONG_ARGS@)
-{
- int32_t ret = -1;
- gf_boolean_t result = _gf_false;
- jbr_local_t *local = NULL;
- jbr_local_t *new_local = NULL;
- jbr_private_t *priv = NULL;
- int32_t op_errno = 0;
-
- GF_VALIDATE_OR_GOTO("jbr", this, out);
- GF_VALIDATE_OR_GOTO(this->name, frame, out);
- priv = this->private;
- local = frame->local;
- GF_VALIDATE_OR_GOTO(this->name, priv, out);
- GF_VALIDATE_OR_GOTO(this->name, local, out);
-
- /* Perform quorum check to see if the leader needs *
- * to perform the operation. If the operation will not *
- * meet quorum irrespective of the leader's result *
- * there is no point in the leader performing the fop *
- */
- result = fop_quorum_check(this, (double)priv->n_children,
- (double)local->successful_acks + 1);
- if (result == _gf_false) {
- gf_msg(this->name, GF_LOG_ERROR, EROFS, J_MSG_QUORUM_NOT_MET,
- "Didn't receive enough acks "
- "to meet quorum. Failing the operation without trying "
- "it on the leader.");
-
-#if defined(JBR_CG_QUEUE)
- /*
- * In case of a fop failure, before unwinding need to *
- * remove it from queue *
- */
- ret = jbr_remove_from_queue(frame, this);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_GENERIC,
- "Failed to remove from queue.");
- }
-#endif
-
- /*
- * In this case, the quorum is not met on the followers *
- * So the operation will not be performed on the leader *
- * and a rollback will be sent via GF_FOP_IPC to all the *
- * followers, where this particular fop's term and index *
- * numbers will be journaled, and later used to rollback *
- */
- call_frame_t *new_frame;
-
- new_frame = copy_frame(frame);
-
- if (new_frame) {
- new_local = mem_get0(this->local_pool);
- if (new_local) {
- INIT_LIST_HEAD(&new_local->qlinks);
- ret = dict_set_int32(local->xdata, "rollback-fop",
- GF_FOP_@UPNAME@);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR,
- "failed to set rollback-fop");
- } else {
- new_local->xdata = dict_ref(local->xdata);
- new_frame->local = new_local;
- jbr_ipc_call_dispatch(new_frame, this, &op_errno,
- FDL_IPC_JBR_SERVER_ROLLBACK,
- new_local->xdata);
- }
- } else {
- gf_log(this->name, GF_LOG_WARNING,
- "Could not create local for new_frame");
- }
- } else {
- gf_log(this->name, GF_LOG_WARNING, "Could not send rollback ipc");
- }
-
- STACK_UNWIND_STRICT(@NAME@, frame, -1, EROFS, @ERROR_ARGS@);
- } else {
- STACK_WIND(frame, jbr_@NAME@_complete, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@);
- }
-
-out:
- return 0;
-}
-
-/* template-name write-complete */
-int32_t jbr_@NAME@_complete(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, @LONG_ARGS@)
-{
- int32_t ret = -1;
- gf_boolean_t result = _gf_false;
- jbr_private_t *priv = NULL;
- jbr_local_t *local = NULL;
- jbr_local_t *new_local = NULL;
-
- GF_VALIDATE_OR_GOTO("jbr", this, err);
- GF_VALIDATE_OR_GOTO(this->name, frame, err);
- priv = this->private;
- local = frame->local;
- GF_VALIDATE_OR_GOTO(this->name, priv, err);
- GF_VALIDATE_OR_GOTO(this->name, local, err);
-
- /* If the fop failed on the leader, then reduce one successful ack
- * before calculating the fop quorum
- */
- LOCK(&frame->lock);
- if (op_ret == -1)
- (local->successful_acks)--;
- UNLOCK(&frame->lock);
-
-#if defined(JBR_CG_QUEUE)
- ret = jbr_remove_from_queue(frame, this);
- if (ret)
- goto err;
-#endif
-
-#if defined(JBR_CG_FSYNC)
- jbr_mark_fd_dirty(this, local);
-#endif
-
-#if defined(JBR_CG_NEED_FD)
- fd_unref(local->fd);
-#endif
-
- /* After the leader completes the fop, a quorum check is *
- * performed, taking into account the outcome of the fop *
- * on the leader. Irrespective of the fop being successful *
- * or failing on the leader, the result of the quorum will *
- * determine if the overall fop is successful or not. For *
- * example, a fop might have succeeded on every node except *
- * the leader, in which case as quorum is being met, the fop *
- * will be treated as a successful fop, even though it failed *
- * on the leader. On follower nodes, no quorum check should *
- * be done, and the result is returned to the leader as is. *
- */
- if (priv->leader) {
- result = fop_quorum_check(this, (double)priv->n_children,
- (double)local->successful_acks + 1);
- if (result == _gf_false) {
- op_ret = -1;
- op_errno = EROFS;
- gf_msg(this->name, GF_LOG_ERROR, EROFS, J_MSG_QUORUM_NOT_MET,
- "Quorum is not met. "
- "The operation has failed.");
- /*
- * In this case, the quorum is not met after the *
- * operation is performed on the leader. Hence a *
- * rollback will be sent via GF_FOP_IPC to the leader *
- * where this particular fop's term and index numbers *
- * will be journaled, and later used to rollback. *
- * The same will be done on all the followers *
- */
- call_frame_t *new_frame;
-
- new_frame = copy_frame(frame);
- if (new_frame) {
- new_local = mem_get0(this->local_pool);
- if (new_local) {
- INIT_LIST_HEAD(&new_local->qlinks);
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR,
- "op = %d", new_frame->op);
- ret = dict_set_int32(local->xdata, "rollback-fop",
- GF_FOP_@UPNAME@);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR,
- "failed to set "
- "rollback-fop");
- } else {
- new_local->xdata = dict_ref(local->xdata);
- new_frame->local = new_local;
- /*
- * Calling STACK_WIND instead *
- * of jbr_ipc as it will not *
- * unwind to the previous *
- * translators like it will *
- * in case of jbr_ipc. *
- */
- STACK_WIND(
- new_frame, jbr_ipc_complete, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ipc,
- FDL_IPC_JBR_SERVER_ROLLBACK, new_local->xdata);
- }
- } else {
- gf_log(this->name, GF_LOG_WARNING,
- "Could not create local "
- "for new_frame");
- }
- } else {
- gf_log(this->name, GF_LOG_WARNING,
- "Could not send rollback ipc");
- }
- } else {
-#if defined(JBR_CG_NEED_FD)
- op_ret = local->successful_op_ret;
-#else
- op_ret = 0;
-#endif
- op_errno = 0;
- gf_msg_debug(this->name, 0,
- "Quorum has met. The operation has succeeded.");
- }
- }
-
- /*
- * Unrefing the reference taken in jbr_@NAME@ () *
- */
- dict_unref(local->xdata);
-
- STACK_UNWIND_STRICT(@NAME@, frame, op_ret, op_errno, @SHORT_ARGS@);
-
- return 0;
-
-err:
- STACK_UNWIND_STRICT(@NAME@, frame, -1, 0, @SHORT_ARGS@);
-
- return 0;
-}
diff --git a/xlators/experimental/jbr-server/src/gen-fops.py b/xlators/experimental/jbr-server/src/gen-fops.py
deleted file mode 100755
index 616782bba45..00000000000
--- a/xlators/experimental/jbr-server/src/gen-fops.py
+++ /dev/null
@@ -1,181 +0,0 @@
-#!/usr/bin/python3
-
-# This script generates the boilerplate versions of most fops and cbks in the
-# server. This allows the details of leadership-status checking, sequencing
-# between leader and followers (including fan-out), and basic error checking
-# to be centralized one place, with per-operation code kept to a minimum.
-
-from __future__ import print_function
-import os
-import re
-import string
-import sys
-
-curdir = os.path.dirname(sys.argv[0])
-gendir = os.path.join(curdir, '../../../../libglusterfs/src')
-sys.path.append(gendir)
-from generator import ops, fop_subs, cbk_subs, generate
-
-# We really want the callback argument list, even when we're generating fop
-# code, so we propagate here.
-# TBD: this should probably be right in generate.py
-for k, v in cbk_subs.items():
- fop_subs[k]['@ERROR_ARGS@'] = v['@ERROR_ARGS@']
-
-# Stolen from old codegen.py
-def load_templates (path):
- templates = {}
- tmpl_re = re.compile("/\* template-name (.*) \*/")
- templates = {}
- t_name = None
- for line in open(path, "r").readlines():
- if not line:
- break
- m = tmpl_re.match(line)
- if m:
- if t_name:
- templates[t_name] = ''.join(t_contents)
- t_name = m.group(1).strip()
- t_contents = []
- elif t_name:
- t_contents.append(line)
- if t_name:
- templates[t_name] = ''.join(t_contents)
- return templates
-
-# We need two types of templates. The first, for pure read operations, just
-# needs to do a simple am-i-leader check (augmented to allow dirty reads).
-# The second, for pure writes, needs to do fan-out to followers between those
-# initial checks and local execution. There are other operations that don't
-# fit neatly into either category - e.g. lock ops or fsync - so we'll just have
-# to handle those manually. The table thus includes entries only for those we
-# can categorize. The special cases, plus any new operations we've never even
-# heard of, aren't in there.
-#
-# Various keywords can be used to define/undefine preprocessor symbols used
-# in the templates, on a per-function basis. For example, if the keyword here
-# is "fsync" (lowercase word or abbreviation) that will cause JBR_CG_FSYNC
-# (prefix plus uppercase version) to be defined above all of the generated code
-# for that fop.
-
-fop_table = {
- "access": "read",
- "create": "write",
- "discard": "write",
-# "entrylk": "read",
- "fallocate": "write",
-# "fentrylk": "read",
- "fgetxattr": "read",
-# "finodelk": "read",
-# "flush": "read",
- "fremovexattr": "write",
- "fsetattr": "write",
- "fsetxattr": "write",
- "fstat": "read",
-# "fsync": "read",
-# "fsyncdir": "read",
- "ftruncate": "write",
- "fxattrop": "write",
- "getxattr": "read",
-# "inodelk": "read",
- "link": "write",
- "lk": "write,queue",
-# "lookup": "read",
- "mkdir": "write",
- "mknod": "write",
- "open": "write",
- "opendir": "read",
- "rchecksum": "read",
- "readdir": "read",
- "readdirp": "read",
- "readlink": "read",
- "readv": "read",
- "removexattr": "write",
- "rename": "write",
- "rmdir": "write",
- "setattr": "write",
- "setxattr": "write",
- "stat": "read",
- "statfs": "read",
- "symlink": "write",
- "truncate": "write",
- "unlink": "write",
- "writev": "write,fsync,queue",
- "xattrop": "write",
- "ipc": "write",
-}
-
-# Mention those fops in the selective_generate table, for which
-# only a few common functions will be generated, and mention those
-# functions. Rest of the functions can be customized
-selective_generate = {
- "lk": "fop,dispatch,call_dispatch",
- "ipc": "dispatch,call_dispatch",
-}
-
-# Stolen from gen_fdl.py
-def gen_server (templates):
- fops_done = []
- for name in fop_table.keys():
- info = fop_table[name].split(",")
- kind = info[0]
- flags = info[1:]
-
- # generate all functions for the fops in fop_table
- # except for the ones in selective_generate for which
- # generate only the functions mentioned in the
- # selective_generate table
- gen_funcs = "fop,complete,continue,fan-in,dispatch, \
- call_dispatch,perform_local_op"
- if name in selective_generate:
- gen_funcs = selective_generate[name].split(",")
-
- if ("fsync" in flags) or ("queue" in flags):
- flags.append("need_fd")
- for fname in flags:
- print("#define JBR_CG_%s" % fname.upper())
-
- if 'complete' in gen_funcs:
- print(generate(templates[kind+"-complete"],
- name, cbk_subs))
-
- if 'continue' in gen_funcs:
- print(generate(templates[kind+"-continue"],
- name, fop_subs))
-
- if 'fan-in' in gen_funcs:
- print(generate(templates[kind+"-fan-in"],
- name, cbk_subs))
-
- if 'dispatch' in gen_funcs:
- print(generate(templates[kind+"-dispatch"],
- name, fop_subs))
-
- if 'call_dispatch' in gen_funcs:
- print(generate(templates[kind+"-call_dispatch"],
- name, fop_subs))
-
- if 'perform_local_op' in gen_funcs:
- print(generate(templates[kind+"-perform_local_op"],
- name, fop_subs))
-
- if 'fop' in gen_funcs:
- print(generate(templates[kind+"-fop"], name, fop_subs))
-
- for fname in flags:
- print("#undef JBR_CG_%s" % fname.upper())
- fops_done.append(name)
- # Just for fun, emit the fops table too.
- print("struct xlator_fops fops = {")
- for x in fops_done:
- print((" .%s = jbr_%s,"%(x, x)))
- print("};")
-
-tmpl = load_templates(sys.argv[1])
-for l in open(sys.argv[2], 'r').readlines():
- if l.find('#pragma generate') != -1:
- print("/* BEGIN GENERATED CODE - DO NOT MODIFY */")
- gen_server(tmpl)
- print("/* END GENERATED CODE */")
- else:
- print(l[:-1])
diff --git a/xlators/experimental/jbr-server/src/jbr-internal.h b/xlators/experimental/jbr-server/src/jbr-internal.h
deleted file mode 100644
index f225e988a5f..00000000000
--- a/xlators/experimental/jbr-server/src/jbr-internal.h
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include <sys/stat.h>
-#include <sys/types.h>
-
-#define LEADER_XATTR "user.jbr.leader"
-#define SECOND_CHILD(xl) (xl->children->next->xlator)
-#define RECONCILER_PATH JBR_SCRIPT_PREFIX "/reconciler.py"
-#define CHANGELOG_ENTRY_SIZE 128
-
-enum {
- gf_mt_jbr_private_t = gf_common_mt_end + 1,
- gf_mt_jbr_fd_ctx_t,
- gf_mt_jbr_inode_ctx_t,
- gf_mt_jbr_dirty_t,
- gf_mt_jbr_end
-};
-
-typedef enum jbr_recon_notify_ev_id_t {
- JBR_RECON_SET_LEADER = 1,
- JBR_RECON_ADD_CHILD = 2
-} jbr_recon_notify_ev_id_t;
-
-typedef struct _jbr_recon_notify_ev_s {
- jbr_recon_notify_ev_id_t id;
- uint32_t index; /* in case of add */
- struct list_head list;
-} jbr_recon_notify_ev_t;
-
-typedef struct {
- /*
- * This is a hack to allow a non-leader to accept requests while the
- * leader is down, and it only works for n=2. The way it works is that
- * "config_leader" indicates the state from our options (via init or
- * reconfigure) but "leader" is what the fop code actually looks at. If
- * config_leader is true, then leader will *always* be true as well,
- * giving that brick precedence. If config_leader is false, then
- * leader will only be true if there is no connection to the other
- * brick (tracked in jbr_notify).
- *
- * TBD: implement real leader election
- */
- gf_boolean_t config_leader;
- gf_boolean_t leader;
- uint8_t up_children;
- uint8_t n_children;
- char *vol_file;
- uint32_t current_term;
- uint32_t kid_state;
- gf_lock_t dirty_lock;
- struct list_head dirty_fds;
- uint32_t index;
- gf_lock_t index_lock;
- double quorum_pct;
- int term_fd;
- long term_total;
- long term_read;
- /*
- * This is a super-duper hack, but it will do for now. The reason it's
- * a hack is that we pass this to dict_set_static_bin, so we don't have
- * to mess around with allocating and freeing it on every single IPC
- * request, but it's totally not thread-safe. On the other hand, there
- * should only be one reconciliation thread running and calling these
- * functions at a time, so maybe that doesn't matter.
- *
- * TBD: re-evaluate how to manage this
- */
- char term_buf[CHANGELOG_ENTRY_SIZE];
- gf_boolean_t child_up; /* To maintain the state of *
- * the translator */
-} jbr_private_t;
-
-typedef struct {
- call_stub_t *stub;
- call_stub_t *qstub;
- uint32_t call_count;
- uint32_t successful_acks;
- uint32_t successful_op_ret;
- fd_t *fd;
- struct list_head qlinks;
- dict_t *xdata;
-} jbr_local_t;
-
-/*
- * This should match whatever changelog returns on the pre-op for us to pass
- * when we're ready for our post-op.
- */
-typedef uint32_t log_id_t;
-
-typedef struct {
- struct list_head links;
- log_id_t id;
-} jbr_dirty_list_t;
-
-typedef struct {
- fd_t *fd;
- struct list_head dirty_list;
- struct list_head fd_list;
-} jbr_fd_ctx_t;
-
-typedef struct {
- gf_lock_t lock;
- uint32_t active;
- struct list_head aqueue;
- uint32_t pending;
- struct list_head pqueue;
-} jbr_inode_ctx_t;
-
-void
-jbr_start_reconciler(xlator_t *this);
diff --git a/xlators/experimental/jbr-server/src/jbr.c b/xlators/experimental/jbr-server/src/jbr.c
deleted file mode 100644
index 31df6ad5a1e..00000000000
--- a/xlators/experimental/jbr-server/src/jbr.c
+++ /dev/null
@@ -1,1676 +0,0 @@
-/*
- Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <fnmatch.h>
-#include "call-stub.h"
-#include "defaults.h"
-#include "xlator.h"
-#include "glfs.h"
-#include "glfs-internal.h"
-#include "run.h"
-#include "common-utils.h"
-#include "syncop.h"
-#include "syscall.h"
-#include "compat-errno.h"
-#include "fdl.h"
-
-#include "jbr-internal.h"
-#include "jbr-messages.h"
-
-#define JBR_FLUSH_INTERVAL 5
-
-enum {
- /* echo "cluster/jbr-server" | md5sum | cut -c 1-8 */
- JBR_SERVER_IPC_BASE = 0x0e2d66a5,
- JBR_SERVER_TERM_RANGE,
- JBR_SERVER_OPEN_TERM,
- JBR_SERVER_NEXT_ENTRY
-};
-
-/*
- * Need to declare jbr_lk_call_dispatch as jbr_lk_continue and *
- * jbr_lk_perform_local_op call it, before code is generated. *
- */
-int32_t
-jbr_lk_call_dispatch(call_frame_t *frame, xlator_t *this, int *op_errno,
- fd_t *fd, int32_t cmd, struct gf_flock *lock,
- dict_t *xdata);
-
-int32_t
-jbr_lk_dispatch(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct gf_flock *lock, dict_t *xdata);
-
-int32_t
-jbr_ipc_call_dispatch(call_frame_t *frame, xlator_t *this, int *op_errno,
- int32_t op, dict_t *xdata);
-
-int32_t
-jbr_ipc_complete(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata);
-
-/* Used to check the quorum of acks received after the fop
- * confirming the status of the fop on all the brick processes
- * for this particular subvolume
- */
-gf_boolean_t
-fop_quorum_check(xlator_t *this, double n_children, double current_state)
-{
- jbr_private_t *priv = NULL;
- gf_boolean_t result = _gf_false;
- double required = 0;
- double current = 0;
-
- GF_VALIDATE_OR_GOTO("jbr", this, out);
- priv = this->private;
- GF_VALIDATE_OR_GOTO(this->name, priv, out);
-
- required = n_children * priv->quorum_pct;
-
- /*
- * Before performing the fop on the leader, we need to check,
- * if there is any merit in performing the fop on the leader.
- * In a case, where even a successful write on the leader, will
- * not meet quorum, there is no point in trying the fop on the
- * leader.
- * When this function is called after the leader has tried
- * performing the fop, this check will calculate quorum taking into
- * account the status of the fop on the leader. If the leader's
- * op_ret was -1, the complete function would account that by
- * decrementing successful_acks by 1
- */
-
- current = current_state * 100.0;
-
- if (current < required) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_QUORUM_NOT_MET,
- "Quorum not met. quorum_pct = %f "
- "Current State = %f, Required State = %f",
- priv->quorum_pct, current, required);
- } else
- result = _gf_true;
-
-out:
- return result;
-}
-
-jbr_inode_ctx_t *
-jbr_get_inode_ctx(xlator_t *this, inode_t *inode)
-{
- uint64_t ctx_int = 0LL;
- jbr_inode_ctx_t *ctx_ptr;
-
- if (__inode_ctx_get(inode, this, &ctx_int) == 0) {
- ctx_ptr = (jbr_inode_ctx_t *)(long)ctx_int;
- } else {
- ctx_ptr = GF_CALLOC(1, sizeof(*ctx_ptr), gf_mt_jbr_inode_ctx_t);
- if (ctx_ptr) {
- ctx_int = (uint64_t)(long)ctx_ptr;
- if (__inode_ctx_set(inode, this, &ctx_int) == 0) {
- LOCK_INIT(&ctx_ptr->lock);
- INIT_LIST_HEAD(&ctx_ptr->aqueue);
- INIT_LIST_HEAD(&ctx_ptr->pqueue);
- } else {
- GF_FREE(ctx_ptr);
- ctx_ptr = NULL;
- }
- }
- }
-
- return ctx_ptr;
-}
-
-jbr_fd_ctx_t *
-jbr_get_fd_ctx(xlator_t *this, fd_t *fd)
-{
- uint64_t ctx_int = 0LL;
- jbr_fd_ctx_t *ctx_ptr;
-
- if (__fd_ctx_get(fd, this, &ctx_int) == 0) {
- ctx_ptr = (jbr_fd_ctx_t *)(long)ctx_int;
- } else {
- ctx_ptr = GF_CALLOC(1, sizeof(*ctx_ptr), gf_mt_jbr_fd_ctx_t);
- if (ctx_ptr) {
- if (__fd_ctx_set(fd, this, (uint64_t)(uintptr_t)ctx_ptr) == 0) {
- INIT_LIST_HEAD(&ctx_ptr->dirty_list);
- INIT_LIST_HEAD(&ctx_ptr->fd_list);
- } else {
- GF_FREE(ctx_ptr);
- ctx_ptr = NULL;
- }
- }
- }
-
- return ctx_ptr;
-}
-
-void
-jbr_mark_fd_dirty(xlator_t *this, jbr_local_t *local)
-{
- fd_t *fd = local->fd;
- jbr_fd_ctx_t *ctx_ptr;
- jbr_dirty_list_t *dirty;
- jbr_private_t *priv = this->private;
-
- /*
- * TBD: don't do any of this for O_SYNC/O_DIRECT writes.
- * Unfortunately, that optimization requires that we distinguish
- * between writev and other "write" calls, saving the original flags
- * and checking them in the callback. Too much work for too little
- * gain right now.
- */
-
- LOCK(&fd->lock);
- ctx_ptr = jbr_get_fd_ctx(this, fd);
- dirty = GF_CALLOC(1, sizeof(*dirty), gf_mt_jbr_dirty_t);
- if (ctx_ptr && dirty) {
- gf_msg_trace(this->name, 0, "marking fd %p as dirty (%p)", fd, dirty);
- /* TBD: fill dirty->id from what changelog gave us */
- list_add_tail(&dirty->links, &ctx_ptr->dirty_list);
- if (list_empty(&ctx_ptr->fd_list)) {
- /* Add a ref so _release doesn't get called. */
- ctx_ptr->fd = fd_ref(fd);
- LOCK(&priv->dirty_lock);
- list_add_tail(&ctx_ptr->fd_list, &priv->dirty_fds);
- UNLOCK(&priv->dirty_lock);
- }
- } else {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, J_MSG_MEM_ERR,
- "could not mark %p dirty", fd);
- if (ctx_ptr) {
- GF_FREE(ctx_ptr);
- }
- if (dirty) {
- GF_FREE(dirty);
- }
- }
- UNLOCK(&fd->lock);
-}
-
-#define JBR_TERM_XATTR "trusted.jbr.term"
-#define JBR_INDEX_XATTR "trusted.jbr.index"
-#define JBR_REP_COUNT_XATTR "trusted.jbr.rep-count"
-#define RECON_TERM_XATTR "trusted.jbr.recon-term"
-#define RECON_INDEX_XATTR "trusted.jbr.recon-index"
-
-int32_t
-jbr_leader_checks_and_init(call_frame_t *frame, xlator_t *this, int *op_errno,
- dict_t *xdata, fd_t *fd)
-{
- jbr_local_t *local = NULL;
- jbr_private_t *priv = NULL;
- int32_t ret = -1;
- gf_boolean_t result = _gf_false;
- int from_leader = _gf_false;
- int from_recon = _gf_false;
-
- GF_VALIDATE_OR_GOTO("jbr", this, out);
- priv = this->private;
- GF_VALIDATE_OR_GOTO(this->name, priv, out);
- GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
- GF_VALIDATE_OR_GOTO(this->name, frame, out);
-
- /*
- * Our first goal here is to avoid "split brain surprise" for users who
- * specify exactly 50% with two- or three-way replication. That means
- * either a more-than check against half the total replicas or an
- * at-least check against half of our peers (one less). Of the two,
- * only an at-least check supports the intuitive use of 100% to mean
- * all replicas must be present, because "more than 100%" will never
- * succeed regardless of which count we use. This leaves us with a
- * slightly non-traditional definition of quorum ("at least X% of peers
- * not including ourselves") but one that's useful enough to be worth
- * it.
- *
- * Note that n_children and up_children *do* include the local
- * subvolume, so we need to subtract one in each case.
- */
- if (priv->leader) {
- result = fop_quorum_check(this, (double)(priv->n_children - 1),
- (double)(priv->up_children - 1));
-
- if (result == _gf_false) {
- /* Emulate the AFR client-side-quorum behavior. */
- gf_msg(this->name, GF_LOG_ERROR, EROFS, J_MSG_QUORUM_NOT_MET,
- "Sufficient number of "
- "subvolumes are not up to meet quorum.");
- *op_errno = EROFS;
- goto out;
- }
- } else {
- if (xdata) {
- from_leader = !!dict_get(xdata, JBR_TERM_XATTR);
- from_recon = !!dict_get(xdata, RECON_TERM_XATTR) &&
- !!dict_get(xdata, RECON_INDEX_XATTR);
- } else {
- from_leader = from_recon = _gf_false;
- }
-
- /* follower/recon path *
- * just send it to local node *
- */
- if (!from_leader && !from_recon) {
- *op_errno = EREMOTE;
- goto out;
- }
- }
-
- local = mem_get0(this->local_pool);
- if (!local) {
- goto out;
- }
-
- if (fd)
- local->fd = fd_ref(fd);
- else
- local->fd = NULL;
-
- INIT_LIST_HEAD(&local->qlinks);
- local->successful_acks = 0;
- frame->local = local;
-
- ret = 0;
-out:
- return ret;
-}
-
-int32_t
-jbr_initialize_xdata_set_attrs(xlator_t *this, dict_t **xdata)
-{
- jbr_private_t *priv = NULL;
- int32_t ret = -1;
- uint32_t ti = 0;
-
- GF_VALIDATE_OR_GOTO("jbr", this, out);
- priv = this->private;
- GF_VALIDATE_OR_GOTO(this->name, priv, out);
- GF_VALIDATE_OR_GOTO(this->name, xdata, out);
-
- if (!*xdata) {
- *xdata = dict_new();
- if (!*xdata) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, J_MSG_MEM_ERR,
- "failed to allocate xdata");
- goto out;
- }
- }
-
- if (dict_set_int32(*xdata, JBR_TERM_XATTR, priv->current_term) != 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR,
- "failed to set jbr-term");
- goto out;
- }
-
- LOCK(&priv->index_lock);
- ti = ++(priv->index);
- UNLOCK(&priv->index_lock);
- if (dict_set_int32(*xdata, JBR_INDEX_XATTR, ti) != 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR,
- "failed to set index");
- goto out;
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-int32_t
-jbr_remove_from_queue(call_frame_t *frame, xlator_t *this)
-{
- int32_t ret = -1;
- jbr_inode_ctx_t *ictx = NULL;
- jbr_local_t *local = NULL;
- jbr_local_t *next = NULL;
-
- GF_VALIDATE_OR_GOTO("jbr", this, out);
- GF_VALIDATE_OR_GOTO(this->name, frame, out);
- local = frame->local;
- GF_VALIDATE_OR_GOTO(this->name, local, out);
-
- if (local->qlinks.next != &local->qlinks) {
- list_del(&local->qlinks);
- ictx = jbr_get_inode_ctx(this, local->fd->inode);
- if (ictx) {
- LOCK(&ictx->lock);
- if (ictx->pending) {
- /*
- * TBD: dequeue *all* non-conflicting
- * reqs
- *
- * With the stub implementation there
- * can only be one request active at a
- * time (zero here) so it's not an
- * issue. In a real implementation
- * there might still be other active
- * requests to check against, and
- * multiple pending requests that could
- * continue.
- */
- gf_msg_debug(this->name, 0, "unblocking next request");
- --(ictx->pending);
- next = list_entry(ictx->pqueue.next, jbr_local_t, qlinks);
- list_del(&next->qlinks);
- list_add_tail(&next->qlinks, &ictx->aqueue);
- call_resume(next->qstub);
- } else {
- --(ictx->active);
- }
- UNLOCK(&ictx->lock);
- }
- }
-
- ret = 0;
-
-out:
- return ret;
-}
-
-int32_t
-jbr_lk_complete(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *flock,
- dict_t *xdata)
-{
- int32_t ret = -1;
- jbr_private_t *priv = NULL;
- jbr_local_t *local = NULL;
- gf_boolean_t result = _gf_false;
-
- GF_VALIDATE_OR_GOTO("jbr", this, err);
- priv = this->private;
- GF_VALIDATE_OR_GOTO(this->name, priv, err);
- GF_VALIDATE_OR_GOTO(this->name, frame, err);
- local = frame->local;
- GF_VALIDATE_OR_GOTO(this->name, local, err);
- GF_VALIDATE_OR_GOTO(this->name, flock, err);
- GF_VALIDATE_OR_GOTO(this->name, xdata, err);
-
- /*
- * Remove from queue for unlock operation only *
- * For lock operation, it will be done in fan-in *
- */
- if (flock->l_type == F_UNLCK) {
- ret = jbr_remove_from_queue(frame, this);
- if (ret)
- goto err;
- }
-
- /*
- * On a follower, unwind with the op_ret and op_errno. On a *
- * leader, if the fop is a locking fop, and its a failure, *
- * send fail, else call stub which will dispatch the fop to *
- * the followers. *
- * *
- * If the fop is a unlocking fop, check quorum. If quorum *
- * is met, then send success. Else Rollback on leader, *
- * followed by followers, and then send -ve ack to client. *
- */
- if (priv->leader) {
- /* Increase the successful acks if it's a success. */
- LOCK(&frame->lock);
- if (op_ret != -1)
- (local->successful_acks)++;
- UNLOCK(&frame->lock);
-
- if (flock->l_type == F_UNLCK) {
- result = fop_quorum_check(this, (double)priv->n_children,
- (double)local->successful_acks);
- if (result == _gf_false) {
- op_ret = -1;
- op_errno = EROFS;
- gf_msg(this->name, GF_LOG_ERROR, EROFS, J_MSG_QUORUM_NOT_MET,
- "Quorum is not met. "
- "The operation has failed.");
-
- /* TODO: PERFORM UNLOCK ROLLBACK ON LEADER *
- * FOLLOWED BY FOLLOWERS. */
- } else {
- op_ret = 0;
- op_errno = 0;
- }
-
- fd_unref(local->fd);
- STACK_UNWIND_STRICT(lk, frame, op_ret, op_errno, flock, xdata);
- } else {
- if (op_ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_LOCK_FAILURE,
- "The lock operation failed on "
- "the leader.");
-
- fd_unref(local->fd);
- STACK_UNWIND_STRICT(lk, frame, op_ret, op_errno, flock, xdata);
- } else {
- if (!local->stub) {
- goto err;
- }
-
- call_resume(local->stub);
- }
- }
- } else {
- fd_unref(local->fd);
- STACK_UNWIND_STRICT(lk, frame, op_ret, op_errno, flock, xdata);
- }
-
- return 0;
-
-err:
- if (local) {
- if (local->stub) {
- call_stub_destroy(local->stub);
- }
- if (local->qstub) {
- call_stub_destroy(local->qstub);
- }
- if (local->fd) {
- fd_unref(local->fd);
- }
- mem_put(local);
- }
- STACK_UNWIND_STRICT(lk, frame, -1, op_errno, flock, xdata);
- return 0;
-}
-
-int32_t
-jbr_lk_fan_in(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct gf_flock *flock, dict_t *xdata)
-{
- uint8_t call_count = -1;
- int32_t ret = -1;
- gf_boolean_t result = _gf_false;
- jbr_local_t *local = NULL;
- jbr_private_t *priv = NULL;
-
- GF_VALIDATE_OR_GOTO("jbr", this, out);
- GF_VALIDATE_OR_GOTO(this->name, frame, out);
- priv = this->private;
- local = frame->local;
- GF_VALIDATE_OR_GOTO(this->name, priv, out);
- GF_VALIDATE_OR_GOTO(this->name, local, out);
-
- gf_msg_trace(this->name, 0, "op_ret = %d, op_errno = %d\n", op_ret,
- op_errno);
-
- LOCK(&frame->lock);
- call_count = --(local->call_count);
- if (op_ret != -1) {
- /* Increment the number of successful acks *
- * received for the operation. *
- */
- (local->successful_acks)++;
- local->successful_op_ret = op_ret;
- }
- gf_msg_debug(this->name, 0, "succ_acks = %d, op_ret = %d, op_errno = %d\n",
- op_ret, op_errno, local->successful_acks);
- UNLOCK(&frame->lock);
-
- if (call_count == 0) {
- /*
- * If the fop is a locking fop, then check quorum. If quorum *
- * is met, send successful ack to the client. If quorum is *
- * not met, then rollback locking on followers, followed by *
- * rollback of locking on leader, and then sending -ve ack *
- * to the client. *
- * *
- * If the fop is a unlocking fop, then call stub. *
- */
- if (flock->l_type == F_UNLCK) {
- call_resume(local->stub);
- } else {
- /*
- * Remove from queue for locking fops, for unlocking *
- * fops, it is taken care of in jbr_lk_complete *
- */
- ret = jbr_remove_from_queue(frame, this);
- if (ret)
- goto out;
-
- fd_unref(local->fd);
-
- result = fop_quorum_check(this, (double)priv->n_children,
- (double)local->successful_acks);
- if (result == _gf_false) {
- gf_msg(this->name, GF_LOG_ERROR, EROFS, J_MSG_QUORUM_NOT_MET,
- "Didn't receive enough acks to meet "
- "quorum. Failing the locking "
- "operation and initiating rollback on "
- "followers and the leader "
- "respectively.");
-
- /* TODO: PERFORM ROLLBACK OF LOCKING ON
- * FOLLOWERS, FOLLOWED BY ROLLBACK ON
- * LEADER.
- */
-
- STACK_UNWIND_STRICT(lk, frame, -1, EROFS, flock, xdata);
- } else {
- STACK_UNWIND_STRICT(lk, frame, 0, 0, flock, xdata);
- }
- }
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-/*
- * Called from leader for locking fop, being written as a separate
- * function so as to support queues.
- */
-int32_t
-jbr_perform_lk_on_leader(call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t cmd, struct gf_flock *flock, dict_t *xdata)
-{
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO("jbr", this, out);
- GF_VALIDATE_OR_GOTO(this->name, frame, out);
- GF_VALIDATE_OR_GOTO(this->name, flock, out);
- GF_VALIDATE_OR_GOTO(this->name, fd, out);
-
- STACK_WIND(frame, jbr_lk_complete, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lk, fd, cmd, flock, xdata);
-
- ret = 0;
-out:
- return ret;
-}
-
-int32_t
-jbr_lk_perform_local_op(call_frame_t *frame, xlator_t *this, int *op_errno,
- fd_t *fd, int32_t cmd, struct gf_flock *flock,
- dict_t *xdata)
-{
- int32_t ret = -1;
- jbr_local_t *local = NULL;
-
- GF_VALIDATE_OR_GOTO("jbr", this, out);
- GF_VALIDATE_OR_GOTO(this->name, frame, out);
- local = frame->local;
- GF_VALIDATE_OR_GOTO(this->name, local, out);
- GF_VALIDATE_OR_GOTO(this->name, fd, out);
- GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
- GF_VALIDATE_OR_GOTO(this->name, flock, out);
-
- /*
- * Check if the fop is a locking fop or unlocking fop, and
- * handle it accordingly. If it is a locking fop, take the
- * lock on leader first, and then send it to the followers.
- * If it is a unlocking fop, unlock the followers first,
- * and then on meeting quorum perform the unlock on the leader.
- */
- if (flock->l_type == F_UNLCK) {
- ret = jbr_lk_call_dispatch(frame, this, op_errno, fd, cmd, flock,
- xdata);
- if (ret)
- goto out;
- } else {
- jbr_inode_ctx_t *ictx = jbr_get_inode_ctx(this, fd->inode);
-
- if (!ictx) {
- *op_errno = EIO;
- goto out;
- }
-
- LOCK(&ictx->lock);
- if (ictx->active) {
- gf_msg_debug(this->name, 0, "queuing request due to conflict");
-
- local->qstub = fop_lk_stub(frame, jbr_perform_lk_on_leader, fd, cmd,
- flock, xdata);
- if (!local->qstub) {
- UNLOCK(&ictx->lock);
- goto out;
- }
- list_add_tail(&local->qlinks, &ictx->pqueue);
- ++(ictx->pending);
- UNLOCK(&ictx->lock);
- ret = 0;
- goto out;
- } else {
- list_add_tail(&local->qlinks, &ictx->aqueue);
- ++(ictx->active);
- }
- UNLOCK(&ictx->lock);
- ret = jbr_perform_lk_on_leader(frame, this, fd, cmd, flock, xdata);
- if (ret == -1)
- goto out;
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-int32_t
-jbr_lk_continue(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct gf_flock *flock, dict_t *xdata)
-{
- int32_t ret = -1;
- jbr_local_t *local = NULL;
- jbr_private_t *priv = NULL;
-
- GF_VALIDATE_OR_GOTO("jbr", this, out);
- GF_VALIDATE_OR_GOTO(this->name, frame, out);
- priv = this->private;
- local = frame->local;
- GF_VALIDATE_OR_GOTO(this->name, priv, out);
- GF_VALIDATE_OR_GOTO(this->name, local, out);
- GF_VALIDATE_OR_GOTO(this->name, flock, out);
- GF_VALIDATE_OR_GOTO(this->name, fd, out);
- GF_VALIDATE_OR_GOTO(this->name, xdata, out);
-
- /*
- * If it's a locking fop, then call dispatch to followers *
- * If it's a unlock fop, then perform the unlock operation *
- */
- if (flock->l_type == F_UNLCK) {
- STACK_WIND(frame, jbr_lk_complete, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lk, fd, cmd, flock, xdata);
- } else {
- /*
- * Directly call jbr_lk_dispatch instead of appending *
- * in queue, which is done at jbr_lk_perform_local_op *
- * for locking fops *
- */
- ret = jbr_lk_dispatch(frame, this, fd, cmd, flock, xdata);
- if (ret) {
- STACK_UNWIND_STRICT(lk, frame, -1, 0, flock, xdata);
- goto out;
- }
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-uint8_t
-jbr_count_up_kids(jbr_private_t *priv)
-{
- uint8_t retval = 0;
- uint8_t i;
-
- for (i = 0; i < priv->n_children; ++i) {
- if (priv->kid_state & (1 << i)) {
- ++retval;
- }
- }
-
- return retval;
-}
-
-/*
- * The fsync machinery looks a lot like that for any write call, but there are
- * some important differences that are easy to miss. First, we don't care
- * about the xdata that shows whether the call came from a leader or
- * reconciliation process. If we're the leader we fan out; if we're not we
- * don't. Second, we don't wait for followers before we issue the local call.
- * The code generation system could be updated to handle this, and still might
- * if we need to implement other "almost identical" paths (e.g. for open), but
- * a copy is more readable as long as it's just one.
- */
-
-int32_t
-jbr_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
- dict_t *xdata)
-{
- jbr_local_t *local = frame->local;
- gf_boolean_t unwind;
-
- LOCK(&frame->lock);
- unwind = !--(local->call_count);
- UNLOCK(&frame->lock);
-
- if (unwind) {
- STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
- }
- return 0;
-}
-
-int32_t
-jbr_fsync_local_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- jbr_dirty_list_t *dirty;
- jbr_dirty_list_t *dtmp;
- jbr_local_t *local = frame->local;
-
- list_for_each_entry_safe(dirty, dtmp, &local->qlinks, links)
- {
- gf_msg_trace(this->name, 0, "sending post-op on %p (%p)", local->fd,
- dirty);
- GF_FREE(dirty);
- }
-
- return jbr_fsync_cbk(frame, cookie, this, op_ret, op_errno, prebuf, postbuf,
- xdata);
-}
-
-int32_t
-jbr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
- dict_t *xdata)
-{
- jbr_private_t *priv = this->private;
- jbr_local_t *local;
- uint64_t ctx_int = 0LL;
- jbr_fd_ctx_t *ctx_ptr;
- xlator_list_t *trav;
-
- local = mem_get0(this->local_pool);
- if (!local) {
- STACK_UNWIND_STRICT(fsync, frame, -1, ENOMEM, NULL, NULL, xdata);
- return 0;
- }
- INIT_LIST_HEAD(&local->qlinks);
- frame->local = local;
-
- /* Move the dirty list from the fd to the fsync request. */
- LOCK(&fd->lock);
- if (__fd_ctx_get(fd, this, &ctx_int) == 0) {
- ctx_ptr = (jbr_fd_ctx_t *)(long)ctx_int;
- list_splice_init(&ctx_ptr->dirty_list, &local->qlinks);
- }
- UNLOCK(&fd->lock);
-
- /* Issue the local call. */
- local->call_count = priv->leader ? priv->n_children : 1;
- STACK_WIND(frame, jbr_fsync_local_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync, fd, flags, xdata);
-
- /* Issue remote calls if we're the leader. */
- if (priv->leader) {
- for (trav = this->children->next; trav; trav = trav->next) {
- STACK_WIND(frame, jbr_fsync_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync, fd, flags, xdata);
- }
- }
-
- return 0;
-}
-
-int32_t
-jbr_getxattr_special(call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
-{
- dict_t *result;
- jbr_private_t *priv = this->private;
-
- if (!priv->leader) {
- STACK_UNWIND_STRICT(getxattr, frame, -1, EREMOTE, NULL, NULL);
- return 0;
- }
-
- if (!name || (strcmp(name, JBR_REP_COUNT_XATTR) != 0)) {
- STACK_WIND_TAIL(frame, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
- return 0;
- }
-
- result = dict_new();
- if (!result) {
- goto dn_failed;
- }
-
- priv->up_children = jbr_count_up_kids(this->private);
- if (dict_set_uint32(result, JBR_REP_COUNT_XATTR, priv->up_children) != 0) {
- goto dsu_failed;
- }
-
- STACK_UNWIND_STRICT(getxattr, frame, 0, 0, result, NULL);
- dict_unref(result);
- return 0;
-
-dsu_failed:
- dict_unref(result);
-dn_failed:
- STACK_UNWIND_STRICT(getxattr, frame, -1, ENOMEM, NULL, NULL);
- return 0;
-}
-
-void
-jbr_flush_fd(xlator_t *this, jbr_fd_ctx_t *fd_ctx)
-{
- jbr_dirty_list_t *dirty;
- jbr_dirty_list_t *dtmp;
-
- list_for_each_entry_safe(dirty, dtmp, &fd_ctx->dirty_list, links)
- {
- gf_msg_trace(this->name, 0, "sending post-op on %p (%p)", fd_ctx->fd,
- dirty);
- GF_FREE(dirty);
- }
-
- INIT_LIST_HEAD(&fd_ctx->dirty_list);
-}
-
-void *
-jbr_flush_thread(void *ctx)
-{
- xlator_t *this = ctx;
- jbr_private_t *priv = this->private;
- struct list_head dirty_fds;
- jbr_fd_ctx_t *fd_ctx;
- jbr_fd_ctx_t *fd_tmp;
- int ret;
-
- for (;;) {
- /*
- * We have to be very careful to avoid lock inversions here, so
- * we can't just hold priv->dirty_lock while we take and
- * release locks for each fd. Instead, we only hold dirty_lock
- * at the beginning of each iteration, as we (effectively) make
- * a copy of the current list head and then clear the original.
- * This leads to four scenarios for adding the first entry to
- * an fd and potentially putting it on the global list.
- *
- * (1) While we're asleep. No lock contention, it just gets
- * added and will be processed on the next iteration.
- *
- * (2) After we've made a local copy, but before we've started
- * processing that fd. The new entry will be added to the
- * fd (under its lock), and we'll process it on the current
- * iteration.
- *
- * (3) While we're processing the fd. They'll block on the fd
- * lock, then see that the list is empty and put it on the
- * global list. We'll process it here on the next
- * iteration.
- *
- * (4) While we're working, but after we've processed that fd.
- * Same as (1) as far as that fd is concerned.
- */
- INIT_LIST_HEAD(&dirty_fds);
- LOCK(&priv->dirty_lock);
- list_splice_init(&priv->dirty_fds, &dirty_fds);
- UNLOCK(&priv->dirty_lock);
-
- list_for_each_entry_safe(fd_ctx, fd_tmp, &dirty_fds, fd_list)
- {
- ret = syncop_fsync(FIRST_CHILD(this), fd_ctx->fd, 0, NULL, NULL,
- NULL, NULL);
- if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0, J_MSG_SYS_CALL_FAILURE,
- "failed to fsync %p (%d)", fd_ctx->fd, -ret);
- }
-
- LOCK(&fd_ctx->fd->lock);
- jbr_flush_fd(this, fd_ctx);
- list_del_init(&fd_ctx->fd_list);
- UNLOCK(&fd_ctx->fd->lock);
- fd_unref(fd_ctx->fd);
- }
-
- sleep(JBR_FLUSH_INTERVAL);
- }
-
- return NULL;
-}
-
-int32_t
-jbr_get_changelog_dir(xlator_t *this, char **cl_dir_p)
-{
- xlator_t *cl_xl;
-
- /* Find our changelog translator. */
- cl_xl = this;
- while (cl_xl) {
- if (strcmp(cl_xl->type, "features/changelog") == 0) {
- break;
- }
- cl_xl = cl_xl->children->xlator;
- }
- if (!cl_xl) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_INIT_FAIL,
- "failed to find changelog translator");
- return ENOENT;
- }
-
- /* Find the actual changelog directory. */
- if (dict_get_str(cl_xl->options, "changelog-dir", cl_dir_p) != 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_INIT_FAIL,
- "failed to find changelog-dir for %s", cl_xl->name);
- return ENODATA;
- }
-
- return 0;
-}
-
-void
-jbr_get_terms(call_frame_t *frame, xlator_t *this)
-{
- int32_t op_errno = 0;
- char *cl_dir = NULL;
- int32_t term_first = -1;
- int32_t term_contig = -1;
- int32_t term_last = -1;
- int term_num = 0;
- char *probe_str = NULL;
- dict_t *my_xdata = NULL;
- DIR *fp = NULL;
- struct dirent *entry = NULL;
- struct dirent scratch[2] = {
- {
- 0,
- },
- };
-
- op_errno = jbr_get_changelog_dir(this, &cl_dir);
- if (op_errno) {
- goto err; /* Error was already logged. */
- }
- op_errno = ENODATA; /* Most common error after this. */
-
- fp = sys_opendir(cl_dir);
- if (!fp) {
- op_errno = errno;
- goto err;
- }
-
- /* Find first and last terms. */
- for (;;) {
- errno = 0;
- entry = sys_readdir(fp, scratch);
- if (!entry || errno != 0) {
- if (errno != 0) {
- op_errno = errno;
- goto err;
- }
- break;
- }
-
- if (fnmatch("TERM.*", entry->d_name, FNM_PATHNAME) != 0) {
- continue;
- }
- /* +5 points to the character after the period */
- term_num = atoi(entry->d_name + 5);
- gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_GENERIC, "%s => %d",
- entry->d_name, term_num);
- if (term_num < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_INVALID,
- "invalid term file name %s", entry->d_name);
- op_errno = EINVAL;
- goto err;
- }
- if ((term_first < 0) || (term_first > term_num)) {
- term_first = term_num;
- }
- if ((term_last < 0) || (term_last < term_num)) {
- term_last = term_num;
- }
- }
- if ((term_first < 0) || (term_last < 0)) {
- /* TBD: are we *sure* there should always be at least one? */
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_NO_DATA, "no terms found");
- op_errno = EINVAL;
- goto err;
- }
-
- (void)sys_closedir(fp);
- fp = NULL;
-
- /*
- * Find term_contig, which is the earliest term for which there are
- * no gaps between it and term_last.
- */
- for (term_contig = term_last; term_contig > 0; --term_contig) {
- if (gf_asprintf(&probe_str, "%s/TERM.%d", cl_dir, term_contig - 1) <=
- 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_MEM_ERR,
- "failed to format term %d", term_contig - 1);
- goto err;
- }
- if (sys_access(probe_str, F_OK) != 0) {
- GF_FREE(probe_str);
- probe_str = NULL;
- break;
- }
- GF_FREE(probe_str);
- probe_str = NULL;
- }
-
- gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_GENERIC, "found terms %d-%d (%d)",
- term_first, term_last, term_contig);
-
- /* Return what we've found */
- my_xdata = dict_new();
- if (!my_xdata) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_MEM_ERR,
- "failed to allocate reply dictionary");
- goto err;
- }
- if (dict_set_int32(my_xdata, "term-first", term_first) != 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR,
- "failed to set term-first");
- goto err;
- }
- if (dict_set_int32(my_xdata, "term-contig", term_contig) != 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR,
- "failed to set term-contig");
- goto err;
- }
- if (dict_set_int32(my_xdata, "term-last", term_last) != 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR,
- "failed to set term-last");
- goto err;
- }
-
- /* Finally! */
- STACK_UNWIND_STRICT(ipc, frame, 0, 0, my_xdata);
- dict_unref(my_xdata);
- return;
-
-err:
- if (fp) {
- (void)sys_closedir(fp);
- }
- if (my_xdata) {
- dict_unref(my_xdata);
- }
-
- if (probe_str)
- GF_FREE(probe_str);
-
- STACK_UNWIND_STRICT(ipc, frame, -1, op_errno, NULL);
-}
-
-long
-get_entry_count(xlator_t *this, int fd)
-{
- struct stat buf;
- long min; /* last entry not known to be empty */
- long max; /* first entry known to be empty */
- long curr;
- char entry[CHANGELOG_ENTRY_SIZE];
-
- if (sys_fstat(fd, &buf) < 0) {
- return -1;
- }
-
- min = 0;
- max = buf.st_size / CHANGELOG_ENTRY_SIZE;
-
- while ((min + 1) < max) {
- curr = (min + max) / 2;
- if (sys_lseek(fd, curr * CHANGELOG_ENTRY_SIZE, SEEK_SET) < 0) {
- return -1;
- }
- if (sys_read(fd, entry, sizeof(entry)) != sizeof(entry)) {
- return -1;
- }
- if ((entry[0] == '_') && (entry[1] == 'P')) {
- min = curr;
- } else {
- max = curr;
- }
- }
-
- if (sys_lseek(fd, 0, SEEK_SET) < 0) {
- gf_msg(this->name, GF_LOG_WARNING, 0, J_MSG_SYS_CALL_FAILURE,
- "failed to reset offset");
- }
- return max;
-}
-
-void
-jbr_open_term(call_frame_t *frame, xlator_t *this, dict_t *xdata)
-{
- int32_t op_errno;
- char *cl_dir;
- char *term;
- char *path = NULL;
- jbr_private_t *priv = this->private;
-
- op_errno = jbr_get_changelog_dir(this, &cl_dir);
- if (op_errno) {
- goto err;
- }
-
- if (dict_get_str(xdata, "term", &term) != 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_NO_DATA, "missing term");
- op_errno = ENODATA;
- goto err;
- }
-
- if (gf_asprintf(&path, "%s/TERM.%s", cl_dir, term) < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_MEM_ERR,
- "failed to construct path");
- op_errno = ENOMEM;
- goto err;
- }
-
- if (priv->term_fd >= 0) {
- sys_close(priv->term_fd);
- }
- priv->term_fd = open(path, O_RDONLY);
- if (priv->term_fd < 0) {
- op_errno = errno;
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_SYS_CALL_FAILURE,
- "failed to open term file");
- goto err;
- }
-
- priv->term_total = get_entry_count(this, priv->term_fd);
- if (priv->term_total < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_NO_DATA,
- "failed to get entry count");
- sys_close(priv->term_fd);
- priv->term_fd = -1;
- op_errno = EIO;
- goto err;
- }
- priv->term_read = 0;
-
- /* Success! */
- STACK_UNWIND_STRICT(ipc, frame, 0, 0, NULL);
- GF_FREE(path);
- return;
-
-err:
- STACK_UNWIND_STRICT(ipc, frame, -1, op_errno, NULL);
- GF_FREE(path);
-}
-
-void
-jbr_next_entry(call_frame_t *frame, xlator_t *this)
-{
- int32_t op_errno = ENOMEM;
- jbr_private_t *priv = this->private;
- ssize_t nbytes;
- dict_t *my_xdata;
-
- if (priv->term_fd < 0) {
- op_errno = EBADFD;
- goto err;
- }
-
- if (priv->term_read >= priv->term_total) {
- op_errno = ENODATA;
- goto err;
- }
-
- nbytes = sys_read(priv->term_fd, priv->term_buf, CHANGELOG_ENTRY_SIZE);
- if (nbytes < CHANGELOG_ENTRY_SIZE) {
- if (nbytes < 0) {
- op_errno = errno;
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_SYS_CALL_FAILURE,
- "error reading next entry: %s", strerror(errno));
- } else {
- op_errno = EIO;
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_SYS_CALL_FAILURE,
- "got %zd/%d bytes for next entry", nbytes,
- CHANGELOG_ENTRY_SIZE);
- }
- goto err;
- }
- ++(priv->term_read);
-
- my_xdata = dict_new();
- if (!my_xdata) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_MEM_ERR,
- "failed to allocate reply xdata");
- goto err;
- }
-
- if (dict_set_static_bin(my_xdata, "data", priv->term_buf,
- CHANGELOG_ENTRY_SIZE) != 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR,
- "failed to assign reply xdata");
- goto err;
- }
-
- STACK_UNWIND_STRICT(ipc, frame, 0, 0, my_xdata);
- dict_unref(my_xdata);
- return;
-
-err:
- STACK_UNWIND_STRICT(ipc, frame, -1, op_errno, NULL);
-}
-
-int32_t
-jbr_ipc_fan_in(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- jbr_local_t *local = NULL;
- int32_t ret = -1;
- uint8_t call_count;
-
- GF_VALIDATE_OR_GOTO("jbr", this, out);
- GF_VALIDATE_OR_GOTO(this->name, frame, out);
- local = frame->local;
- GF_VALIDATE_OR_GOTO(this->name, local, out);
-
- gf_msg_trace(this->name, 0, "op_ret = %d, op_errno = %d\n", op_ret,
- op_errno);
-
- LOCK(&frame->lock);
- call_count = --(local->call_count);
- UNLOCK(&frame->lock);
-
- if (call_count == 0) {
-#if defined(JBR_CG_QUEUE)
- ret = jbr_remove_from_queue(frame, this);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_GENERIC,
- "Failed to remove from queue.");
- }
-#endif
- /*
- * Unrefing the reference taken in continue() or complete() *
- */
- dict_unref(local->xdata);
- STACK_DESTROY(frame->root);
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-int32_t
-jbr_ipc_complete(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- jbr_local_t *local = NULL;
-
- GF_VALIDATE_OR_GOTO("jbr", this, out);
- GF_VALIDATE_OR_GOTO(this->name, frame, out);
- local = frame->local;
- GF_VALIDATE_OR_GOTO(this->name, local, out);
-
- jbr_ipc_call_dispatch(frame, this, &op_errno, FDL_IPC_JBR_SERVER_ROLLBACK,
- local->xdata);
-out:
- return 0;
-}
-
-int32_t
-jbr_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
-{
- switch (op) {
- case JBR_SERVER_TERM_RANGE:
- jbr_get_terms(frame, this);
- break;
- case JBR_SERVER_OPEN_TERM:
- jbr_open_term(frame, this, xdata);
- break;
- case JBR_SERVER_NEXT_ENTRY:
- jbr_next_entry(frame, this);
- break;
- case FDL_IPC_JBR_SERVER_ROLLBACK:
- /*
- * Just send the fop down to fdl. Need not *
- * dispatch it to other bricks in the sub- *
- * volume, as it will be done where the op *
- * has failed. *
- */
- default:
- STACK_WIND_TAIL(frame, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ipc, op, xdata);
- }
-
- return 0;
-}
-
-#pragma generate
-
-int32_t
-jbr_forget(xlator_t *this, inode_t *inode)
-{
- uint64_t ctx = 0LL;
-
- if ((inode_ctx_del(inode, this, &ctx) == 0) && ctx) {
- GF_FREE((void *)(long)ctx);
- }
-
- return 0;
-}
-
-int32_t
-jbr_release(xlator_t *this, fd_t *fd)
-{
- uint64_t ctx = 0LL;
-
- if ((fd_ctx_del(fd, this, &ctx) == 0) && ctx) {
- GF_FREE((void *)(long)ctx);
- }
-
- return 0;
-}
-
-struct xlator_cbks cbks = {
- .forget = jbr_forget,
- .release = jbr_release,
-};
-
-int
-jbr_reconfigure(xlator_t *this, dict_t *options)
-{
- jbr_private_t *priv = this->private;
-
- GF_OPTION_RECONF("leader", priv->config_leader, options, bool, err);
- GF_OPTION_RECONF("quorum-percent", priv->quorum_pct, options, percent, err);
- gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_GENERIC,
- "reconfigure called, config_leader = %d, quorum_pct = %.1f\n",
- priv->leader, priv->quorum_pct);
-
- priv->leader = priv->config_leader;
-
- return 0;
-
-err:
- return -1;
-}
-
-int
-jbr_get_child_index(xlator_t *this, xlator_t *kid)
-{
- xlator_list_t *trav;
- int retval = -1;
-
- for (trav = this->children; trav; trav = trav->next) {
- ++retval;
- if (trav->xlator == kid) {
- return retval;
- }
- }
-
- return -1;
-}
-
-/*
- * Child notify handling is unreasonably FUBAR. Sometimes we'll get a
- * CHILD_DOWN for a protocol/client child before we ever got a CHILD_UP for it.
- * Other times we won't. Because it's effectively random (probably racy), we
- * can't just maintain a count. We actually have to keep track of the state
- * for each child separately, to filter out the bogus CHILD_DOWN events, and
- * then generate counts on demand.
- */
-int
-jbr_notify(xlator_t *this, int event, void *data, ...)
-{
- jbr_private_t *priv = this->private;
- int index = -1;
- int ret = -1;
- gf_boolean_t result = _gf_false;
- gf_boolean_t relevant = _gf_false;
-
- switch (event) {
- case GF_EVENT_CHILD_UP:
- index = jbr_get_child_index(this, data);
- if (index >= 0) {
- /* Check if the child was previously down
- * and it's not a false CHILD_UP
- */
- if (!(priv->kid_state & (1 << index))) {
- relevant = _gf_true;
- }
-
- priv->kid_state |= (1 << index);
- priv->up_children = jbr_count_up_kids(priv);
- gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_GENERIC,
- "got CHILD_UP for %s, now %u kids",
- ((xlator_t *)data)->name, priv->up_children);
- if (!priv->config_leader && (priv->up_children > 1)) {
- priv->leader = _gf_false;
- }
-
- /* If it's not relevant, or we have already *
- * sent CHILD_UP just break */
- if (!relevant || priv->child_up)
- break;
-
- /* If it's not a leader, just send the notify up */
- if (!priv->leader) {
- ret = default_notify(this, event, data);
- if (!ret)
- priv->child_up = _gf_true;
- break;
- }
-
- result = fop_quorum_check(this, (double)(priv->n_children - 1),
- (double)(priv->up_children - 1));
- if (result == _gf_false) {
- gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_GENERIC,
- "Not enough children "
- "are up to meet quorum. Waiting to "
- "send CHILD_UP from leader");
- } else {
- gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_GENERIC,
- "Enough children are up "
- "to meet quorum. Sending CHILD_UP "
- "from leader");
- ret = default_notify(this, event, data);
- if (!ret)
- priv->child_up = _gf_true;
- }
- }
- break;
- case GF_EVENT_CHILD_DOWN:
- index = jbr_get_child_index(this, data);
- if (index >= 0) {
- /* Check if the child was previously up
- * and it's not a false CHILD_DOWN
- */
- if (priv->kid_state & (1 << index)) {
- relevant = _gf_true;
- }
- priv->kid_state &= ~(1 << index);
- priv->up_children = jbr_count_up_kids(priv);
- gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_GENERIC,
- "got CHILD_DOWN for %s, now %u kids",
- ((xlator_t *)data)->name, priv->up_children);
- if (!priv->config_leader && (priv->up_children < 2) &&
- relevant) {
- priv->leader = _gf_true;
- }
-
- /* If it's not relevant, or we have already *
- * sent CHILD_DOWN just break */
- if (!relevant || !priv->child_up)
- break;
-
- /* If it's not a leader, just break coz we shouldn't *
- * propagate the failure from the failure till it *
- * itself goes down *
- */
- if (!priv->leader) {
- break;
- }
-
- result = fop_quorum_check(this, (double)(priv->n_children - 1),
- (double)(priv->up_children - 1));
- if (result == _gf_false) {
- gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_GENERIC,
- "Enough children are "
- "to down to fail quorum. "
- "Sending CHILD_DOWN from leader");
- ret = default_notify(this, event, data);
- if (!ret)
- priv->child_up = _gf_false;
- } else {
- gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_GENERIC,
- "Not enough children "
- "are down to fail quorum. Waiting to "
- "send CHILD_DOWN from leader");
- }
- }
- break;
- default:
- ret = default_notify(this, event, data);
- }
-
- return ret;
-}
-
-int32_t
-mem_acct_init(xlator_t *this)
-{
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO("jbr", this, out);
-
- ret = xlator_mem_acct_init(this, gf_mt_jbr_end + 1);
-
- if (ret != 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_MEM_ERR,
- "Memory accounting init"
- "failed");
- return ret;
- }
-out:
- return ret;
-}
-
-void
-jbr_deallocate_priv(jbr_private_t *priv)
-{
- if (!priv) {
- return;
- }
-
- GF_FREE(priv);
-}
-
-int32_t
-jbr_init(xlator_t *this)
-{
- xlator_list_t *remote;
- xlator_list_t *local;
- jbr_private_t *priv = NULL;
- xlator_list_t *trav;
- pthread_t kid;
- extern xlator_t global_xlator;
- glusterfs_ctx_t *oldctx = global_xlator.ctx;
-
- /*
- * Any fop that gets special treatment has to be patched in here,
- * because the compiled-in table is produced by the code generator and
- * only contains generated functions. Note that we have to go through
- * this->fops because of some dynamic-linking strangeness; modifying
- * the static table doesn't work.
- */
- this->fops->getxattr = jbr_getxattr_special;
- this->fops->fsync = jbr_fsync;
-
- local = this->children;
- if (!local) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_NO_DATA,
- "no local subvolume");
- goto err;
- }
-
- remote = local->next;
- if (!remote) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_NO_DATA,
- "no remote subvolumes");
- goto err;
- }
-
- this->local_pool = mem_pool_new(jbr_local_t, 128);
- if (!this->local_pool) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_MEM_ERR,
- "failed to create jbr_local_t pool");
- goto err;
- }
-
- priv = GF_CALLOC(1, sizeof(*priv), gf_mt_jbr_private_t);
- if (!priv) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_MEM_ERR,
- "could not allocate priv");
- goto err;
- }
-
- for (trav = this->children; trav; trav = trav->next) {
- ++(priv->n_children);
- }
-
- LOCK_INIT(&priv->dirty_lock);
- LOCK_INIT(&priv->index_lock);
- INIT_LIST_HEAD(&priv->dirty_fds);
- priv->term_fd = -1;
-
- this->private = priv;
-
- GF_OPTION_INIT("leader", priv->config_leader, bool, err);
- GF_OPTION_INIT("quorum-percent", priv->quorum_pct, percent, err);
-
- priv->leader = priv->config_leader;
- priv->child_up = _gf_false;
-
- if (gf_thread_create(&kid, NULL, jbr_flush_thread, this, "jbrflush") != 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_SYS_CALL_FAILURE,
- "could not start flush thread");
- /* TBD: treat this as a fatal error? */
- }
-
- /*
- * Calling glfs_new changes old->ctx, even if THIS still points
- * to global_xlator. That causes problems later in the main
- * thread, when gf_log_dump_graph tries to use the FILE after
- * we've mucked with it and gets a segfault in __fprintf_chk.
- * We can avoid all that by undoing the damage before we
- * continue.
- */
- global_xlator.ctx = oldctx;
-
- return 0;
-
-err:
- jbr_deallocate_priv(priv);
- return -1;
-}
-
-void
-jbr_fini(xlator_t *this)
-{
- jbr_deallocate_priv(this->private);
-}
-
-class_methods_t class_methods = {
- .init = jbr_init,
- .fini = jbr_fini,
- .reconfigure = jbr_reconfigure,
- .notify = jbr_notify,
-};
-
-struct volume_options options[] = {
- {.key = {"leader"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "false",
- .description = "Start in the leader role. This is only for "
- "bootstrapping the code, and should go away when we "
- "have real leader election."},
- {.key = {"vol-name"},
- .type = GF_OPTION_TYPE_STR,
- .description = "volume name"},
- {.key = {"my-name"},
- .type = GF_OPTION_TYPE_STR,
- .description = "brick name in form of host:/path"},
- {.key = {"etcd-servers"},
- .type = GF_OPTION_TYPE_STR,
- .description = "list of comma separated etc servers"},
- {.key = {"subvol-uuid"},
- .type = GF_OPTION_TYPE_STR,
- .description = "UUID for this JBR (sub)volume"},
- {.key = {"quorum-percent"},
- .type = GF_OPTION_TYPE_PERCENT,
- .default_value = "50.0",
- .description = "percentage of rep_count-1 that must be up"},
- {.key = {NULL}},
-};
diff --git a/xlators/experimental/posix2/Makefile.am b/xlators/experimental/posix2/Makefile.am
deleted file mode 100644
index 74e5ab0f5bc..00000000000
--- a/xlators/experimental/posix2/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = common mds ds
-
-CLEANFILES =
diff --git a/xlators/experimental/posix2/README.md b/xlators/experimental/posix2/README.md
deleted file mode 100644
index 955a98d061e..00000000000
--- a/xlators/experimental/posix2/README.md
+++ /dev/null
@@ -1,7 +0,0 @@
-# POSIX2 Experimental README
-
-POSIX2 is an implementation of modified storage translator to cater to DHT2
-on disk needs.
-
-For further understanding, refer to xlators/experimental/dht2/README.md for
-details regarding POSIX2
diff --git a/xlators/experimental/posix2/TODO.md b/xlators/experimental/posix2/TODO.md
deleted file mode 100644
index 20cd1e89c1d..00000000000
--- a/xlators/experimental/posix2/TODO.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# POSIX2 TODO List
-
-<Items will be added as code is pulled into the repository> \ No newline at end of file
diff --git a/xlators/experimental/posix2/common/Makefile.am b/xlators/experimental/posix2/common/Makefile.am
deleted file mode 100644
index a985f42a877..00000000000
--- a/xlators/experimental/posix2/common/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = src
-
-CLEANFILES =
diff --git a/xlators/experimental/posix2/common/src/Makefile.am b/xlators/experimental/posix2/common/src/Makefile.am
deleted file mode 100644
index 07533d2bf37..00000000000
--- a/xlators/experimental/posix2/common/src/Makefile.am
+++ /dev/null
@@ -1,16 +0,0 @@
-lib_LTLIBRARIES = libposix2common.la
-
-posix2_common_sources = posix2-common.c
-
-libposix2common_la_SOURCES = $(posix2_common_sources)
-libposix2common_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-libposix2common_la_CFLAGS = -Wall $(GF_CFLAGS)
-
-libposix2common_la_CPPFLAGS = $(GF_CPPFLAGS)
-libposix2common_la_CPPFLAGS += -I$(top_srcdir)/libglusterfs/src
-libposix2common_la_CPPFLAGS += -I$(top_srcdir)/rpc/xdr/src
-libposix2common_la_CPPFLAGS += -I$(top_builddir)/rpc/xdr/src
-
-
-CLEANFILES =
diff --git a/xlators/experimental/posix2/common/src/posix2-common.c b/xlators/experimental/posix2/common/src/posix2-common.c
deleted file mode 100644
index 14b51d538b2..00000000000
--- a/xlators/experimental/posix2/common/src/posix2-common.c
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-/* File: posix2-common.c
- * This file contains common routines across ds and mds posix xlators
- * The entire functionality including comments is TODO.
- */
-
-#include "glusterfs.h"
-#include "logging.h"
-#include "statedump.h"
diff --git a/xlators/experimental/posix2/ds/Makefile.am b/xlators/experimental/posix2/ds/Makefile.am
deleted file mode 100644
index a985f42a877..00000000000
--- a/xlators/experimental/posix2/ds/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = src
-
-CLEANFILES =
diff --git a/xlators/experimental/posix2/ds/src/Makefile.am b/xlators/experimental/posix2/ds/src/Makefile.am
deleted file mode 100644
index 7a792a8d07b..00000000000
--- a/xlators/experimental/posix2/ds/src/Makefile.am
+++ /dev/null
@@ -1,22 +0,0 @@
-if WITH_SERVER
-xlator_LTLIBRARIES = posix2-ds.la
-endif
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental
-
-posix2_ds_sources = posix2-ds-main.c
-
-posix2_ds_la_SOURCES = $(posix2_ds_sources)
-posix2_ds_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-posix2_ds_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-posix2_ds_la_LIBADD += $(top_builddir)/xlators/experimental/posix2/common/src/libposix2common.la
-
-AM_CFLAGS = -Wall $(GF_CFLAGS)
-
-AM_CPPFLAGS = $(GF_CPPFLAGS)
-AM_CPPFLAGS += -I$(top_srcdir)/xlators/storage/posix2/common/src
-AM_CPPFLAGS += -I$(top_srcdir)/libglusterfs/src
-AM_CPPFLAGS += -I$(top_srcdir)/xlators/lib/src
-AM_CPPFLAGS += -I$(top_srcdir)/rpc/xdr/src
-AM_CPPFLAGS += -I$(top_builddir)/rpc/xdr/src
-
-CLEANFILES =
diff --git a/xlators/experimental/posix2/ds/src/posix2-ds-main.c b/xlators/experimental/posix2/ds/src/posix2-ds-main.c
deleted file mode 100644
index 4e399a98ed4..00000000000
--- a/xlators/experimental/posix2/ds/src/posix2-ds-main.c
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-/* File: posix2-ds-main.c
- * This file contains the xlator loading functions, FOP entry points
- * and options.
- * The entire functionality including comments is TODO.
- */
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "statedump.h"
-
-int32_t
-posix2_ds_init(xlator_t *this)
-{
- if (this->children) {
- gf_log(this->name, GF_LOG_ERROR,
- "This (%s) is a leaf xlator, but found children", this->name);
- return -1;
- }
-
- return 0;
-}
-
-void
-posix2_ds_fini(xlator_t *this)
-{
- return;
-}
-
-class_methods_t class_methods = {
- .init = posix2_ds_init,
- .fini = posix2_ds_fini,
-};
-
-struct xlator_fops fops = {};
-
-struct xlator_cbks cbks = {};
-
-/*
-struct xlator_dumpops dumpops = {
-};
-*/
-
-struct volume_options options[] = {
- {.key = {NULL}},
-};
diff --git a/xlators/experimental/posix2/mds/Makefile.am b/xlators/experimental/posix2/mds/Makefile.am
deleted file mode 100644
index a985f42a877..00000000000
--- a/xlators/experimental/posix2/mds/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = src
-
-CLEANFILES =
diff --git a/xlators/experimental/posix2/mds/src/Makefile.am b/xlators/experimental/posix2/mds/src/Makefile.am
deleted file mode 100644
index 0681cb73c45..00000000000
--- a/xlators/experimental/posix2/mds/src/Makefile.am
+++ /dev/null
@@ -1,22 +0,0 @@
-if WITH_SERVER
-xlator_LTLIBRARIES = posix2-mds.la
-endif
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental
-
-posix2_mds_sources = posix2-mds-main.c
-
-posix2_mds_la_SOURCES = $(posix2_mds_sources)
-posix2_mds_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-posix2_mds_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-posix2_mds_la_LIBADD += $(top_builddir)/xlators/experimental/posix2/common/src/libposix2common.la
-
-AM_CFLAGS = -Wall $(GF_CFLAGS)
-
-AM_CPPFLAGS = $(GF_CPPFLAGS)
-AM_CPPFLAGS += -I$(top_srcdir)/xlators/storage/posix2/common/src
-AM_CPPFLAGS += -I$(top_srcdir)/libglusterfs/src
-AM_CPPFLAGS += -I$(top_srcdir)/xlators/lib/src
-AM_CPPFLAGS += -I$(top_srcdir)/rpc/xdr/src
-AM_CPPFLAGS += -I$(top_builddir)/rpc/xdr/src
-
-CLEANFILES =
diff --git a/xlators/experimental/posix2/mds/src/posix2-mds-main.c b/xlators/experimental/posix2/mds/src/posix2-mds-main.c
deleted file mode 100644
index 58da05f2091..00000000000
--- a/xlators/experimental/posix2/mds/src/posix2-mds-main.c
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-/* File: posix2-mds-main.c
- * This file contains the xlator loading functions, FOP entry points
- * and options.
- * The entire functionality including comments is TODO.
- */
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "statedump.h"
-
-int32_t
-posix2_mds_init(xlator_t *this)
-{
- if (this->children) {
- gf_log(this->name, GF_LOG_ERROR,
- "This (%s) is a leaf xlator, but found children", this->name);
- return -1;
- }
-
- return 0;
-}
-
-void
-posix2_mds_fini(xlator_t *this)
-{
- return;
-}
-
-class_methods_t class_methods = {
- .init = posix2_mds_init,
- .fini = posix2_mds_fini,
-};
-
-struct xlator_fops fops = {};
-
-struct xlator_cbks cbks = {};
-
-/*
-struct xlator_dumpops dumpops = {
-};
-*/
-
-struct volume_options options[] = {
- {.key = {NULL}},
-};
diff --git a/xlators/features/Makefile.am b/xlators/features/Makefile.am
index 194634b003d..c57897f11ea 100644
--- a/xlators/features/Makefile.am
+++ b/xlators/features/Makefile.am
@@ -2,9 +2,13 @@ if BUILD_CLOUDSYNC
CLOUDSYNC_DIR = cloudsync
endif
+if BUILD_METADISP
+ METADISP_DIR = metadisp
+endif
+
SUBDIRS = locks quota read-only quiesce marker index barrier arbiter upcall \
compress changelog gfid-access snapview-client snapview-server trash \
shard bit-rot leases selinux sdfs namespace $(CLOUDSYNC_DIR) thin-arbiter \
- utime
+ utime $(METADISP_DIR)
CLEANFILES =
diff --git a/xlators/features/arbiter/src/arbiter-mem-types.h b/xlators/features/arbiter/src/arbiter-mem-types.h
index 0f77cfd05f4..05d18374c46 100644
--- a/xlators/features/arbiter/src/arbiter-mem-types.h
+++ b/xlators/features/arbiter/src/arbiter-mem-types.h
@@ -9,7 +9,7 @@
#ifndef __ARBITER_MEM_TYPES_H__
#define __ARBITER_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
typedef enum gf_arbiter_mem_types_ {
gf_arbiter_mt_inode_ctx_t = gf_common_mt_end + 1,
diff --git a/xlators/features/arbiter/src/arbiter.c b/xlators/features/arbiter/src/arbiter.c
index 4af68f9ba52..83a97e3354b 100644
--- a/xlators/features/arbiter/src/arbiter.c
+++ b/xlators/features/arbiter/src/arbiter.c
@@ -10,9 +10,9 @@
#include "arbiter.h"
#include "arbiter-mem-types.h"
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
static arbiter_inode_ctx_t *
__arbiter_inode_ctx_get(inode_t *inode, xlator_t *this)
@@ -365,3 +365,16 @@ struct xlator_cbks cbks = {
struct volume_options options[] = {
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "arbiter",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/arbiter/src/arbiter.h b/xlators/features/arbiter/src/arbiter.h
index ce1c909f70f..546db7b751a 100644
--- a/xlators/features/arbiter/src/arbiter.h
+++ b/xlators/features/arbiter/src/arbiter.h
@@ -11,8 +11,8 @@
#ifndef _ARBITER_H
#define _ARBITER_H
-#include "locking.h"
-#include "common-utils.h"
+#include <glusterfs/locking.h>
+#include <glusterfs/common-utils.h>
typedef struct arbiter_inode_ctx_ {
struct iatt iattbuf;
diff --git a/xlators/features/barrier/src/barrier-mem-types.h b/xlators/features/barrier/src/barrier-mem-types.h
index 93ccab633ce..71ed7898d9c 100644
--- a/xlators/features/barrier/src/barrier-mem-types.h
+++ b/xlators/features/barrier/src/barrier-mem-types.h
@@ -11,7 +11,7 @@
#ifndef __BARRIER_MEM_TYPES_H__
#define __BARRIER_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_barrier_mem_types_ {
gf_barrier_mt_priv_t = gf_common_mt_end + 1,
diff --git a/xlators/features/barrier/src/barrier.c b/xlators/features/barrier/src/barrier.c
index 1c5c5ffdc22..852bbacb99d 100644
--- a/xlators/features/barrier/src/barrier.c
+++ b/xlators/features/barrier/src/barrier.c
@@ -9,10 +9,10 @@
*/
#include "barrier.h"
-#include "defaults.h"
-#include "call-stub.h"
+#include <glusterfs/defaults.h>
+#include <glusterfs/call-stub.h>
-#include "statedump.h"
+#include <glusterfs/statedump.h>
void
barrier_local_set_gfid(call_frame_t *frame, uuid_t gfid, xlator_t *this)
@@ -461,16 +461,14 @@ out:
int
notify(xlator_t *this, int event, void *data, ...)
{
- barrier_priv_t *priv = NULL;
+ barrier_priv_t *priv = this->private;
dict_t *dict = NULL;
- gf_boolean_t past = _gf_false;
int ret = -1;
int barrier_enabled = _gf_false;
struct list_head queue = {
0,
};
- priv = this->private;
GF_ASSERT(priv);
INIT_LIST_HEAD(&queue);
@@ -488,35 +486,27 @@ notify(xlator_t *this, int event, void *data, ...)
LOCK(&priv->lock);
{
- past = priv->barrier_enabled;
-
- switch (past) {
- case _gf_false:
- if (barrier_enabled) {
- ret = __barrier_enable(this, priv);
- if (ret)
- goto unlock;
- } else {
- gf_log(this->name, GF_LOG_ERROR,
- "Already disabled.");
- goto unlock;
- }
- break;
-
- case _gf_true:
- if (!barrier_enabled) {
- __barrier_disable(this, &queue);
- } else {
- gf_log(this->name, GF_LOG_ERROR, "Already enabled");
- goto unlock;
- }
- break;
+ if (!priv->barrier_enabled) {
+ if (barrier_enabled) {
+ ret = __barrier_enable(this, priv);
+ } else {
+ UNLOCK(&priv->lock);
+ gf_log(this->name, GF_LOG_ERROR, "Already disabled.");
+ goto post_unlock;
+ }
+ } else {
+ if (!barrier_enabled) {
+ __barrier_disable(this, &queue);
+ ret = 0;
+ } else {
+ UNLOCK(&priv->lock);
+ gf_log(this->name, GF_LOG_ERROR, "Already enabled");
+ goto post_unlock;
+ }
}
- ret = 0;
}
- unlock:
UNLOCK(&priv->lock);
-
+ post_unlock:
if (!list_empty(&queue))
barrier_dequeue_all(this, &queue);
@@ -536,7 +526,6 @@ int
reconfigure(xlator_t *this, dict_t *options)
{
barrier_priv_t *priv = NULL;
- gf_boolean_t past = _gf_false;
int ret = -1;
gf_boolean_t barrier_enabled = _gf_false;
uint32_t timeout = {
@@ -556,23 +545,17 @@ reconfigure(xlator_t *this, dict_t *options)
LOCK(&priv->lock);
{
- past = priv->barrier_enabled;
-
- switch (past) {
- case _gf_false:
- if (barrier_enabled) {
- ret = __barrier_enable(this, priv);
- if (ret) {
- goto unlock;
- }
+ if (!priv->barrier_enabled) {
+ if (barrier_enabled) {
+ ret = __barrier_enable(this, priv);
+ if (ret) {
+ goto unlock;
}
- break;
-
- case _gf_true:
- if (!barrier_enabled) {
- __barrier_disable(this, &queue);
- }
- break;
+ }
+ } else {
+ if (!barrier_enabled) {
+ __barrier_disable(this, &queue);
+ }
}
priv->timeout.tv_sec = timeout;
ret = 0;
@@ -746,13 +729,13 @@ barrier_dump_priv(xlator_t *this)
gf_proc_dump_build_key(key, "xlator.features.barrier", "priv");
gf_proc_dump_add_section("%s", key);
+ gf_proc_dump_build_key(key, "barrier", "enabled");
LOCK(&priv->lock);
{
- gf_proc_dump_build_key(key, "barrier", "enabled");
gf_proc_dump_write(key, "%d", priv->barrier_enabled);
gf_proc_dump_build_key(key, "barrier", "timeout");
- gf_proc_dump_write(key, "%" PRId64, priv->timeout.tv_sec);
+ gf_proc_dump_write(key, "%ld", priv->timeout.tv_sec);
if (priv->barrier_enabled) {
gf_proc_dump_build_key(key, "barrier", "queue_size");
gf_proc_dump_write(key, "%d", priv->queue_size);
@@ -809,3 +792,18 @@ struct volume_options options[] = {
"blocked acknowledgements are sent to the application"},
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "barrier",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/barrier/src/barrier.h b/xlators/features/barrier/src/barrier.h
index d11d71d151e..1337f311f7d 100644
--- a/xlators/features/barrier/src/barrier.h
+++ b/xlators/features/barrier/src/barrier.h
@@ -12,9 +12,9 @@
#define __BARRIER_H__
#include "barrier-mem-types.h"
-#include "xlator.h"
-#include "timer.h"
-#include "call-stub.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/timer.h>
+#include <glusterfs/call-stub.h>
#define BARRIER_FOP_CBK(fop_name, label, frame, this, params...) \
do { \
@@ -65,11 +65,12 @@
typedef struct {
gf_timer_t *timer;
- gf_boolean_t barrier_enabled;
gf_lock_t lock;
struct list_head queue;
struct timespec timeout;
uint32_t queue_size;
+ gf_boolean_t barrier_enabled;
+ char _pad[3]; /* manual padding */
} barrier_priv_t;
int
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h b/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h
index 6f59933a31d..5bc5103a27c 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h
@@ -11,7 +11,7 @@
#ifndef _BITROT_BITD_MESSAGES_H_
#define _BITROT_BITD_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
@@ -47,6 +47,55 @@ GLFS_MSGID(BITROT_BITD, BRB_MSG_FD_CREATE_FAILED, BRB_MSG_READV_FAILED,
BRB_MSG_SCRUB_THREAD_CLEANUP, BRB_MSG_SCRUBBER_CLEANED,
BRB_MSG_GENERIC_SSM_INFO, BRB_MSG_ZERO_TIMEOUT_BUG,
BRB_MSG_BAD_OBJ_READDIR_FAIL, BRB_MSG_SSM_FAILED,
- BRB_MSG_SCRUB_WAIT_FAILED);
+ BRB_MSG_SCRUB_WAIT_FAILED, BRB_MSG_TRIGGER_SIGN_FAILED,
+ BRB_MSG_EVENT_UNHANDLED, BRB_MSG_COULD_NOT_SCHEDULE_SCRUB,
+ BRB_MSG_THREAD_CREATION_FAILED, BRB_MSG_MEM_POOL_ALLOC,
+ BRB_MSG_SAVING_HASH_FAILED);
+#define BRB_MSG_FD_CREATE_FAILED_STR "failed to create fd for the inode"
+#define BRB_MSG_READV_FAILED_STR "readv failed"
+#define BRB_MSG_BLOCK_READ_FAILED_STR "reading block failed"
+#define BRB_MSG_NO_MEMORY_STR "failed to allocate memory"
+#define BRB_MSG_CALC_CHECKSUM_FAILED_STR "calculating checksum failed"
+#define BRB_MSG_GET_SIGN_FAILED_STR "failed to get the signature"
+#define BRB_MSG_SET_SIGN_FAILED_STR "signing failed"
+#define BRB_MSG_OP_FAILED_STR "failed on object"
+#define BRB_MSG_TRIGGER_SIGN_FAILED_STR "Could not trigger signing"
+#define BRB_MSG_READ_AND_SIGN_FAILED_STR "reading and signing of object failed"
+#define BRB_MSG_SET_TIMER_FAILED_STR "Failed to allocate object expiry timer"
+#define BRB_MSG_GET_SUBVOL_FAILED_STR \
+ "failed to get the subvolume for the brick"
+#define BRB_MSG_PATH_FAILED_STR "path failed"
+#define BRB_MSG_SKIP_OBJECT_STR "Entry is marked corrupted. skipping"
+#define BRB_MSG_PARTIAL_VERSION_PRESENCE_STR \
+ "PArtial version xattr presence detected, ignoring"
+#define BRB_MSG_TRIGGER_SIGN_STR "Triggering signing"
+#define BRB_MSG_CRAWLING_START_STR \
+ "Crawling brick, scanning for unsigned objects"
+#define BRB_MSG_CRAWLING_FINISH_STR "Completed crawling brick"
+#define BRB_MSG_REGISTER_FAILED_STR "Register to changelog failed"
+#define BRB_MSG_SPAWN_FAILED_STR "failed to spawn"
+#define BRB_MSG_CONNECTED_TO_BRICK_STR "Connected to brick"
+#define BRB_MSG_LOOKUP_FAILED_STR "lookup on root failed"
+#define BRB_MSG_GET_INFO_FAILED_STR "failed to get stub info"
+#define BRB_MSG_SCRUB_THREAD_CLEANUP_STR "Error cleaning up scanner thread"
+#define BRB_MSG_SCRUBBER_CLEANED_STR "clened up scrubber for brick"
+#define BRB_MSG_SUBVOL_CONNECT_FAILED_STR \
+ "callback handler for subvolume failed"
+#define BRB_MSG_MEM_ACNT_FAILED_STR "Memory accounting init failed"
+#define BRB_MSG_EVENT_UNHANDLED_STR "Event unhandled for child"
+#define BRB_MSG_INVALID_SUBVOL_STR "Got event from invalid subvolume"
+#define BRB_MSG_RESCHEDULE_SCRUBBER_FAILED_STR \
+ "on demand scrub schedule failed. Scrubber is not in pending state."
+#define BRB_MSG_COULD_NOT_SCHEDULE_SCRUB_STR \
+ "Could not schedule ondemand scrubbing. Scrubbing will continue " \
+ "according to old frequency."
+#define BRB_MSG_THREAD_CREATION_FAILED_STR "thread creation failed"
+#define BRB_MSG_RATE_LIMIT_INFO_STR "Rate Limit Info"
+#define BRB_MSG_MEM_POOL_ALLOC_STR "failed to allocate mem-pool for timer"
+#define BRB_MSG_NO_CHILD_STR "FATAL: no children"
+#define BRB_MSG_TIMER_WHEEL_UNAVAILABLE_STR "global timer wheel unavailable"
+#define BRB_MSG_BITROT_LOADED_STR "bit-rot xlator loaded"
+#define BRB_MSG_SAVING_HASH_FAILED_STR \
+ "failed to allocate memory for saving hash of the object"
#endif /* !_BITROT_BITD_MESSAGES_H_ */
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c
index 34e20f9df11..5cef2ffa5e5 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c
@@ -40,21 +40,21 @@ br_inc_scrubbed_file(br_scrub_stats_t *scrub_stat)
}
void
-br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, struct timeval *tv)
+br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, time_t time)
{
if (!scrub_stat)
return;
pthread_mutex_lock(&scrub_stat->lock);
{
- scrub_stat->scrub_start_tv.tv_sec = tv->tv_sec;
+ scrub_stat->scrub_start_time = time;
}
pthread_mutex_unlock(&scrub_stat->lock);
}
void
br_update_scrub_finish_time(br_scrub_stats_t *scrub_stat, char *timestr,
- struct timeval *tv)
+ time_t time)
{
int lst_size = 0;
@@ -67,10 +67,10 @@ br_update_scrub_finish_time(br_scrub_stats_t *scrub_stat, char *timestr,
pthread_mutex_lock(&scrub_stat->lock);
{
- scrub_stat->scrub_end_tv.tv_sec = tv->tv_sec;
+ scrub_stat->scrub_end_time = time;
- scrub_stat->scrub_duration = scrub_stat->scrub_end_tv.tv_sec -
- scrub_stat->scrub_start_tv.tv_sec;
+ scrub_stat->scrub_duration = scrub_stat->scrub_end_time -
+ scrub_stat->scrub_start_time;
snprintf(scrub_stat->last_scrub_time, lst_size, "%s", timestr);
}
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h
index 24128b90a66..f022aa831eb 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h
@@ -15,20 +15,22 @@
#include <sys/time.h>
#include <pthread.h>
+#include <glusterfs/common-utils.h>
+
struct br_scrub_stats {
- uint64_t scrubbed_files; /* Total number of scrubbed file */
+ uint64_t scrubbed_files; /* Total number of scrubbed files. */
- uint64_t unsigned_files; /* Total number of unsigned file */
+ uint64_t unsigned_files; /* Total number of unsigned files. */
- uint64_t scrub_duration; /* Duration of last scrub */
+ uint64_t scrub_duration; /* Duration of last scrub. */
- char last_scrub_time[1024]; /*last scrub completion time */
+ char last_scrub_time[GF_TIMESTR_SIZE]; /* Last scrub completion time. */
- struct timeval scrub_start_tv; /* Scrubbing starting time*/
+ time_t scrub_start_time; /* Scrubbing starting time. */
- struct timeval scrub_end_tv; /* Scrubbing finishing time */
+ time_t scrub_end_time; /* Scrubbing finishing time. */
- int8_t scrub_running; /* Scrub running or not */
+ int8_t scrub_running; /* Whether scrub running or not. */
pthread_mutex_t lock;
};
@@ -40,9 +42,9 @@ br_inc_unsigned_file_count(br_scrub_stats_t *scrub_stat);
void
br_inc_scrubbed_file(br_scrub_stats_t *scrub_stat);
void
-br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, struct timeval *tv);
+br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, time_t time);
void
br_update_scrub_finish_time(br_scrub_stats_t *scrub_stat, char *timestr,
- struct timeval *tv);
+ time_t time);
#endif /* __BIT_ROT_SCRUB_STATUS_H__ */
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
index b856c168eb7..289dd53f610 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
@@ -12,15 +12,15 @@
#include <ctype.h>
#include <sys/uio.h>
-#include "glusterfs.h"
-#include "logging.h"
-#include "common-utils.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/common-utils.h>
#include "bit-rot-scrub.h"
#include <pthread.h>
#include "bit-rot-bitd-messages.h"
#include "bit-rot-scrub-status.h"
-#include "events.h"
+#include <glusterfs/events.h>
struct br_scrubbers {
pthread_t scrubthread;
@@ -130,6 +130,8 @@ bitd_scrub_post_compute_check(xlator_t *this, br_child_t *child, fd_t *fd,
(void)memcpy(*signature, signptr, sizeof(br_isignature_out_t) + signlen);
+ (*signature)->signaturelen = signlen;
+
unref_dict:
dict_unref(xattr);
out:
@@ -222,7 +224,7 @@ bitd_compare_ckum(xlator_t *this, br_isignature_out_t *sign, unsigned char *md,
GF_VALIDATE_OR_GOTO(this->name, md, out);
GF_VALIDATE_OR_GOTO(this->name, entry, out);
- if (strncmp(sign->signature, (char *)md, strlen(sign->signature)) == 0) {
+ if (strncmp(sign->signature, (char *)md, sign->signaturelen) == 0) {
gf_msg_debug(this->name, 0,
"%s [GFID: %s | Brick: %s] "
"matches calculated checksum",
@@ -599,25 +601,23 @@ br_fsscan_deactivate(xlator_t *this)
static void
br_scrubber_log_time(xlator_t *this, const char *sfx)
{
- char timestr[1024] = {
- 0,
- };
- struct timeval tv = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
br_private_t *priv = NULL;
+ time_t now = 0;
+ now = gf_time();
priv = this->private;
- gettimeofday(&tv, NULL);
- gf_time_fmt(timestr, sizeof(timestr), tv.tv_sec, gf_timefmt_FT);
+ gf_time_fmt(timestr, sizeof(timestr), now, gf_timefmt_FT);
if (strcasecmp(sfx, "started") == 0) {
- br_update_scrub_start_time(&priv->scrub_stat, &tv);
+ br_update_scrub_start_time(&priv->scrub_stat, now);
gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_START,
"Scrubbing %s at %s", sfx, timestr);
} else {
- br_update_scrub_finish_time(&priv->scrub_stat, timestr, &tv);
+ br_update_scrub_finish_time(&priv->scrub_stat, timestr, now);
gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_FINISH,
"Scrubbing %s at %s", sfx, timestr);
}
@@ -626,15 +626,13 @@ br_scrubber_log_time(xlator_t *this, const char *sfx)
static void
br_fsscanner_log_time(xlator_t *this, br_child_t *child, const char *sfx)
{
- char timestr[1024] = {
- 0,
- };
- struct timeval tv = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
+ time_t now = 0;
- gettimeofday(&tv, NULL);
- gf_time_fmt(timestr, sizeof(timestr), tv.tv_sec, gf_timefmt_FT);
+ now = gf_time();
+ gf_time_fmt(timestr, sizeof(timestr), now, gf_timefmt_FT);
if (strcasecmp(sfx, "started") == 0) {
gf_msg_debug(this->name, 0, "Scrubbing \"%s\" %s at %s",
@@ -718,8 +716,10 @@ br_scrubber_exit_control(xlator_t *this)
if (scrub_monitor->state == BR_SCRUB_STATE_ACTIVE) {
(void)br_fsscan_activate(this);
} else {
+ UNLOCK(&scrub_monitor->lock);
gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO,
"Volume waiting to get rescheduled..");
+ return;
}
}
UNLOCK(&scrub_monitor->lock);
@@ -915,10 +915,7 @@ br_fsscan_schedule(xlator_t *this)
{
uint32_t timo = 0;
br_private_t *priv = NULL;
- struct timeval tv = {
- 0,
- };
- char timestr[1024] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
struct br_scrubber *fsscrub = NULL;
@@ -929,8 +926,7 @@ br_fsscan_schedule(xlator_t *this)
fsscrub = &priv->fsscrub;
scrub_monitor = &priv->scrub_monitor;
- (void)gettimeofday(&tv, NULL);
- scrub_monitor->boot = tv.tv_sec;
+ scrub_monitor->boot = gf_time();
timo = br_fsscan_calculate_timeout(fsscrub->frequency);
if (timo == 0) {
@@ -971,12 +967,10 @@ int32_t
br_fsscan_activate(xlator_t *this)
{
uint32_t timo = 0;
- char timestr[1024] = {
- 0,
- };
- struct timeval now = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
+ time_t now = 0;
br_private_t *priv = NULL;
struct br_scrubber *fsscrub = NULL;
struct br_monitor *scrub_monitor = NULL;
@@ -985,7 +979,7 @@ br_fsscan_activate(xlator_t *this)
fsscrub = &priv->fsscrub;
scrub_monitor = &priv->scrub_monitor;
- (void)gettimeofday(&now, NULL);
+ now = gf_time();
timo = br_fsscan_calculate_timeout(fsscrub->frequency);
if (timo == 0) {
gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_ZERO_TIMEOUT_BUG,
@@ -999,7 +993,7 @@ br_fsscan_activate(xlator_t *this)
}
pthread_mutex_unlock(&scrub_monitor->donelock);
- gf_time_fmt(timestr, sizeof(timestr), (now.tv_sec + timo), gf_timefmt_FT);
+ gf_time_fmt(timestr, sizeof(timestr), now + timo, gf_timefmt_FT);
(void)gf_tw_mod_timer(priv->timer_wheel, scrub_monitor->timer, timo);
_br_monitor_set_scrub_state(scrub_monitor, BR_SCRUB_STATE_PENDING);
@@ -1016,12 +1010,10 @@ br_fsscan_reschedule(xlator_t *this)
{
int32_t ret = 0;
uint32_t timo = 0;
- char timestr[1024] = {
- 0,
- };
- struct timeval now = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
+ time_t now = 0;
br_private_t *priv = NULL;
struct br_scrubber *fsscrub = NULL;
struct br_monitor *scrub_monitor = NULL;
@@ -1033,7 +1025,7 @@ br_fsscan_reschedule(xlator_t *this)
if (!fsscrub->frequency_reconf)
return 0;
- (void)gettimeofday(&now, NULL);
+ now = gf_time();
timo = br_fsscan_calculate_timeout(fsscrub->frequency);
if (timo == 0) {
gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_ZERO_TIMEOUT_BUG,
@@ -1041,7 +1033,7 @@ br_fsscan_reschedule(xlator_t *this)
return -1;
}
- gf_time_fmt(timestr, sizeof(timestr), (now.tv_sec + timo), gf_timefmt_FT);
+ gf_time_fmt(timestr, sizeof(timestr), now + timo, gf_timefmt_FT);
pthread_mutex_lock(&scrub_monitor->donelock);
{
@@ -1069,23 +1061,19 @@ br_fsscan_ondemand(xlator_t *this)
{
int32_t ret = 0;
uint32_t timo = 0;
- char timestr[1024] = {
- 0,
- };
- struct timeval now = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
+ time_t now = 0;
br_private_t *priv = NULL;
struct br_monitor *scrub_monitor = NULL;
priv = this->private;
scrub_monitor = &priv->scrub_monitor;
- (void)gettimeofday(&now, NULL);
-
+ now = gf_time();
timo = BR_SCRUB_ONDEMAND;
-
- gf_time_fmt(timestr, sizeof(timestr), (now.tv_sec + timo), gf_timefmt_FT);
+ gf_time_fmt(timestr, sizeof(timestr), now + timo, gf_timefmt_FT);
pthread_mutex_lock(&scrub_monitor->donelock);
{
@@ -1543,9 +1531,11 @@ br_scrubber_log_option(xlator_t *this, br_private_t *priv,
[BR_SCRUB_THROTTLE_LAZY] = "lazy",
[BR_SCRUB_THROTTLE_NORMAL] = "normal",
[BR_SCRUB_THROTTLE_AGGRESSIVE] = "aggressive",
+ [BR_SCRUB_THROTTLE_STALLED] = "stalled",
};
char *scrub_freq_str[] = {
+ [0] = "",
[BR_FSSCRUB_FREQ_HOURLY] = "hourly",
[BR_FSSCRUB_FREQ_DAILY] = "daily",
[BR_FSSCRUB_FREQ_WEEKLY] = "weekly",
@@ -1558,6 +1548,8 @@ br_scrubber_log_option(xlator_t *this, br_private_t *priv,
return; /* logged as pause */
if (fsscrub->frequency_reconf || fsscrub->throttle_reconf) {
+ if (fsscrub->throttle == BR_SCRUB_THROTTLE_VOID)
+ return;
gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_TUNABLE,
"SCRUB TUNABLES:: [Frequency: %s, Throttle: %s]",
scrub_freq_str[fsscrub->frequency],
@@ -1649,7 +1641,7 @@ br_read_bad_object_dir(xlator_t *this, br_child_t *child, fd_t *fd,
int32_t ret = -1;
off_t offset = 0;
int32_t count = 0;
- char key[PATH_MAX] = {
+ char key[32] = {
0,
};
dict_t *out_dict = NULL;
@@ -1687,7 +1679,7 @@ br_read_bad_object_dir(xlator_t *this, br_child_t *child, fd_t *fd,
}
ret = count;
- ret = dict_set_int32(dict, "count", count);
+ ret = dict_set_int32_sizen(dict, "count", count);
out:
return ret;
@@ -1769,10 +1761,10 @@ br_collect_bad_objects_of_child(xlator_t *this, br_child_t *child, dict_t *dict,
{
int32_t ret = -1;
int32_t count = 0;
- char key[PATH_MAX] = {
+ char key[32] = {
0,
};
- char main_key[PATH_MAX] = {
+ char main_key[32] = {
0,
};
int32_t j = 0;
@@ -1784,15 +1776,15 @@ br_collect_bad_objects_of_child(xlator_t *this, br_child_t *child, dict_t *dict,
char *path = NULL;
int32_t len = 0;
- ret = dict_get_int32(child_dict, "count", &count);
+ ret = dict_get_int32_sizen(child_dict, "count", &count);
if (ret)
goto out;
tmp_count = total_count;
for (j = 0; j < count; j++) {
- snprintf(key, PATH_MAX, "quarantine-%d", j);
- ret = dict_get_str(child_dict, key, &entry);
+ len = snprintf(key, sizeof(key), "quarantine-%d", j);
+ ret = dict_get_strn(child_dict, key, len, &entry);
if (ret)
continue;
@@ -1802,7 +1794,7 @@ br_collect_bad_objects_of_child(xlator_t *this, br_child_t *child, dict_t *dict,
if ((len < 0) || (len >= PATH_MAX)) {
continue;
}
- snprintf(main_key, PATH_MAX, "quarantine-%d", tmp_count);
+ snprintf(main_key, sizeof(main_key), "quarantine-%d", tmp_count);
ret = dict_set_dynstr_with_alloc(dict, main_key, tmp);
if (!ret)
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h
index 7a3c14abb93..4e5f67bc021 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h
@@ -11,7 +11,7 @@
#ifndef __BIT_ROT_SCRUB_H__
#define __BIT_ROT_SCRUB_H__
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#include "bit-rot.h"
void *
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h
index f3fbe2928b7..37b45a42eac 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h
@@ -11,7 +11,7 @@
#ifndef __BIT_ROT_SSM_H__
#define __BIT_ROT_SSM_H__
-#include "xlator.h"
+#include <glusterfs/xlator.h>
typedef enum br_scrub_state {
BR_SCRUB_STATE_INACTIVE = 0,
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c
index 8ea89257836..a2f1c343a1d 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.c
@@ -9,12 +9,9 @@
*/
#include <ctype.h>
-#include <sys/uio.h>
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "compat-errno.h"
+#include <glusterfs/logging.h>
+#include <glusterfs/compat-errno.h>
#include "bit-rot.h"
#include "bit-rot-scrub.h"
@@ -244,8 +241,8 @@ br_object_open(xlator_t *this, br_object_t *object, inode_t *inode,
ret = -EINVAL;
fd = fd_create(inode, 0);
if (!fd) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_FD_CREATE_FAILED,
- "failed to create fd for the inode %s", uuid_utoa(inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_FD_CREATE_FAILED,
+ "gfid=%s", uuid_utoa(inode->gfid), NULL);
goto out;
}
@@ -299,8 +296,8 @@ br_object_read_block_and_sign(xlator_t *this, fd_t *fd, br_child_t *child,
NULL, NULL, NULL);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, errno, BRB_MSG_READV_FAILED,
- "readv on %s failed", uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, errno, BRB_MSG_READV_FAILED,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
ret = -1;
goto out;
}
@@ -350,9 +347,9 @@ br_calculate_obj_checksum(unsigned char *md, br_child_t *child, fd_t *fd,
ret = br_object_read_block_and_sign(this, fd, child, offset, block,
&sha256);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_BLOCK_READ_FAILED,
- "reading block with offset %" PRIu64 " of object %s failed",
- offset, uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_BLOCK_READ_FAILED,
+ "offset=%" PRIu64, offset, "object-gfid=%s",
+ uuid_utoa(fd->inode->gfid), NULL);
break;
}
@@ -394,28 +391,23 @@ br_object_read_sign(inode_t *linked_inode, fd_t *fd, br_object_t *object,
md = GF_MALLOC(SHA256_DIGEST_LENGTH, gf_common_mt_char);
if (!md) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY,
- "failed to allocate memory for saving hash of the "
- "object %s",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_SAVING_HASH_FAILED,
+ "object-gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
goto out;
}
ret = br_object_checksum(md, object, fd, iatt);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_CALC_CHECKSUM_FAILED,
- "calculating checksum "
- "for the object %s failed",
- uuid_utoa(linked_inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_CALC_CHECKSUM_FAILED,
+ "object-gfid=%s", uuid_utoa(linked_inode->gfid), NULL);
goto free_signature;
}
sign = br_prepare_signature(md, SHA256_DIGEST_LENGTH,
BR_SIGNATURE_TYPE_SHA256, object);
if (!sign) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SIGN_FAILED,
- "failed to get the signature for the object %s",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SIGN_FAILED,
+ "object-gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
goto free_signature;
}
@@ -423,17 +415,16 @@ br_object_read_sign(inode_t *linked_inode, fd_t *fd, br_object_t *object,
signature_size(SHA256_DIGEST_LENGTH), _gf_true);
if (!xattr) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_SIGN_FAILED,
- "dict allocation for signing failed for the object %s",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_SIGN_FAILED,
+ "dict-allocation object-gfid=%s", uuid_utoa(fd->inode->gfid),
+ NULL);
goto free_isign;
}
ret = syncop_fsetxattr(object->child->xl, fd, xattr, 0, NULL, NULL);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_SIGN_FAILED,
- "fsetxattr of signature to the object %s failed",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_SIGN_FAILED,
+ "fsetxattr object-gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
goto unref_dict;
}
@@ -466,8 +457,8 @@ br_log_object(xlator_t *this, char *op, uuid_t gfid, int32_t op_errno)
"[reason: %s]",
op, uuid_utoa(gfid), strerror(op_errno));
} else {
- gf_msg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_OP_FAILED,
- "%s() failed on object %s", op, uuid_utoa(gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_OP_FAILED, "op=%s",
+ op, "gfid=%s", uuid_utoa(gfid), NULL);
}
}
@@ -481,8 +472,8 @@ br_log_object_path(xlator_t *this, char *op, const char *path, int32_t op_errno)
"[reason: %s]",
op, path, strerror(op_errno));
} else {
- gf_msg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_OP_FAILED,
- "%s() failed on object %s", op, path);
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_OP_FAILED, "op=%s",
+ op, "path=%s", path, NULL);
}
}
@@ -511,8 +502,8 @@ br_trigger_sign(xlator_t *this, br_child_t *child, inode_t *linked_inode,
ret = -1;
fd = fd_create(linked_inode, 0);
if (!fd) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_FD_CREATE_FAILED,
- "Failed to create fd [GFID %s]", uuid_utoa(linked_inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_FD_CREATE_FAILED,
+ "gfid=%s", uuid_utoa(linked_inode->gfid), NULL);
goto cleanup_dict;
}
@@ -536,9 +527,9 @@ cleanup_dict:
dict_unref(dict);
out:
if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0, BRB_MSG_TRIGGER_SIGN,
- "Could not trigger signingd for %s (reopen hint: %d)",
- uuid_utoa(linked_inode->gfid), val);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRB_MSG_TRIGGER_SIGN_FAILED,
+ "gfid=%s", uuid_utoa(linked_inode->gfid), "reopen-hint-val=%d",
+ val, NULL);
}
}
@@ -618,10 +609,8 @@ br_sign_object(br_object_t *object)
ret = br_object_read_sign(linked_inode, fd, object, &iatt);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_READ_AND_SIGN_FAILED,
- "reading and signing of "
- "the object %s failed",
- uuid_utoa(linked_inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_READ_AND_SIGN_FAILED,
+ "gfid=%s", uuid_utoa(linked_inode->gfid), NULL);
goto unref_fd;
}
@@ -675,8 +664,8 @@ br_process_object(void *arg)
ret = br_sign_object(object);
if (ret && !br_object_sign_softerror(-ret))
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SIGN_FAILED,
- "SIGNING FAILURE [%s]", uuid_utoa(object->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_SIGN_FAILED,
+ "gfid=%s", uuid_utoa(object->gfid), NULL);
GF_FREE(object);
}
@@ -778,9 +767,8 @@ br_schedule_object_reopen(xlator_t *this, br_object_t *object,
timer = br_initialize_timer(this, object, child, ev);
if (!timer)
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_TIMER_FAILED,
- "Failed to allocate object expiry timer [GFID: %s]",
- uuid_utoa(object->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_TIMER_FAILED,
+ "gfid=%s", uuid_utoa(object->gfid), NULL);
return timer ? 0 : -1;
}
@@ -827,15 +815,15 @@ br_brick_callback(void *xl, char *brick, void *data, changelog_event_t *ev)
child = br_get_child_from_brick_path(this, brick);
if (!child) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SUBVOL_FAILED,
- "failed to get the subvolume for the brick %s", brick);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SUBVOL_FAILED,
+ "brick=%s", brick, NULL);
goto out;
}
object = br_initialize_object(this, child, ev);
if (!object) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY,
- "failed to allocate object memory [GFID: %s]", uuid_utoa(gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY,
+ "object-gfid=%s", uuid_utoa(gfid), NULL);
goto out;
}
@@ -887,8 +875,8 @@ br_check_object_need_sign(xlator_t *this, dict_t *xattr, br_child_t *child)
ret = dict_get_ptr(xattr, GLUSTERFS_GET_OBJECT_SIGNATURE, (void **)&sign);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SIGN_FAILED,
- "failed to get object signature info");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SIGN_FAILED,
+ "object-info", NULL);
goto out;
}
@@ -927,9 +915,9 @@ br_prepare_loc(xlator_t *this, br_child_t *child, loc_t *parent,
ret = inode_path(parent->inode, entry->d_name, (char **)&loc->path);
if (ret < 0 || !loc->path) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_PATH_FAILED,
- "inode_path on %s (parent: %s) failed", entry->d_name,
- uuid_utoa(parent->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_PATH_FAILED,
+ "inode_path=%s", entry->d_name, "parent-gfid=%s",
+ uuid_utoa(parent->inode->gfid), NULL);
goto out;
}
@@ -973,6 +961,7 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
int32_t ret = -1;
inode_t *linked_inode = NULL;
gf_boolean_t need_signing = _gf_false;
+ gf_boolean_t need_reopen = _gf_true;
GF_VALIDATE_OR_GOTO("bit-rot", subvol, out);
GF_VALIDATE_OR_GOTO("bit-rot", data, out);
@@ -1020,8 +1009,8 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
*/
if (bitd_is_bad_file(this, child, &loc, NULL)) {
- gf_msg(this->name, GF_LOG_WARNING, 0, BRB_MSG_SKIP_OBJECT,
- "Entry [%s] is marked corrupted.. skipping.", loc.path);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRB_MSG_SKIP_OBJECT, "path=%s",
+ loc.path, NULL);
goto unref_inode;
}
@@ -1038,23 +1027,32 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
if (op_errno == ENODATA && (iatt.ia_size != 0))
need_signing = _gf_true;
if (op_errno == EINVAL)
- gf_msg(this->name, GF_LOG_WARNING, 0,
- BRB_MSG_PARTIAL_VERSION_PRESENCE,
- "Partial "
- "version xattr presence detected, ignoring "
- "[GFID: %s]",
- uuid_utoa(linked_inode->gfid));
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ BRB_MSG_PARTIAL_VERSION_PRESENCE, "gfid=%s",
+ uuid_utoa(linked_inode->gfid), NULL);
} else {
need_signing = br_check_object_need_sign(this, xattr, child);
+
+ /*
+ * If we are here means, bitrot daemon has started. Is it just
+ * a simple restart of the daemon or is it started because the
+ * feature is enabled is something hard to determine. Hence,
+ * if need_signing is false (because bit-rot version and signature
+ * are present), then still go ahead and sign it.
+ */
+ if (!need_signing) {
+ need_signing = _gf_true;
+ need_reopen = _gf_true;
+ }
}
if (!need_signing)
goto unref_dict;
- gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_TRIGGER_SIGN,
- "Triggering signing for %s [GFID: %s | Brick: %s]", loc.path,
- uuid_utoa(linked_inode->gfid), child->brick_path);
- br_trigger_sign(this, child, linked_inode, &loc, _gf_true);
+ gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_TRIGGER_SIGN, "path=%s",
+ loc.path, "gfid=%s", uuid_utoa(linked_inode->gfid), "Brick-path=%s",
+ child->brick_path, NULL);
+ br_trigger_sign(this, child, linked_inode, &loc, need_reopen);
ret = 0;
@@ -1086,17 +1084,16 @@ br_oneshot_signer(void *arg)
THIS = this;
- gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_CRAWLING_START,
- "Crawling brick [%s], scanning for unsigned objects",
- child->brick_path);
+ gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_CRAWLING_START, "brick-path=%s",
+ child->brick_path, NULL);
loc.inode = child->table->root;
(void)syncop_ftw_throttle(child->xl, &loc, GF_CLIENT_PID_BITD, child,
bitd_oneshot_crawl, BR_CRAWL_THROTTLE_COUNT,
BR_CRAWL_THROTTLE_ZZZ);
- gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_CRAWLING_FINISH,
- "Completed crawling brick [%s]", child->brick_path);
+ gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_CRAWLING_FINISH,
+ "brick-path=%s", child->brick_path, NULL);
return NULL;
}
@@ -1140,9 +1137,7 @@ br_enact_signer(xlator_t *this, br_child_t *child, br_stub_init_t *stub)
ret = gf_changelog_register_generic(brick, 1, 1,
this->ctx->cmd_args.log_file, -1, this);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, errno, BRB_MSG_REGISTER_FAILED,
- "Register to changelog "
- "failed");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, BRB_MSG_REGISTER_FAILED, NULL);
goto dealloc;
}
@@ -1150,8 +1145,8 @@ br_enact_signer(xlator_t *this, br_child_t *child, br_stub_init_t *stub)
ret = gf_thread_create(&child->thread, NULL, br_oneshot_signer, child,
"brosign");
if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, BRB_MSG_SPAWN_FAILED,
- "failed to spawn FS crawler thread");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRB_MSG_SPAWN_FAILED,
+ "FS-crawler-thread", NULL);
else
child->threadrunning = 1;
@@ -1179,9 +1174,9 @@ br_launch_scrubber(xlator_t *this, br_child_t *child, struct br_scanfs *fsscan,
ret = gf_thread_create(&child->thread, NULL, br_fsscanner, child,
"brfsscan");
if (ret != 0) {
- gf_msg(this->name, GF_LOG_ALERT, 0, BRB_MSG_SPAWN_FAILED,
- "failed to spawn bitrot scrubber daemon [Brick: %s]",
- child->brick_path);
+ gf_smsg(this->name, GF_LOG_ALERT, 0, BRB_MSG_SPAWN_FAILED,
+ "bitrot-scrubber-daemon Brick-path=%s", child->brick_path,
+ NULL);
goto error_return;
}
@@ -1269,8 +1264,8 @@ br_child_enaction(xlator_t *this, br_child_t *child, br_stub_init_t *stub)
if (!ret) {
child->witnessed = 1;
_br_set_child_state(child, BR_CHILD_STATE_CONNECTED);
- gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_CONNECTED_TO_BRICK,
- "Connected to brick %s..", child->brick_path);
+ gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_CONNECTED_TO_BRICK,
+ "brick-path=%s", child->brick_path, NULL);
}
}
pthread_mutex_unlock(&child->lock);
@@ -1317,8 +1312,8 @@ br_brick_connect(xlator_t *this, br_child_t *child)
if (ret) {
op_errno = -ret;
ret = -1;
- gf_msg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_LOOKUP_FAILED,
- "lookup on root failed");
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_LOOKUP_FAILED,
+ NULL);
goto wipeloc;
}
@@ -1327,15 +1322,14 @@ br_brick_connect(xlator_t *this, br_child_t *child)
if (ret) {
op_errno = -ret;
ret = -1;
- gf_msg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_GET_INFO_FAILED,
- "failed to get stub info");
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_GET_INFO_FAILED,
+ NULL);
goto wipeloc;
}
ret = dict_get_ptr(xattr, GLUSTERFS_GET_BR_STUB_INIT_TIME, (void **)&stub);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_INFO_FAILED,
- "failed to extract stub information");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_INFO_FAILED, NULL);
goto free_dict;
}
@@ -1405,11 +1399,10 @@ br_cleanup_scrubber(xlator_t *this, br_child_t *child)
*/
ret = gf_thread_cleanup_xint(child->thread);
if (ret)
- gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_THREAD_CLEANUP,
- "Error cleaning up scanner thread");
+ gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_THREAD_CLEANUP, NULL);
- gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUBBER_CLEANED,
- "Cleaned up scrubber for brick [%s]", child->brick_path);
+ gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUBBER_CLEANED,
+ "brick-path=%s", child->brick_path, NULL);
return 0;
}
@@ -1494,9 +1487,8 @@ br_handle_events(void *arg)
child = childev->child;
ret = childev->call(this, child);
if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SUBVOL_CONNECT_FAILED,
- "callback handler for subvolume [%s] failed",
- child->xl->name);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SUBVOL_CONNECT_FAILED,
+ "name=%s", child->xl->name, NULL);
GF_FREE(childev);
}
@@ -1514,8 +1506,7 @@ mem_acct_init(xlator_t *this)
ret = xlator_mem_acct_init(this, gf_br_stub_mt_end + 1);
if (ret != 0) {
- gf_msg(this->name, GF_LOG_WARNING, 0, BRB_MSG_MEM_ACNT_FAILED,
- "Memory accounting init failed");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRB_MSG_MEM_ACNT_FAILED, NULL);
return ret;
}
@@ -1532,8 +1523,8 @@ _br_qchild_event(xlator_t *this, br_child_t *child, br_child_handler *call)
childev = GF_CALLOC(1, sizeof(*childev), gf_br_mt_br_child_event_t);
if (!childev) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY,
- "Event unhandled for child.. [Brick: %s]", child->xl->name);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_EVENT_UNHANDLED,
+ "Brick-name=%s", child->xl->name, NULL);
return;
}
@@ -1628,10 +1619,8 @@ notify(xlator_t *this, int32_t event, void *data, ...)
switch (event) {
case GF_EVENT_CHILD_UP:
if (idx < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_INVALID_SUBVOL,
- "Got event %d from "
- "invalid subvolume",
- event);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_INVALID_SUBVOL,
+ "event=%d", event, NULL);
goto out;
}
@@ -1659,9 +1648,8 @@ notify(xlator_t *this, int32_t event, void *data, ...)
case GF_EVENT_CHILD_DOWN:
if (idx < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- BRB_MSG_INVALID_SUBVOL_CHILD,
- "Got event %d from invalid subvolume", event);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_INVALID_SUBVOL,
+ "event=%d", event, NULL);
goto out;
}
@@ -1702,11 +1690,9 @@ notify(xlator_t *this, int32_t event, void *data, ...)
"called");
if (scrub_monitor->state != BR_SCRUB_STATE_PENDING) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- BRB_MSG_RESCHEDULE_SCRUBBER_FAILED,
- "on demand scrub schedule failed. Scrubber is "
- "not in pending state. Current state is %d",
- scrub_monitor->state);
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ BRB_MSG_RESCHEDULE_SCRUBBER_FAILED, "Current-state=%d",
+ scrub_monitor->state, NULL);
return -2;
}
@@ -1718,11 +1704,8 @@ notify(xlator_t *this, int32_t event, void *data, ...)
pthread_mutex_unlock(&priv->lock);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- BRB_MSG_RESCHEDULE_SCRUBBER_FAILED,
- "Could not schedule ondemand scrubbing. "
- "Scrubbing will continue according to "
- "old frequency.");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ BRB_MSG_COULD_NOT_SCHEDULE_SCRUB, NULL);
}
gf_msg_debug(this->name, 0, "returning %d", ret);
break;
@@ -1734,22 +1717,26 @@ out:
return 0;
}
-/**
- * Initialize signer specific structures, spawn worker threads.
- */
-
static void
br_fini_signer(xlator_t *this, br_private_t *priv)
{
int i = 0;
- for (; i < BR_WORKERS; i++) {
+ if (priv == NULL)
+ return;
+
+ for (; i < priv->signer_th_count; i++) {
(void)gf_thread_cleanup_xint(priv->obj_queue->workers[i]);
}
+ GF_FREE(priv->obj_queue->workers);
pthread_cond_destroy(&priv->object_cond);
}
+/**
+ * Initialize signer specific structures, spawn worker threads.
+ */
+
static int32_t
br_init_signer(xlator_t *this, br_private_t *priv)
{
@@ -1769,13 +1756,17 @@ br_init_signer(xlator_t *this, br_private_t *priv)
goto cleanup_cond;
INIT_LIST_HEAD(&priv->obj_queue->objects);
- for (i = 0; i < BR_WORKERS; i++) {
+ priv->obj_queue->workers = GF_CALLOC(
+ priv->signer_th_count, sizeof(pthread_t), gf_br_mt_br_worker_t);
+ if (!priv->obj_queue->workers)
+ goto cleanup_obj_queue;
+
+ for (i = 0; i < priv->signer_th_count; i++) {
ret = gf_thread_create(&priv->obj_queue->workers[i], NULL,
br_process_object, this, "brpobj");
if (ret != 0) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, BRB_MSG_SPAWN_FAILED,
- "thread creation"
- " failed");
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ BRB_MSG_THREAD_CREATION_FAILED, NULL);
ret = -1;
goto cleanup_threads;
}
@@ -1787,7 +1778,9 @@ cleanup_threads:
for (i--; i >= 0; i--) {
(void)gf_thread_cleanup_xint(priv->obj_queue->workers[i]);
}
+ GF_FREE(priv->obj_queue->workers);
+cleanup_obj_queue:
GF_FREE(priv->obj_queue);
cleanup_cond:
@@ -1840,18 +1833,17 @@ br_rate_limit_signer(xlator_t *this, int child_count, int numbricks)
if (contribution == 0)
contribution = 1;
spec.rate = BR_HASH_CALC_READ_SIZE * contribution;
- spec.maxlimit = BR_WORKERS * BR_HASH_CALC_READ_SIZE;
+ spec.maxlimit = priv->signer_th_count * BR_HASH_CALC_READ_SIZE;
#endif
if (!spec.rate)
- gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_RATE_LIMIT_INFO,
- "[Rate Limit Info] \"FULL THROTTLE\"");
+ gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_RATE_LIMIT_INFO,
+ "FULL THROTTLE", NULL);
else
- gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_RATE_LIMIT_INFO,
- "[Rate Limit Info] \"tokens/sec (rate): %lu, "
- "maxlimit: %lu\"",
- spec.rate, spec.maxlimit);
+ gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_RATE_LIMIT_INFO,
+ "tokens/sec-rate=%lu", spec.rate, "maxlimit=%lu", spec.maxlimit,
+ NULL);
priv->tbf = tbf_init(&spec, 1);
return priv->tbf ? 0 : -1;
@@ -1860,11 +1852,16 @@ br_rate_limit_signer(xlator_t *this, int child_count, int numbricks)
static int32_t
br_signer_handle_options(xlator_t *this, br_private_t *priv, dict_t *options)
{
- if (options)
+ if (options) {
GF_OPTION_RECONF("expiry-time", priv->expiry_time, options, uint32,
error_return);
- else
+ GF_OPTION_RECONF("signer-threads", priv->signer_th_count, options,
+ uint32, error_return);
+ } else {
GF_OPTION_INIT("expiry-time", priv->expiry_time, uint32, error_return);
+ GF_OPTION_INIT("signer-threads", priv->signer_th_count, uint32,
+ error_return);
+ }
return 0;
@@ -1880,6 +1877,8 @@ br_signer_init(xlator_t *this, br_private_t *priv)
GF_OPTION_INIT("expiry-time", priv->expiry_time, uint32, error_return);
GF_OPTION_INIT("brick-count", numbricks, int32, error_return);
+ GF_OPTION_INIT("signer-threads", priv->signer_th_count, uint32,
+ error_return);
ret = br_rate_limit_signer(this, priv->child_count, numbricks);
if (ret)
@@ -1966,8 +1965,8 @@ br_init_children(xlator_t *this, br_private_t *priv)
child->timer_pool = mem_pool_new(struct gf_tw_timer_list, 4096);
if (!child->timer_pool) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY,
- "failed to allocate mem-pool for timer");
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_MEM_POOL_ALLOC,
+ NULL);
errno = ENOMEM;
goto freechild;
}
@@ -1993,15 +1992,13 @@ init(xlator_t *this)
br_private_t *priv = NULL;
if (!this->children) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_NO_CHILD,
- "FATAL: no children");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_NO_CHILD, NULL);
goto out;
}
priv = GF_CALLOC(1, sizeof(*priv), gf_br_mt_br_private_t);
if (!priv) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY,
- "failed to allocate memory (->priv)");
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY, NULL);
goto out;
}
@@ -2019,8 +2016,8 @@ init(xlator_t *this)
priv->timer_wheel = glusterfs_ctx_tw_get(this->ctx);
if (!priv->timer_wheel) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_TIMER_WHEEL_UNAVAILABLE,
- "global timer wheel unavailable");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_TIMER_WHEEL_UNAVAILABLE,
+ NULL);
goto cleanup;
}
@@ -2042,15 +2039,14 @@ init(xlator_t *this)
ret = gf_thread_create(&priv->thread, NULL, br_handle_events, this,
"brhevent");
if (ret != 0) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, BRB_MSG_SPAWN_FAILED,
- "thread creation failed");
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, BRB_MSG_THREAD_CREATION_FAILED,
+ NULL);
ret = -1;
}
if (!ret) {
- gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_BITROT_LOADED,
- "bit-rot xlator loaded in \"%s\" mode",
- (priv->iamscrubber) ? "SCRUBBER" : "SIGNER");
+ gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_BITROT_LOADED, "mode=%s",
+ (priv->iamscrubber) ? "SCRUBBER" : "SIGNER", NULL);
return 0;
}
@@ -2097,9 +2093,8 @@ br_reconfigure_monitor(xlator_t *this)
ret = br_scrub_state_machine(this, _gf_false);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_RESCHEDULE_SCRUBBER_FAILED,
- "Could not reschedule scrubber for the volume. Scrubbing "
- "will continue according to old frequency.");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_COULD_NOT_SCHEDULE_SCRUB,
+ NULL);
}
}
@@ -2210,5 +2205,28 @@ struct volume_options options[] = {
.description = "Pause/Resume scrub. Upon resume, scrubber "
"continues from where it left off.",
},
+ {
+ .key = {"signer-threads"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = BR_WORKERS,
+ .op_version = {GD_OP_VERSION_8_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "Number of signing process threads. As a best "
+ "practice, set this to the number of processor cores",
+ },
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "bit-rot",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.h b/xlators/features/bit-rot/src/bitd/bit-rot.h
index 962b4d717e6..8ac7dcdac3d 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot.h
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.h
@@ -11,17 +11,17 @@
#ifndef __BIT_ROT_H__
#define __BIT_ROT_H__
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
-#include "defaults.h"
-#include "syncop.h"
-#include "syncop-utils.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/syncop-utils.h>
#include "changelog.h"
#include "timer-wheel.h"
-#include "throttle-tbf.h"
+#include <glusterfs/throttle-tbf.h>
#include "bit-rot-ssm.h"
#include "bit-rot-common.h"
@@ -30,12 +30,6 @@
#include <openssl/sha.h>
-/**
- * TODO: make this configurable. As a best practice, set this to the
- * number of processor cores.
- */
-#define BR_WORKERS 4
-
typedef enum scrub_throttle {
BR_SCRUB_THROTTLE_VOID = -1,
BR_SCRUB_THROTTLE_LAZY = 0,
@@ -108,12 +102,12 @@ struct br_child {
typedef struct br_child br_child_t;
struct br_obj_n_workers {
- struct list_head objects; /* queue of objects expired from the
- timer wheel and ready to be picked
- up for signing */
- pthread_t workers[BR_WORKERS]; /* Threads which pick up the objects
- from the above queue and start
- signing each object */
+ struct list_head objects; /* queue of objects expired from the
+ timer wheel and ready to be picked
+ up for signing */
+ pthread_t *workers; /* Threads which pick up the objects
+ from the above queue and start
+ signing each object */
};
struct br_scrubber {
@@ -209,6 +203,8 @@ struct br_private {
uint32_t expiry_time; /* objects "wait" time */
+ uint32_t signer_th_count; /* Number of signing process threads */
+
tbf_t *tbf; /* token bucket filter */
gf_boolean_t iamscrubber; /* function as a fs scrubber */
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-common.h b/xlators/features/bit-rot/src/stub/bit-rot-common.h
index ef683ac7f9f..20561aa7764 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-common.h
+++ b/xlators/features/bit-rot/src/stub/bit-rot-common.h
@@ -11,7 +11,7 @@
#ifndef __BIT_ROT_COMMON_H__
#define __BIT_ROT_COMMON_H__
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "bit-rot-object-version.h"
#define BR_VXATTR_VERSION (1 << 0)
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-helpers.c b/xlators/features/bit-rot/src/stub/bit-rot-stub-helpers.c
index cb567297b60..8ac13a09941 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub-helpers.c
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-helpers.c
@@ -133,8 +133,8 @@ br_stub_add(xlator_t *this, uuid_t gfid)
* show up less number of objects. That's fine as we'll have
* the log files that will have the missing information.
*/
- gf_msg(this->name, GF_LOG_WARNING, errno, BRS_MSG_LINK_FAIL,
- "failed to record gfid [%s]", uuid_utoa(gfid));
+ gf_smsg(this->name, GF_LOG_WARNING, errno, BRS_MSG_LINK_FAIL, "gfid=%s",
+ uuid_utoa(gfid), NULL);
}
return 0;
@@ -157,10 +157,8 @@ br_stub_del(xlator_t *this, uuid_t gfid)
uuid_utoa(gfid));
ret = sys_unlink(gfid_path);
if (ret && (errno != ENOENT)) {
- gf_msg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJ_UNLINK_FAIL,
- "%s: failed to delete bad object link from quarantine "
- "directory",
- gfid_path);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJ_UNLINK_FAIL,
+ "path=%s", gfid_path, NULL);
ret = -errno;
goto out;
}
@@ -200,13 +198,13 @@ br_stub_check_stub_directory(xlator_t *this, char *fullpath)
}
if (ret)
- gf_msg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJECT_DIR_FAIL,
- "failed to create stub directory [%s]", fullpath);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJECT_DIR_FAIL,
+ "create-path=%s", fullpath, NULL);
return ret;
error_return:
- gf_msg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJECT_DIR_FAIL,
- "Failed to verify stub directory [%s]", fullpath);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJECT_DIR_FAIL,
+ "verify-path=%s", fullpath, NULL);
return -1;
}
@@ -231,8 +229,8 @@ br_stub_check_stub_file(xlator_t *this, char *path)
goto error_return;
fd = sys_creat(path, 0);
if (fd < 0)
- gf_msg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJECT_DIR_FAIL,
- "Failed to create stub file [%s]", path);
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ BRS_MSG_BAD_OBJECT_DIR_FAIL, "create-path=%s", path, NULL);
}
if (fd >= 0) {
@@ -243,8 +241,8 @@ br_stub_check_stub_file(xlator_t *this, char *path)
return ret;
error_return:
- gf_msg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJECT_DIR_FAIL,
- "Failed to verify stub file [%s]", path);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJECT_DIR_FAIL,
+ "verify-path=%s", path, NULL);
return -1;
}
@@ -463,12 +461,9 @@ br_stub_fill_readdir(fd_t *fd, br_stub_fd_t *fctx, DIR *dir, off_t off,
seekdir(dir, off);
#ifndef GF_LINUX_HOST_OS
if ((u_long)telldir(dir) != off && off != fctx->bad_object.dir_eof) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0,
- BRS_MSG_BAD_OBJECT_DIR_SEEK_FAIL,
- "seekdir(0x%llx) failed on dir=%p: "
- "Invalid argument (offset reused from "
- "another DIR * structure?)",
- off, dir);
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0,
+ BRS_MSG_BAD_OBJECT_DIR_SEEK_FAIL, "off=(0x%llx)", off,
+ "dir=%p", dir, NULL);
errno = EINVAL;
count = -1;
goto out;
@@ -480,9 +475,9 @@ br_stub_fill_readdir(fd_t *fd, br_stub_fd_t *fctx, DIR *dir, off_t off,
in_case = (u_long)telldir(dir);
if (in_case == -1) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0,
- BRS_MSG_BAD_OBJECT_DIR_TELL_FAIL,
- "telldir failed on dir=%p: %s", dir, strerror(errno));
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0,
+ BRS_MSG_BAD_OBJECT_DIR_TELL_FAIL, "dir=%p", dir, "err=%s",
+ strerror(errno), NULL);
goto out;
}
@@ -490,9 +485,9 @@ br_stub_fill_readdir(fd_t *fd, br_stub_fd_t *fctx, DIR *dir, off_t off,
entry = sys_readdir(dir, scratch);
if (!entry || errno != 0) {
if (errno == EBADF) {
- gf_msg(THIS->name, GF_LOG_WARNING, 0,
- BRS_MSG_BAD_OBJECT_DIR_READ_FAIL,
- "readdir failed on dir=%p: %s", dir, strerror(errno));
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0,
+ BRS_MSG_BAD_OBJECT_DIR_READ_FAIL, "dir=%p", dir,
+ "err=%s", strerror(errno), NULL);
goto out;
}
break;
@@ -514,12 +509,9 @@ br_stub_fill_readdir(fd_t *fd, br_stub_fd_t *fctx, DIR *dir, off_t off,
#ifndef GF_LINUX_HOST_OS
if ((u_long)telldir(dir) != in_case &&
in_case != fctx->bad_object.dir_eof) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0,
- BRS_MSG_BAD_OBJECT_DIR_SEEK_FAIL,
- "seekdir(0x%llx) failed on dir=%p: "
- "Invalid argument (offset reused from "
- "another DIR * structure?)",
- in_case, dir);
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0,
+ BRS_MSG_BAD_OBJECT_DIR_SEEK_FAIL, "in_case=(0x%llx)",
+ in_case, "dir=%p", dir, NULL);
errno = EINVAL;
count = -1;
goto out;
@@ -531,9 +523,9 @@ br_stub_fill_readdir(fd_t *fd, br_stub_fd_t *fctx, DIR *dir, off_t off,
this_entry = gf_dirent_for_name(entry->d_name);
if (!this_entry) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0, BRS_MSG_NO_MEMORY,
- "could not create gf_dirent for entry %s: (%s)",
- entry->d_name, strerror(errno));
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0,
+ BRS_MSG_CREATE_GF_DIRENT_FAILED, "entry-name=%s",
+ entry->d_name, "err=%s", strerror(errno), NULL);
goto out;
}
/*
@@ -580,8 +572,8 @@ br_stub_readdir_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd,
fctx = br_stub_fd_ctx_get(this, fd);
if (!fctx) {
- gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_GET_FD_CONTEXT_FAILED,
- "pfd is NULL, fd=%p", fd);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_GET_FD_CONTEXT_FAILED,
+ "fd=%p", fd, NULL);
op_errno = -ret;
goto done;
}
@@ -589,8 +581,8 @@ br_stub_readdir_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd,
dir = fctx->bad_object.dir;
if (!dir) {
- gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_BAD_HANDLE_DIR_NULL,
- "dir is NULL for fd=%p", fd);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_BAD_HANDLE_DIR_NULL,
+ "fd=%p", fd, NULL);
op_errno = EINVAL;
goto done;
}
@@ -680,10 +672,7 @@ br_stub_bad_objects_path(xlator_t *this, fd_t *fd, gf_dirent_t *entries,
* be shown.
*/
if (!tmp_dict) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_NO_MEMORY,
- "failed to allocate new dict for saving the paths "
- "of the corrupted objects. Scrub status will only "
- "display the gfid");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_FAILED, NULL);
goto out;
}
}
@@ -707,9 +696,8 @@ br_stub_bad_objects_path(xlator_t *this, fd_t *fd, gf_dirent_t *entries,
uuid_utoa(gfid), hpath);
br_stub_entry_xattr_fill(this, hpath, entry, tmp_dict);
} else
- gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_PATH_GET_FAILED,
- "failed to get the path for the inode %s",
- uuid_utoa_r(gfid, str_gfid));
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_PATH_GET_FAILED,
+ "gfid=%s", uuid_utoa_r(gfid, str_gfid), NULL);
inode = NULL;
hpath = NULL;
@@ -744,10 +732,8 @@ br_stub_get_path_of_gfid(xlator_t *this, inode_t *parent, inode_t *inode,
ret = syncop_gfid_to_path_hard(parent->table, FIRST_CHILD(this), gfid,
inode, path, _gf_true);
if (ret < 0)
- gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_PATH_GET_FAILED,
- "failed to get the path xattr from disk for the "
- " gfid %s. Trying to get path from the memory",
- uuid_utoa_r(gfid, gfid_str));
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_PATH_GET_FAILED,
+ "gfid=%s", uuid_utoa_r(gfid, gfid_str), NULL);
/*
* Try with soft resolution of path if hard resolve fails. Because
@@ -768,9 +754,8 @@ br_stub_get_path_of_gfid(xlator_t *this, inode_t *parent, inode_t *inode,
ret = syncop_gfid_to_path_hard(parent->table, FIRST_CHILD(this), gfid,
inode, path, _gf_false);
if (ret < 0)
- gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_PATH_GET_FAILED,
- "failed to get the path from the memory for gfid %s",
- uuid_utoa_r(gfid, gfid_str));
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_PATH_GET_FAILED,
+ "from-memory gfid=%s", uuid_utoa_r(gfid, gfid_str), NULL);
}
out:
@@ -804,10 +789,8 @@ br_stub_entry_xattr_fill(xlator_t *this, char *hpath, gf_dirent_t *entry,
ret = dict_set_dynstr(dict, entry->d_name, hpath);
if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_DICT_SET_FAILED,
- "failed to set the actual path %s as the value in the "
- "dict for the corrupted object %s",
- hpath, entry->d_name);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_DICT_SET_FAILED,
+ "path=%s", hpath, "object-name=%s", entry->d_name, NULL);
out:
return;
}
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h
index a3e7b03291e..9d93caf069f 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h
@@ -11,7 +11,7 @@
#ifndef _BR_MEM_TYPES_H
#define _BR_MEM_TYPES_H
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum br_mem_types {
gf_br_stub_mt_private_t = gf_common_mt_end + 1,
@@ -29,6 +29,7 @@ enum br_mem_types {
gf_br_stub_mt_sigstub_t,
gf_br_mt_br_child_event_t,
gf_br_stub_mt_misc,
+ gf_br_mt_br_worker_t,
gf_br_stub_mt_end,
};
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
index 0e22f74f9cf..6c15a166f18 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
@@ -11,7 +11,7 @@
#ifndef _BITROT_STUB_MESSAGES_H_
#define _BITROT_STUB_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
@@ -39,6 +39,79 @@ GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED,
BRS_MSG_BAD_HANDLE_DIR_NULL, BRS_MSG_BAD_OBJ_THREAD_FAIL,
BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL, BRS_MSG_LINK_FAIL,
BRS_MSG_BAD_OBJ_UNLINK_FAIL, BRS_MSG_DICT_SET_FAILED,
- BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL);
+ BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL,
+ BRS_MSG_SPAWN_SIGN_THRD_FAILED, BRS_MSG_KILL_SIGN_THREAD,
+ BRS_MSG_NON_BITD_PID, BRS_MSG_SIGN_PREPARE_FAIL,
+ BRS_MSG_USING_DEFAULT_THREAD_SIZE, BRS_MSG_ALLOC_MEM_FAILED,
+ BRS_MSG_DICT_ALLOC_FAILED, BRS_MSG_CREATE_GF_DIRENT_FAILED,
+ BRS_MSG_ALLOC_FAILED, BRS_MSG_PATH_XATTR_GET_FAILED,
+ BRS_MSG_VERSION_PREPARE_FAIL);
+#define BRS_MSG_MEM_ACNT_FAILED_STR "Memory accounting init failed"
+#define BRS_MSG_BAD_OBJ_THREAD_FAIL_STR "pthread_init failed"
+#define BRS_MSG_USING_DEFAULT_THREAD_SIZE_STR "Using default thread stack size"
+#define BRS_MSG_NO_CHILD_STR "FATAL: no children"
+#define BRS_MSG_SPAWN_SIGN_THRD_FAILED_STR \
+ "failed to create the new thread for signer"
+#define BRS_MSG_BAD_CONTAINER_FAIL_STR \
+ "failed to launch the thread for storing bad gfids"
+#define BRS_MSG_CANCEL_SIGN_THREAD_FAILED_STR \
+ "Could not cancel sign serializer thread"
+#define BRS_MSG_KILL_SIGN_THREAD_STR "killed the signer thread"
+#define BRS_MSG_GET_INODE_CONTEXT_FAILED_STR \
+ "failed to init the inode context for the inode"
+#define BRS_MSG_ADD_FD_TO_INODE_STR "failed to add fd to the inode"
+#define BRS_MSG_NO_MEMORY_STR "local allocation failed"
+#define BRS_MSG_BAD_OBJECT_ACCESS_STR "bad object accessed. Returning"
+#define BRS_MSG_SIGN_VERSION_ERROR_STR "Signing version exceeds current version"
+#define BRS_MSG_NON_BITD_PID_STR \
+ "PID from where signature request came, does not belong to bit-rot " \
+ "daemon. Unwinding the fop"
+#define BRS_MSG_SIGN_PREPARE_FAIL_STR \
+ "failed to prepare the signature. Unwinding the fop"
+#define BRS_MSG_VERSION_PREPARE_FAIL_STR \
+ "failed to prepare the version. Unwinding the fop"
+#define BRS_MSG_STUB_ALLOC_FAILED_STR "failed to allocate stub fop, Unwinding"
+#define BRS_MSG_BAD_OBJ_MARK_FAIL_STR "failed to mark object as bad"
+#define BRS_MSG_NON_SCRUB_BAD_OBJ_MARK_STR \
+ "bad object marking is not from the scrubber"
+#define BRS_MSG_ALLOC_MEM_FAILED_STR "failed to allocate memory"
+#define BRS_MSG_SET_INTERNAL_XATTR_STR "called on the internal xattr"
+#define BRS_MSG_REMOVE_INTERNAL_XATTR_STR "removexattr called on internal xattr"
+#define BRS_MSG_CREATE_ANONYMOUS_FD_FAILED_STR \
+ "failed to create anonymous fd for the inode"
+#define BRS_MSG_ADD_FD_TO_LIST_FAILED_STR "failed add fd to the list"
+#define BRS_MSG_SET_FD_CONTEXT_FAILED_STR \
+ "failed to set the fd context for the file"
+#define BRS_MSG_NULL_LOCAL_STR "local is NULL"
+#define BRS_MSG_DICT_ALLOC_FAILED_STR \
+ "dict allocation failed: cannot send IPC FOP to changelog"
+#define BRS_MSG_SET_EVENT_FAILED_STR "cannot set release event in dict"
+#define BRS_MSG_CREATE_FRAME_FAILED_STR "create_frame() failure"
+#define BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL_STR "closedir error"
+#define BRS_MSG_LINK_FAIL_STR "failed to record gfid"
+#define BRS_MSG_BAD_OBJ_UNLINK_FAIL_STR \
+ "failed to delete bad object link from quaratine directory"
+#define BRS_MSG_BAD_OBJECT_DIR_FAIL_STR "failed stub directory"
+#define BRS_MSG_BAD_OBJECT_DIR_SEEK_FAIL_STR \
+ "seekdir failed. Invalid argument (offset reused from another DIR * " \
+ "structure)"
+#define BRS_MSG_BAD_OBJECT_DIR_TELL_FAIL_STR "telldir failed on dir"
+#define BRS_MSG_BAD_OBJECT_DIR_READ_FAIL_STR "readdir failed on dir"
+#define BRS_MSG_CREATE_GF_DIRENT_FAILED_STR "could not create gf_dirent"
+#define BRS_MSG_GET_FD_CONTEXT_FAILED_STR "pfd is NULL"
+#define BRS_MSG_BAD_HANDLE_DIR_NULL_STR "dir if NULL"
+#define BRS_MSG_ALLOC_FAILED_STR \
+ "failed to allocate new dict for saving the paths of the corrupted " \
+ "objects. Scrub status will only display the gfid"
+#define BRS_MSG_PATH_GET_FAILED_STR "failed to get the path"
+#define BRS_MSG_PATH_XATTR_GET_FAILED_STR \
+ "failed to get the path xattr from disk for the gfid. Trying to get path " \
+ "from the memory"
+#define BRS_MSG_DICT_SET_FAILED_STR \
+ "failed to set the actual path as the value in the dict for the " \
+ "corrupted object"
+#define BRS_MSG_SET_CONTEXT_FAILED_STR \
+ "could not set fd context for release callback"
+#define BRS_MSG_CHANGE_VERSION_FAILED_STR "change version failed"
#endif /* !_BITROT_STUB_MESSAGES_H_ */
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
index 0fc2b651b2d..447dd47ff41 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
@@ -12,12 +12,11 @@
#include <sys/uio.h>
#include <signal.h>
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
#include "changelog.h"
-#include "compat-errno.h"
-#include "call-stub.h"
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/call-stub.h>
#include "bit-rot-stub.h"
#include "bit-rot-stub-mem-types.h"
@@ -26,6 +25,15 @@
#define BR_STUB_REQUEST_COOKIE 0x1
+void
+br_stub_lock_cleaner(void *arg)
+{
+ pthread_mutex_t *clean_mutex = arg;
+
+ pthread_mutex_unlock(clean_mutex);
+ return;
+}
+
void *
br_stub_signth(void *);
@@ -48,8 +56,7 @@ mem_acct_init(xlator_t *this)
ret = xlator_mem_acct_init(this, gf_br_stub_mt_end + 1);
if (ret != 0) {
- gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_MEM_ACNT_FAILED,
- "Memory accounting init failed");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_MEM_ACNT_FAILED, NULL);
return ret;
}
@@ -64,29 +71,29 @@ br_stub_bad_object_container_init(xlator_t *this, br_stub_private_t *priv)
ret = pthread_cond_init(&priv->container.bad_cond, NULL);
if (ret != 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_THREAD_FAIL,
- "pthread_cond_init failed (%d)", ret);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_THREAD_FAIL,
+ "cond_init ret=%d", ret, NULL);
goto out;
}
ret = pthread_mutex_init(&priv->container.bad_lock, NULL);
if (ret != 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_THREAD_FAIL,
- "pthread_mutex_init failed (%d)", ret);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_THREAD_FAIL,
+ "mutex_init ret=%d", ret, NULL);
goto cleanup_cond;
}
ret = pthread_attr_init(&w_attr);
if (ret != 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_THREAD_FAIL,
- "pthread_attr_init failed (%d)", ret);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_THREAD_FAIL,
+ "attr_init ret=%d", ret, NULL);
goto cleanup_lock;
}
ret = pthread_attr_setstacksize(&w_attr, BAD_OBJECT_THREAD_STACK_SIZE);
if (ret == EINVAL) {
- gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_BAD_OBJ_THREAD_FAIL,
- "Using default thread stack size");
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ BRS_MSG_USING_DEFAULT_THREAD_SIZE, NULL);
}
INIT_LIST_HEAD(&priv->container.bad_queue);
@@ -122,8 +129,7 @@ init(xlator_t *this)
br_stub_private_t *priv = NULL;
if (!this->children) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_NO_CHILD,
- "FATAL: no children");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_NO_CHILD, NULL);
goto error_return;
}
@@ -161,16 +167,20 @@ init(xlator_t *this)
* assigned inside the thread. So setting this->private here.
*/
this->private = priv;
+ if (!priv->do_versioning)
+ return 0;
ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this,
"brssign");
- if (ret != 0)
+ if (ret != 0) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SPAWN_SIGN_THRD_FAILED,
+ NULL);
goto cleanup_lock;
+ }
ret = br_stub_bad_object_container_init(this, priv);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_CONTAINER_FAIL,
- "failed to launch the thread for storing bad gfids");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_CONTAINER_FAIL, NULL);
goto cleanup_lock;
}
@@ -183,6 +193,7 @@ cleanup_lock:
pthread_mutex_destroy(&priv->lock);
free_mempool:
mem_pool_destroy(priv->local_pool);
+ priv->local_pool = NULL;
free_priv:
GF_FREE(priv);
this->private = NULL;
@@ -211,10 +222,62 @@ reconfigure(xlator_t *this, dict_t *options)
priv = this->private;
- GF_OPTION_RECONF("bitrot", priv->do_versioning, options, bool, out);
+ GF_OPTION_RECONF("bitrot", priv->do_versioning, options, bool, err);
+ if (priv->do_versioning && !priv->signth) {
+ ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this,
+ "brssign");
+ if (ret != 0) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ BRS_MSG_SPAWN_SIGN_THRD_FAILED, NULL);
+ goto err;
+ }
+
+ ret = br_stub_bad_object_container_init(this, priv);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_CONTAINER_FAIL,
+ NULL);
+ goto err;
+ }
+ } else {
+ if (priv->signth) {
+ if (gf_thread_cleanup_xint(priv->signth)) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ BRS_MSG_CANCEL_SIGN_THREAD_FAILED, NULL);
+ } else {
+ gf_smsg(this->name, GF_LOG_INFO, 0, BRS_MSG_KILL_SIGN_THREAD,
+ NULL);
+ priv->signth = 0;
+ }
+ }
+
+ if (priv->container.thread) {
+ if (gf_thread_cleanup_xint(priv->container.thread)) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ BRS_MSG_CANCEL_SIGN_THREAD_FAILED, NULL);
+ }
+ priv->container.thread = 0;
+ }
+ }
ret = 0;
-out:
+ return ret;
+err:
+ if (priv->signth) {
+ if (gf_thread_cleanup_xint(priv->signth)) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ BRS_MSG_CANCEL_SIGN_THREAD_FAILED, NULL);
+ }
+ priv->signth = 0;
+ }
+
+ if (priv->container.thread) {
+ if (gf_thread_cleanup_xint(priv->container.thread)) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ BRS_MSG_CANCEL_SIGN_THREAD_FAILED, NULL);
+ }
+ priv->container.thread = 0;
+ }
+ ret = -1;
return ret;
}
@@ -245,10 +308,13 @@ fini(xlator_t *this)
if (!priv)
return;
+ if (!priv->do_versioning)
+ goto cleanup;
+
ret = gf_thread_cleanup_xint(priv->signth);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_CANCEL_SIGN_THREAD_FAILED,
- "Could not cancel sign serializer thread");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_CANCEL_SIGN_THREAD_FAILED,
+ NULL);
goto out;
}
priv->signth = 0;
@@ -262,13 +328,10 @@ fini(xlator_t *this)
GF_FREE(sigstub);
}
- pthread_mutex_destroy(&priv->lock);
- pthread_cond_destroy(&priv->cond);
-
ret = gf_thread_cleanup_xint(priv->container.thread);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_CANCEL_SIGN_THREAD_FAILED,
- "Could not cancel sign serializer thread");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_CANCEL_SIGN_THREAD_FAILED,
+ NULL);
goto out;
}
@@ -280,14 +343,18 @@ fini(xlator_t *this)
call_stub_destroy(stub);
}
+ pthread_mutex_destroy(&priv->container.bad_lock);
+ pthread_cond_destroy(&priv->container.bad_cond);
+
+cleanup:
+ pthread_mutex_destroy(&priv->lock);
+ pthread_cond_destroy(&priv->cond);
+
if (priv->local_pool) {
mem_pool_destroy(priv->local_pool);
priv->local_pool = NULL;
}
- pthread_mutex_destroy(&priv->container.bad_lock);
- pthread_cond_destroy(&priv->container.bad_cond);
-
this->private = NULL;
GF_FREE(priv);
@@ -357,8 +424,8 @@ br_stub_prepare_version_request(xlator_t *this, dict_t *dict,
priv = this->private;
br_set_ongoingversion(obuf, oversion, priv->boot);
- return dict_set_static_bin(dict, BITROT_CURRENT_VERSION_KEY, (void *)obuf,
- sizeof(br_version_t));
+ return dict_set_bin(dict, BITROT_CURRENT_VERSION_KEY, (void *)obuf,
+ sizeof(br_version_t));
}
static int
@@ -369,8 +436,7 @@ br_stub_prepare_signing_request(dict_t *dict, br_signature_t *sbuf,
br_set_signature(sbuf, sign, signaturelen, &size);
- return dict_set_static_bin(dict, BITROT_SIGNING_VERSION_KEY, (void *)sbuf,
- size);
+ return dict_set_bin(dict, BITROT_SIGNING_VERSION_KEY, (void *)sbuf, size);
}
/**
@@ -510,11 +576,9 @@ br_stub_need_versioning(xlator_t *this, fd_t *fd, gf_boolean_t *versioning,
ret = br_stub_init_inode_versions(this, fd, fd->inode, version,
_gf_true, _gf_false, &ctx_addr);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- BRS_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to "
- " init the inode context for the inode %s",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ BRS_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s",
+ uuid_utoa(fd->inode->gfid), NULL);
goto error_return;
}
}
@@ -548,10 +612,8 @@ br_stub_anon_fd_ctx(xlator_t *this, fd_t *fd, br_stub_inode_ctx_t *ctx)
if (!br_stub_fd) {
ret = br_stub_add_fd_to_inode(this, fd, ctx);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ADD_FD_TO_INODE,
- "failed to add fd to "
- "the inode (gfid: %s)",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ADD_FD_TO_INODE,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
goto out;
}
}
@@ -571,9 +633,8 @@ br_stub_versioning_prep(call_frame_t *frame, xlator_t *this, fd_t *fd,
local = br_stub_alloc_local(this);
if (!local) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRS_MSG_NO_MEMORY,
- "local allocation failed (gfid: %s)",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRS_MSG_NO_MEMORY, "gfid=%s",
+ uuid_utoa(fd->inode->gfid), NULL);
goto error_return;
}
@@ -643,8 +704,8 @@ br_stub_check_bad_object(xlator_t *this, inode_t *inode, int32_t *op_ret,
ret = br_stub_is_bad_object(this, inode);
if (ret == -2) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJECT_ACCESS,
- "%s is a bad object. Returning", uuid_utoa(inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJECT_ACCESS,
+ "gfid=%s", uuid_utoa(inode->gfid), NULL);
*op_ret = -1;
*op_errno = EIO;
}
@@ -653,9 +714,9 @@ br_stub_check_bad_object(xlator_t *this, inode_t *inode, int32_t *op_ret,
ret = br_stub_init_inode_versions(this, NULL, inode, version, _gf_true,
_gf_false, NULL);
if (ret) {
- gf_msg(
- this->name, GF_LOG_ERROR, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to init inode context for %s", uuid_utoa(inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ BRS_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s",
+ uuid_utoa(inode->gfid), NULL);
*op_ret = -1;
*op_errno = EINVAL;
}
@@ -792,23 +853,27 @@ br_stub_perform_incversioning(xlator_t *this, call_frame_t *frame,
op_errno = ENOMEM;
dict = dict_new();
if (!dict)
- goto done;
+ goto out;
ret = br_stub_alloc_versions(&obuf, NULL, 0);
- if (ret)
- goto dealloc_dict;
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_MEM_FAILED,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+ goto out;
+ }
ret = br_stub_prepare_version_request(this, dict, obuf, writeback_version);
- if (ret)
- goto dealloc_versions;
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_VERSION_PREPARE_FAIL,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+ br_stub_dealloc_versions(obuf);
+ goto out;
+ }
ret = br_stub_fd_versioning(
this, frame, stub, dict, fd, br_stub_fd_incversioning_cbk,
writeback_version, BR_STUB_INCREMENTAL_VERSIONING, !WRITEBACK_DURABLE);
-
-dealloc_versions:
- br_stub_dealloc_versions(obuf);
-dealloc_dict:
- dict_unref(dict);
-done:
+out:
+ if (dict)
+ dict_unref(dict);
if (ret) {
if (local)
frame->local = NULL;
@@ -846,6 +911,24 @@ br_stub_signth(void *arg)
THIS = this;
while (1) {
+ /*
+ * Disabling bit-rot feature leads to this particular thread
+ * getting cleaned up by reconfigure via a call to the function
+ * gf_thread_cleanup_xint (which in turn calls pthread_cancel
+ * and pthread_join). But, if this thread had held the mutex
+ * &priv->lock at the time of cancellation, then it leads to
+ * deadlock in future when bit-rot feature is enabled (which
+ * again spawns this thread which cant hold the lock as the
+ * mutex is still held by the previous instance of the thread
+ * which got killed). Also, the br_stub_handle_object_signature
+ * function which is called whenever file has to be signed
+ * also gets blocked as it too attempts to acquire &priv->lock.
+ *
+ * So, arrange for the lock to be unlocked as part of the
+ * cleanup of this thread using pthread_cleanup_push and
+ * pthread_cleanup_pop.
+ */
+ pthread_cleanup_push(br_stub_lock_cleaner, &priv->lock);
pthread_mutex_lock(&priv->lock);
{
while (list_empty(&priv->squeue))
@@ -856,6 +939,7 @@ br_stub_signth(void *arg)
list_del_init(&sigstub->list);
}
pthread_mutex_unlock(&priv->lock);
+ pthread_cleanup_pop(0);
call_resume(sigstub->stub);
@@ -931,10 +1015,9 @@ br_stub_compare_sign_version(xlator_t *this, inode_t *inode,
if (invalid) {
ret = -1;
- gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SIGN_VERSION_ERROR,
- "Signing version exceeds "
- "current version [%lu > %lu]",
- sbuf->signedversion, ctx->currentversion);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SIGN_VERSION_ERROR,
+ "Signing-ver=%lu", sbuf->signedversion, "current-ver=%lu",
+ ctx->currentversion, NULL);
}
out:
@@ -945,31 +1028,36 @@ static int
br_stub_prepare_signature(xlator_t *this, dict_t *dict, inode_t *inode,
br_isignature_t *sign, int *fakesuccess)
{
- int32_t ret = 0;
+ int32_t ret = -1;
size_t signaturelen = 0;
br_signature_t *sbuf = NULL;
if (!br_is_signature_type_valid(sign->signaturetype))
- goto error_return;
+ goto out;
signaturelen = sign->signaturelen;
ret = br_stub_alloc_versions(NULL, &sbuf, signaturelen);
- if (ret)
- goto error_return;
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_MEM_FAILED,
+ "gfid=%s", uuid_utoa(inode->gfid), NULL);
+ ret = -1;
+ goto out;
+ }
ret = br_stub_prepare_signing_request(dict, sbuf, sign, signaturelen);
- if (ret)
- goto dealloc_versions;
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SIGN_PREPARE_FAIL,
+ "gfid=%s", uuid_utoa(inode->gfid), NULL);
+ ret = -1;
+ br_stub_dealloc_versions(sbuf);
+ goto out;
+ }
+ /* At this point sbuf has been added to dict, so the memory will be freed
+ * when the data from the dict is destroyed
+ */
ret = br_stub_compare_sign_version(this, inode, sbuf, dict, fakesuccess);
- if (ret)
- goto dealloc_versions;
-
- return 0;
-
-dealloc_versions:
- br_stub_dealloc_versions(sbuf);
-error_return:
- return -1;
+out:
+ return ret;
}
static void
@@ -986,12 +1074,18 @@ br_stub_handle_object_signature(call_frame_t *frame, xlator_t *this, fd_t *fd,
priv = this->private;
- if (frame->root->pid != GF_CLIENT_PID_BITD)
+ if (frame->root->pid != GF_CLIENT_PID_BITD) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno, BRS_MSG_NON_BITD_PID,
+ "PID=%d", frame->root->pid, NULL);
goto dofop;
+ }
ret = br_stub_prepare_signature(this, dict, fd->inode, sign, &fakesuccess);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SIGN_PREPARE_FAIL,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
goto dofop;
+ }
if (fakesuccess) {
op_ret = op_errno = 0;
goto dofop;
@@ -1141,10 +1235,8 @@ br_stub_handle_object_reopen(call_frame_t *frame, xlator_t *this, fd_t *fd,
stub = fop_fsetxattr_cbk_stub(frame, br_stub_fsetxattr_resume, 0, 0, NULL);
if (!stub) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED,
- "failed to allocate stub for fsetxattr fop (gfid: %s),"
- " unwinding",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED,
+ "fsetxattr gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
goto cleanup_local;
}
@@ -1198,9 +1290,8 @@ br_stub_fsetxattr_bad_object_cbk(call_frame_t *frame, void *cookie,
*/
ret = br_stub_mark_object_bad(this, local->u.context.inode);
if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_MARK_FAIL,
- "failed to mark object %s as bad",
- uuid_utoa(local->u.context.inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_MARK_FAIL,
+ "gfid=%s", uuid_utoa(local->u.context.inode->gfid), NULL);
ret = br_stub_add(this, local->u.context.inode->gfid);
@@ -1220,18 +1311,15 @@ br_stub_handle_bad_object_key(call_frame_t *frame, xlator_t *this, fd_t *fd,
int32_t op_errno = EINVAL;
if (frame->root->pid != GF_CLIENT_PID_SCRUB) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_NON_SCRUB_BAD_OBJ_MARK,
- "bad object marking "
- "on %s is not from the scrubber",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_NON_SCRUB_BAD_OBJ_MARK,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
goto unwind;
}
local = br_stub_alloc_local(this);
if (!local) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_NO_MEMORY,
- "failed to allocate memory for fsetxattr on %s",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_MEM_FAILED,
+ "fsetxattr gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
op_ret = -1;
op_errno = ENOMEM;
goto unwind;
@@ -1270,10 +1358,9 @@ br_stub_handle_internal_xattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
int32_t op_ret = -1;
int32_t op_errno = EINVAL;
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SET_INTERNAL_XATTR,
- "setxattr called"
- " on the internal xattr %s for inode %s",
- key, uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SET_INTERNAL_XATTR,
+ "setxattr key=%s", key, "inode-gfid=%s", uuid_utoa(fd->inode->gfid),
+ NULL);
STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, NULL);
return 0;
@@ -1291,10 +1378,8 @@ br_stub_dump_xattr(xlator_t *this, dict_t *dict, int *op_errno)
goto out;
}
dict_dump_to_str(dict, dump, BR_STUB_DUMP_STR_SIZE, format);
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SET_INTERNAL_XATTR,
- "fsetxattr called on "
- "internal xattr %s",
- dump);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SET_INTERNAL_XATTR,
+ "fsetxattr dump=%s", dump, NULL);
out:
if (dump) {
GF_FREE(dump);
@@ -1331,6 +1416,8 @@ br_stub_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
/* object signature request */
ret = dict_get_bin(dict, GLUSTERFS_SET_OBJECT_SIGNATURE, (void **)&sign);
if (!ret) {
+ gf_msg_debug(this->name, 0, "got SIGNATURE request on %s",
+ uuid_utoa(fd->inode->gfid));
br_stub_handle_object_signature(frame, this, fd, dict, sign, xdata);
goto done;
}
@@ -1423,10 +1510,8 @@ br_stub_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
if (!strcmp(BITROT_OBJECT_BAD_KEY, name) ||
!strcmp(BITROT_SIGNING_VERSION_KEY, name) ||
!strcmp(BITROT_CURRENT_VERSION_KEY, name)) {
- gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_REMOVE_INTERNAL_XATTR,
- "removexattr called"
- " on internal xattr %s for file %s",
- name, loc->path);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_REMOVE_INTERNAL_XATTR,
+ "name=%s", name, "file-path=%s", loc->path, NULL);
goto unwind;
}
@@ -1448,10 +1533,9 @@ br_stub_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
if (!strcmp(BITROT_OBJECT_BAD_KEY, name) ||
!strcmp(BITROT_SIGNING_VERSION_KEY, name) ||
!strcmp(BITROT_CURRENT_VERSION_KEY, name)) {
- gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_REMOVE_INTERNAL_XATTR,
- "removexattr called"
- " on internal xattr %s for inode %s",
- name, uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_REMOVE_INTERNAL_XATTR,
+ "name=%s", name, "inode-gfid=%s", uuid_utoa(fd->inode->gfid),
+ NULL);
goto unwind;
}
@@ -1476,7 +1560,7 @@ br_stub_listxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret < 0)
goto unwind;
- br_stub_remove_vxattrs(xattr);
+ br_stub_remove_vxattrs(xattr, _gf_true);
unwind:
STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, xattr, xdata);
@@ -1537,10 +1621,8 @@ br_stub_is_object_stale(xlator_t *this, call_frame_t *frame, inode_t *inode,
ret = br_stub_get_inode_ctx(this, inode, &ctx_addr);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to get the "
- "inode context for %s",
- uuid_utoa(inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED,
+ "gfid=%s", uuid_utoa(inode->gfid), NULL);
goto out;
}
@@ -1655,7 +1737,7 @@ br_stub_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
op_ret = totallen;
delkeys:
- br_stub_remove_vxattrs(xattr);
+ br_stub_remove_vxattrs(xattr, _gf_true);
unwind:
STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, xattr, xdata);
@@ -1711,9 +1793,7 @@ br_stub_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
const char *name, dict_t *xdata)
{
void *cookie = NULL;
- uuid_t rootgfid = {
- 0,
- };
+ static uuid_t rootgfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
fop_getxattr_cbk_t cbk = br_stub_getxattr_cbk;
int32_t op_ret = -1;
int32_t op_errno = EINVAL;
@@ -1725,8 +1805,6 @@ br_stub_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
GF_VALIDATE_OR_GOTO(this->name, this->private, unwind);
GF_VALIDATE_OR_GOTO(this->name, loc->inode, unwind);
- rootgfid[15] = 1;
-
if (!name) {
cbk = br_stub_listxattr_cbk;
goto wind;
@@ -1796,16 +1874,13 @@ br_stub_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
const char *name, dict_t *xdata)
{
void *cookie = NULL;
- uuid_t rootgfid = {
- 0,
- };
+ static uuid_t rootgfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
fop_fgetxattr_cbk_t cbk = br_stub_getxattr_cbk;
int32_t op_ret = -1;
int32_t op_errno = EINVAL;
br_stub_local_t *local = NULL;
br_stub_private_t *priv = NULL;
- rootgfid[15] = 1;
priv = this->private;
if (!name) {
@@ -2025,10 +2100,8 @@ br_stub_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
offset, flags, iobref, xdata);
if (!stub) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED,
- "failed to allocate stub for write fop (gfid: %s), "
- "unwinding",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED,
+ "write gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
goto cleanup_local;
}
@@ -2141,10 +2214,8 @@ br_stub_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
stub = fop_ftruncate_stub(frame, br_stub_ftruncate_resume, fd, offset,
xdata);
if (!stub) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED,
- "failed to allocate stub for ftruncate fop (gfid: %s),"
- " unwinding",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED,
+ "ftruncate gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
goto cleanup_local;
}
@@ -2248,10 +2319,8 @@ br_stub_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
fd = fd_anonymous(loc->inode);
if (!fd) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_CREATE_ANONYMOUS_FD_FAILED,
- "failed to create "
- "anonymous fd for the inode %s",
- uuid_utoa(loc->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_CREATE_ANONYMOUS_FD_FAILED,
+ "inode-gfid=%s", uuid_utoa(loc->inode->gfid), NULL);
goto unwind;
}
@@ -2281,10 +2350,8 @@ br_stub_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
stub = fop_truncate_stub(frame, br_stub_truncate_resume, loc, offset,
xdata);
if (!stub) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED,
- "failed to allocate stub for truncate fop (gfid: %s), "
- "unwinding",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED,
+ "truncate gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
goto cleanup_local;
}
@@ -2357,11 +2424,9 @@ br_stub_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
ret = br_stub_init_inode_versions(this, fd, fd->inode, version,
_gf_true, _gf_false, &ctx_addr);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- BRS_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to init the inode context for "
- "the file %s (gfid: %s)",
- loc->path, uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ BRS_MSG_GET_INODE_CONTEXT_FAILED, "path=%s", loc->path,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
goto unwind;
}
}
@@ -2380,9 +2445,8 @@ br_stub_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
ret = br_stub_add_fd_to_inode(this, fd, ctx);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ADD_FD_TO_LIST_FAILED,
- "failed add fd to the list (gfid: %s)",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ADD_FD_TO_LIST_FAILED,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
goto unwind;
}
@@ -2413,10 +2477,8 @@ br_stub_add_fd_to_inode(xlator_t *this, fd_t *fd, br_stub_inode_ctx_t *ctx)
ret = br_stub_require_release_call(this, fd, &br_stub_fd);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SET_FD_CONTEXT_FAILED,
- "failed to set the fd "
- "context for the file (gfid: %s)",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SET_FD_CONTEXT_FAILED,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
goto out;
}
@@ -2703,17 +2765,37 @@ br_stub_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (!IA_ISREG(entry->d_stat.ia_type))
continue;
+ /*
+ * Readdirp for most part is a bulk lookup for all the entries
+ * present in the directory being read. Ideally, for each
+ * entry, the handling should be similar to that of a lookup
+ * callback. But for now, just keeping this as it has been
+ * until now (which means, this comment has been added much
+ * later as part of a change that wanted to send the flag
+ * of true/false to br_stub_remove_vxattrs to indicate whether
+ * the bad-object xattr should be removed from the entry->dict
+ * or not). Until this change, the function br_stub_remove_vxattrs
+ * was just removing all the xattrs associated with bit-rot-stub
+ * (like version, bad-object, signature etc). But, there are
+ * scenarios where we only want to send bad-object xattr and not
+ * others. So this comment is part of that change which also
+ * mentions about another possible change that might be needed
+ * in future.
+ * But for now, adding _gf_true means functionally its same as
+ * what this function was doing before. Just remove all the stub
+ * related xattrs.
+ */
ret = br_stub_get_inode_ctx(this, entry->inode, &ctxaddr);
if (ret < 0)
ctxaddr = 0;
if (ctxaddr) { /* already has the context */
- br_stub_remove_vxattrs(entry->dict);
+ br_stub_remove_vxattrs(entry->dict, _gf_true);
continue;
}
ret = br_stub_lookup_version(this, entry->inode->gfid, entry->inode,
entry->dict);
- br_stub_remove_vxattrs(entry->dict);
+ br_stub_remove_vxattrs(entry->dict, _gf_true);
if (ret) {
/**
* there's no per-file granularity support in case of
@@ -2849,13 +2931,22 @@ br_stub_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t ret = 0;
br_stub_private_t *priv = NULL;
gf_boolean_t ver_enabled = _gf_false;
+ gf_boolean_t remove_bad_file_marker = _gf_true;
BR_STUB_VER_ENABLED_IN_CALLPATH(frame, ver_enabled);
priv = this->private;
if (op_ret < 0) {
(void)br_stub_handle_lookup_error(this, inode, op_errno);
- goto unwind;
+
+ /*
+ * If the lookup error is not ENOENT, then it is better
+ * to send the bad file marker to the higher layer (if
+ * it has been set)
+ */
+ if (op_errno != ENOENT)
+ remove_bad_file_marker = _gf_false;
+ goto delkey;
}
BR_STUB_VER_COND_GOTO(priv, (!ver_enabled), delkey);
@@ -2876,7 +2967,13 @@ br_stub_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (ret) {
op_ret = -1;
op_errno = EIO;
- goto unwind;
+ /*
+ * This flag ensures that in the label @delkey below,
+ * bad file marker is not removed from the dictinary,
+ * but other virtual xattrs (such as version, signature)
+ * are removed.
+ */
+ remove_bad_file_marker = _gf_false;
}
goto delkey;
}
@@ -2900,11 +2997,11 @@ br_stub_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
*/
op_ret = -1;
op_errno = EIO;
- goto unwind;
+ goto delkey;
}
delkey:
- br_stub_remove_vxattrs(xattr);
+ br_stub_remove_vxattrs(xattr, remove_bad_file_marker);
unwind:
STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, stbuf, xattr,
postparent);
@@ -3090,8 +3187,7 @@ br_stub_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
goto unwind;
if (!local) {
- gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_NULL_LOCAL,
- "local is NULL");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_NULL_LOCAL, NULL);
goto unwind;
}
inode = local->u.context.inode;
@@ -3109,9 +3205,8 @@ br_stub_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
* has to be removed manually. Its not a good idea to fail
* the fop, as the object has already been deleted.
*/
- gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to get the context for the inode %s",
- uuid_utoa(inode->gfid));
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED,
+ "inode-gfid=%s", uuid_utoa(inode->gfid), NULL);
goto unwind;
}
@@ -3154,9 +3249,9 @@ br_stub_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int flag,
if (!local) {
op_ret = -1;
op_errno = ENOMEM;
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRS_MSG_NO_MEMORY,
- "failed to allocate memory for local (path: %s, gfid: %s)",
- loc->path, uuid_utoa(loc->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRS_MSG_ALLOC_MEM_FAILED,
+ "local path=%s", loc->path, "gfid=%s",
+ uuid_utoa(loc->inode->gfid), NULL);
goto unwind;
}
@@ -3231,23 +3326,21 @@ br_stub_send_ipc_fop(xlator_t *this, fd_t *fd, unsigned long releaseversion,
xdata = dict_new();
if (!xdata) {
- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, BRS_MSG_NO_MEMORY,
- "dict allocation failed: cannot send IPC FOP "
- "to changelog");
+ gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, BRS_MSG_DICT_ALLOC_FAILED,
+ NULL);
goto out;
}
ret = dict_set_static_bin(xdata, "RELEASE-EVENT", &ev, CHANGELOG_EV_SIZE);
if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SET_EVENT_FAILED,
- "cannot set release event in dict");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SET_EVENT_FAILED, NULL);
goto dealloc_dict;
}
frame = create_frame(this, this->ctx->pool);
if (!frame) {
- gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_CREATE_FRAME_FAILED,
- "create_frame() failure");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_CREATE_FRAME_FAILED,
+ NULL);
goto dealloc_dict;
}
@@ -3382,8 +3475,8 @@ br_stub_releasedir(xlator_t *this, fd_t *fd)
if (fctx->bad_object.dir) {
ret = sys_closedir(fctx->bad_object.dir);
if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL,
- "closedir error: %s", strerror(errno));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL,
+ "error=%s", strerror(errno), NULL);
}
GF_FREE(fctx);
@@ -3481,3 +3574,17 @@ struct volume_options options[] = {
.default_value = "{{ brick.path }}"},
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "bitrot-stub",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.h b/xlators/features/bit-rot/src/stub/bit-rot-stub.h
index 9e6492ebdd7..edd79a77e4f 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.h
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.h
@@ -10,20 +10,20 @@
#ifndef __BIT_ROT_STUB_H__
#define __BIT_ROT_STUB_H__
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
-#include "defaults.h"
-#include "call-stub.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/call-stub.h>
#include "bit-rot-stub-mem-types.h"
-#include "syscall.h"
-#include "common-utils.h"
+#include <glusterfs/syscall.h>
+#include <glusterfs/common-utils.h>
#include "bit-rot-common.h"
#include "bit-rot-stub-messages.h"
#include "glusterfs3-xdr.h"
-#include "syncop.h"
-#include "syncop-utils.h"
+#include <glusterfs/syncop.h>
+#include <glusterfs/syncop-utils.h>
#define BAD_OBJECT_THREAD_STACK_SIZE ((size_t)(1024 * 1024))
#define BR_STUB_DUMP_STR_SIZE 65536
@@ -222,8 +222,8 @@ br_stub_require_release_call(xlator_t *this, fd_t *fd, br_stub_fd_t **fd_ctx)
ret = br_stub_fd_ctx_set(this, fd, br_stub_fd);
if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SET_CONTEXT_FAILED,
- "could not set fd context (for release callback");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SET_CONTEXT_FAILED,
+ NULL);
else
*fd_ctx = br_stub_fd;
@@ -273,10 +273,9 @@ __br_stub_set_ongoing_version(br_stub_inode_ctx_t *ctx, unsigned long version)
if (ctx->currentversion < version)
ctx->currentversion = version;
else
- gf_msg("bit-rot-stub", GF_LOG_WARNING, 0, BRS_MSG_CHANGE_VERSION_FAILED,
- "current version: %lu"
- "new version: %lu",
- ctx->currentversion, version);
+ gf_smsg("bit-rot-stub", GF_LOG_WARNING, 0,
+ BRS_MSG_CHANGE_VERSION_FAILED, "current version=%lu",
+ ctx->currentversion, "new version=%lu", version, NULL);
}
static inline int
@@ -359,10 +358,18 @@ br_stub_is_internal_xattr(const char *name)
}
static inline void
-br_stub_remove_vxattrs(dict_t *xattr)
+br_stub_remove_vxattrs(dict_t *xattr, gf_boolean_t remove_bad_marker)
{
if (xattr) {
- dict_del(xattr, BITROT_OBJECT_BAD_KEY);
+ /*
+ * When a file is corrupted, bad-object should be
+ * set in the dict. But, other info such as version,
+ * signature etc should not be set. Hence the flag
+ * remove_bad_marker. The consumer should know whether
+ * to send the bad-object info in the dict or not.
+ */
+ if (remove_bad_marker)
+ dict_del(xattr, BITROT_OBJECT_BAD_KEY);
dict_del(xattr, BITROT_CURRENT_VERSION_KEY);
dict_del(xattr, BITROT_SIGNING_VERSION_KEY);
dict_del(xattr, BITROT_SIGNING_XATTR_SIZE_KEY);
@@ -390,9 +397,8 @@ br_stub_is_bad_object(xlator_t *this, inode_t *inode)
ret = br_stub_get_inode_ctx(this, inode, &ctx_addr);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to get the inode context for the inode %s",
- uuid_utoa(inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED,
+ "inode-gfid=%s", uuid_utoa(inode->gfid), NULL);
bad_object = -1;
goto out;
}
@@ -420,10 +426,8 @@ br_stub_mark_object_bad(xlator_t *this, inode_t *inode)
ret = br_stub_get_inode_ctx(this, inode, &ctx_addr);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to get the "
- "inode context for the inode %s",
- uuid_utoa(inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED,
+ "inode-gfid=%s", uuid_utoa(inode->gfid), NULL);
goto out;
}
diff --git a/xlators/features/changelog/lib/examples/python/libgfchangelog.py b/xlators/features/changelog/lib/examples/python/libgfchangelog.py
index 2cdbf1152b9..2da9f2d2a8c 100644
--- a/xlators/features/changelog/lib/examples/python/libgfchangelog.py
+++ b/xlators/features/changelog/lib/examples/python/libgfchangelog.py
@@ -1,8 +1,10 @@
import os
from ctypes import *
+from ctypes.util import find_library
class Changes(object):
- libgfc = CDLL("libgfchangelog.so", mode=RTLD_GLOBAL, use_errno=True)
+ libgfc = CDLL(find_library("gfchangelog"), mode=RTLD_GLOBAL,
+ use_errno=True)
@classmethod
def geterrno(cls):
diff --git a/xlators/features/changelog/lib/src/Makefile.am b/xlators/features/changelog/lib/src/Makefile.am
index c4b9a3df692..c933ec53ed2 100644
--- a/xlators/features/changelog/lib/src/Makefile.am
+++ b/xlators/features/changelog/lib/src/Makefile.am
@@ -1,7 +1,7 @@
libgfchangelog_la_CFLAGS = -Wall $(GF_CFLAGS) $(GF_DARWIN_LIBGLUSTERFS_CFLAGS) \
-DDATADIR=\"$(localstatedir)\"
-libgfchangelog_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 -fpic \
+libgfchangelog_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 -D__USE_LARGEFILE64 -fpic \
-I../../../src/ -I$(top_srcdir)/libglusterfs/src \
-I$(top_srcdir)/xlators/features/changelog/src \
-I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
diff --git a/xlators/features/changelog/lib/src/changelog-lib-messages.h b/xlators/features/changelog/lib/src/changelog-lib-messages.h
index 32b3497d89d..d7fe7274353 100644
--- a/xlators/features/changelog/lib/src/changelog-lib-messages.h
+++ b/xlators/features/changelog/lib/src/changelog-lib-messages.h
@@ -11,7 +11,7 @@
#ifndef _CHANGELOG_LIB_MESSAGES_H_
#define _CHANGELOG_LIB_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
@@ -34,7 +34,7 @@ GLFS_MSGID(
CHANGELOG_LIB_MSG_MMAP_FAILED, CHANGELOG_LIB_MSG_MUNMAP_FAILED,
CHANGELOG_LIB_MSG_ASCII_ERROR, CHANGELOG_LIB_MSG_STAT_FAILED,
CHANGELOG_LIB_MSG_GET_XATTR_FAILED, CHANGELOG_LIB_MSG_PUBLISH_ERROR,
- CHANGELOG_LIB_MSG_PARSE_ERROR, CHANGELOG_LIB_MSG_TOTAL_LOG_INFO,
+ CHANGELOG_LIB_MSG_PARSE_ERROR, CHANGELOG_LIB_MSG_MIN_MAX_INFO,
CHANGELOG_LIB_MSG_CLEANUP_ERROR, CHANGELOG_LIB_MSG_UNLINK_FAILED,
CHANGELOG_LIB_MSG_NOTIFY_REGISTER_FAILED,
CHANGELOG_LIB_MSG_INVOKE_RPC_FAILED, CHANGELOG_LIB_MSG_DRAINING_EVENT_INFO,
@@ -43,6 +43,32 @@ GLFS_MSGID(
CHANGELOG_LIB_MSG_NOTIFY_REGISTER_INFO,
CHANGELOG_LIB_MSG_THREAD_CLEANUP_WARNING,
CHANGELOG_LIB_MSG_COPY_FROM_BUFFER_FAILED,
- CHANGELOG_LIB_MSG_PTHREAD_JOIN_FAILED, CHANGELOG_LIB_MSG_HIST_FAILED);
+ CHANGELOG_LIB_MSG_PTHREAD_JOIN_FAILED, CHANGELOG_LIB_MSG_HIST_FAILED,
+ CHANGELOG_LIB_MSG_DRAINED_EVENT_INFO, CHANGELOG_LIB_MSG_PARSE_ERROR_CEASED,
+ CHANGELOG_LIB_MSG_REQUESTING_INFO, CHANGELOG_LIB_MSG_FINAL_INFO);
+
+#define CHANGELOG_LIB_MSG_NOTIFY_REGISTER_INFO_STR "Registering brick"
+#define CHANGELOG_LIB_MSG_RENAME_FAILED_STR "error moving changelog file"
+#define CHANGELOG_LIB_MSG_OPEN_FAILED_STR "cannot open changelog file"
+#define CHANGELOG_LIB_MSG_UNLINK_FAILED_STR "failed to unlink"
+#define CHANGELOG_LIB_MSG_FAILED_TO_RMDIR_STR "failed to rmdir"
+#define CHANGELOG_LIB_MSG_STAT_FAILED_STR "stat failed on changelog file"
+#define CHANGELOG_LIB_MSG_PARSE_ERROR_STR "could not parse changelog"
+#define CHANGELOG_LIB_MSG_PARSE_ERROR_CEASED_STR \
+ "parsing error, ceased publishing..."
+#define CHANGELOG_LIB_MSG_HTIME_ERROR_STR "fop failed on htime file"
+#define CHANGELOG_LIB_MSG_GET_XATTR_FAILED_STR \
+ "error extracting max timstamp from htime file"
+#define CHANGELOG_LIB_MSG_MIN_MAX_INFO_STR "changelogs min max"
+#define CHANGELOG_LIB_MSG_REQUESTING_INFO_STR "Requesting historical changelogs"
+#define CHANGELOG_LIB_MSG_FINAL_INFO_STR "FINAL"
+#define CHANGELOG_LIB_MSG_HIST_FAILED_STR \
+ "Requested changelog range is not available"
+#define CHANGELOG_LIB_MSG_GET_TIME_ERROR_STR "wrong result"
+#define CHANGELOG_LIB_MSG_CLEANING_BRICK_ENTRY_INFO_STR \
+ "Cleaning brick entry for brick"
+#define CHANGELOG_LIB_MSG_DRAINING_EVENT_INFO_STR "Draining event"
+#define CHANGELOG_LIB_MSG_DRAINED_EVENT_INFO_STR "Drained event"
+#define CHANGELOG_LIB_MSG_FREEING_ENTRY_INFO_STR "freeing entry"
#endif /* !_CHANGELOG_MESSAGES_H_ */
diff --git a/xlators/features/changelog/lib/src/gf-changelog-api.c b/xlators/features/changelog/lib/src/gf-changelog-api.c
index 1b6e932596d..81a5cbfec10 100644
--- a/xlators/features/changelog/lib/src/gf-changelog-api.c
+++ b/xlators/features/changelog/lib/src/gf-changelog-api.c
@@ -8,10 +8,10 @@
cases as published by the Free Software Foundation.
*/
-#include "compat-uuid.h"
-#include "globals.h"
-#include "glusterfs.h"
-#include "syscall.h"
+#include <glusterfs/compat-uuid.h>
+#include <glusterfs/globals.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/syscall.h>
#include "gf-changelog-helpers.h"
#include "gf-changelog-journal.h"
@@ -56,8 +56,8 @@ gf_changelog_done(char *file)
ret = sys_rename(buffer, to_path);
if (ret) {
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_LIB_MSG_RENAME_FAILED, "cannot move changelog file",
- "from=%s", file, "to=%s", to_path, NULL);
+ CHANGELOG_LIB_MSG_RENAME_FAILED, "from=%s", file, "to=%s",
+ to_path, NULL);
goto out;
}
diff --git a/xlators/features/changelog/lib/src/gf-changelog-helpers.c b/xlators/features/changelog/lib/src/gf-changelog-helpers.c
index fd15ec68ab8..75f8a6dfc08 100644
--- a/xlators/features/changelog/lib/src/gf-changelog-helpers.c
+++ b/xlators/features/changelog/lib/src/gf-changelog-helpers.c
@@ -11,13 +11,7 @@
#include "changelog-mem-types.h"
#include "gf-changelog-helpers.h"
#include "changelog-lib-messages.h"
-#include "syscall.h"
-
-ssize_t
-gf_changelog_read_path(int fd, char *buffer, size_t bufsize)
-{
- return sys_read(fd, buffer, bufsize);
-}
+#include <glusterfs/syscall.h>
size_t
gf_changelog_write(int fd, char *buffer, size_t len)
@@ -64,20 +58,7 @@ gf_rfc3986_encode_space_newline(unsigned char *s, char *enc, char *estr)
* made a part of libglusterfs.
*/
-static pthread_key_t rl_key;
-static pthread_once_t rl_once = PTHREAD_ONCE_INIT;
-
-static void
-readline_destructor(void *ptr)
-{
- GF_FREE(ptr);
-}
-
-static void
-readline_once(void)
-{
- pthread_key_create(&rl_key, readline_destructor);
-}
+static __thread read_line_t thread_tsd = {};
static ssize_t
my_read(read_line_t *tsd, int fd, char *ptr)
@@ -97,27 +78,6 @@ my_read(read_line_t *tsd, int fd, char *ptr)
return 1;
}
-static int
-gf_readline_init_once(read_line_t **tsd)
-{
- if (pthread_once(&rl_once, readline_once) != 0)
- return -1;
-
- *tsd = pthread_getspecific(rl_key);
- if (*tsd)
- goto out;
-
- *tsd = GF_CALLOC(1, sizeof(**tsd), gf_changelog_mt_libgfchangelog_rl_t);
- if (!*tsd)
- return -1;
-
- if (pthread_setspecific(rl_key, *tsd) != 0)
- return -1;
-
-out:
- return 0;
-}
-
ssize_t
gf_readline(int fd, void *vptr, size_t maxlen)
{
@@ -125,10 +85,7 @@ gf_readline(int fd, void *vptr, size_t maxlen)
size_t rc = 0;
char c = ' ';
char *ptr = NULL;
- read_line_t *tsd = NULL;
-
- if (gf_readline_init_once(&tsd))
- return -1;
+ read_line_t *tsd = &thread_tsd;
ptr = vptr;
for (n = 1; n < maxlen; n++) {
@@ -151,10 +108,7 @@ off_t
gf_lseek(int fd, off_t offset, int whence)
{
off_t off = 0;
- read_line_t *tsd = NULL;
-
- if (gf_readline_init_once(&tsd))
- return -1;
+ read_line_t *tsd = &thread_tsd;
off = sys_lseek(fd, offset, whence);
if (off == -1)
@@ -169,10 +123,7 @@ gf_lseek(int fd, off_t offset, int whence)
int
gf_ftruncate(int fd, off_t length)
{
- read_line_t *tsd = NULL;
-
- if (gf_readline_init_once(&tsd))
- return -1;
+ read_line_t *tsd = &thread_tsd;
if (sys_ftruncate(fd, 0))
return -1;
diff --git a/xlators/features/changelog/lib/src/gf-changelog-helpers.h b/xlators/features/changelog/lib/src/gf-changelog-helpers.h
index cfb26a0081e..9c609d33172 100644
--- a/xlators/features/changelog/lib/src/gf-changelog-helpers.h
+++ b/xlators/features/changelog/lib/src/gf-changelog-helpers.h
@@ -14,9 +14,9 @@
#include <unistd.h>
#include <dirent.h>
#include <limits.h>
-#include "locking.h"
+#include <glusterfs/locking.h>
-#include <xlator.h>
+#include <glusterfs/xlator.h>
#include "changelog.h"
@@ -205,9 +205,6 @@ typedef struct gf_private {
void *
gf_changelog_process(void *data);
-ssize_t
-gf_changelog_read_path(int fd, char *buffer, size_t bufsize);
-
void
gf_rfc3986_encode_space_newline(unsigned char *s, char *enc, char *estr);
diff --git a/xlators/features/changelog/lib/src/gf-changelog-journal-handler.c b/xlators/features/changelog/lib/src/gf-changelog-journal-handler.c
index ef46bf50c97..7f6e2329e71 100644
--- a/xlators/features/changelog/lib/src/gf-changelog-journal-handler.c
+++ b/xlators/features/changelog/lib/src/gf-changelog-journal-handler.c
@@ -8,11 +8,11 @@
cases as published by the Free Software Foundation.
*/
-#include "compat-uuid.h"
-#include "globals.h"
-#include "glusterfs.h"
-#include "syscall.h"
-#include "compat-errno.h"
+#include <glusterfs/compat-uuid.h>
+#include <glusterfs/globals.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/compat-errno.h>
#include "gf-changelog-helpers.h"
@@ -526,9 +526,8 @@ gf_changelog_publish(xlator_t *this, gf_changelog_journal_t *jnl,
ret = sys_rename(to_path, dest);
if (ret) {
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_LIB_MSG_RENAME_FAILED,
- "error moving changelog to processing dir", "path=%s", to_path,
- NULL);
+ CHANGELOG_LIB_MSG_RENAME_FAILED, "from=%s", to_path, "to=%s",
+ dest, NULL);
}
out:
@@ -564,14 +563,14 @@ gf_changelog_consume(xlator_t *this, gf_changelog_journal_t *jnl,
if (ret || !S_ISREG(stbuf.st_mode)) {
ret = -1;
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_STAT_FAILED,
- "stat failed on changelog file", "path=%s", from_path, NULL);
+ "path=%s", from_path, NULL);
goto out;
}
fd1 = open(from_path, O_RDONLY);
if (fd1 < 0) {
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_OPEN_FAILED,
- "cannot open changelog file", "path=%s", from_path, NULL);
+ "path=%s", from_path, NULL);
goto out;
}
@@ -579,7 +578,7 @@ gf_changelog_consume(xlator_t *this, gf_changelog_journal_t *jnl,
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
if (fd2 < 0) {
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_OPEN_FAILED,
- "cannot create ascii changelog file", "path=%s", to_path, NULL);
+ "path=%s", to_path, NULL);
goto close_fd;
} else {
ret = gf_changelog_decode(this, jnl, fd1, fd2, &stbuf, &zerob);
@@ -594,9 +593,8 @@ gf_changelog_consume(xlator_t *this, gf_changelog_journal_t *jnl,
ret = sys_rename(to_path, dest);
if (ret)
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_LIB_MSG_RENAME_FAILED,
- "error moving changelog to processing dir", "path=%s",
- to_path, NULL);
+ CHANGELOG_LIB_MSG_RENAME_FAILED, "from=%s", to_path,
+ "to=%s", dest, NULL);
}
/* remove it from .current if it's an empty file */
@@ -605,9 +603,8 @@ gf_changelog_consume(xlator_t *this, gf_changelog_journal_t *jnl,
ret = sys_unlink(to_path);
if (ret)
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_LIB_MSG_UNLINK_FAILED,
- "could not unlink empty changelog", "path=%s", to_path,
- NULL);
+ CHANGELOG_LIB_MSG_UNLINK_FAILED, "name=empty changelog",
+ "path=%s", to_path, NULL);
}
}
@@ -828,7 +825,7 @@ gf_changelog_open_dirs(xlator_t *this, gf_changelog_journal_t *jnl)
ret = recursive_rmdir(jnl->jnl_current_dir);
if (ret) {
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_LIB_MSG_FAILED_TO_RMDIR, "Failed to rmdir", "path=%s",
+ CHANGELOG_LIB_MSG_FAILED_TO_RMDIR, "path=%s",
jnl->jnl_current_dir, NULL);
goto out;
}
@@ -849,7 +846,7 @@ gf_changelog_open_dirs(xlator_t *this, gf_changelog_journal_t *jnl)
ret = recursive_rmdir(jnl->jnl_processing_dir);
if (ret) {
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_LIB_MSG_FAILED_TO_RMDIR, "Failed to rmdir", "path=%s",
+ CHANGELOG_LIB_MSG_FAILED_TO_RMDIR, "path=%s",
jnl->jnl_processing_dir, NULL);
goto out;
}
diff --git a/xlators/features/changelog/lib/src/gf-changelog-reborp.c b/xlators/features/changelog/lib/src/gf-changelog-reborp.c
index 132e8511be4..56b11cbb705 100644
--- a/xlators/features/changelog/lib/src/gf-changelog-reborp.c
+++ b/xlators/features/changelog/lib/src/gf-changelog-reborp.c
@@ -15,14 +15,14 @@
#include "changelog-rpc-common.h"
#include "changelog-lib-messages.h"
-#include "syscall.h"
+#include <glusterfs/syscall.h>
/**
* Reverse socket: actual data transfer handler. Connection
* initiator is PROBER, data transfer is REBORP.
*/
-struct rpcsvc_program *gf_changelog_reborp_programs[];
+static struct rpcsvc_program *gf_changelog_reborp_programs[];
void *
gf_changelog_connection_janitor(void *arg)
@@ -55,9 +55,8 @@ gf_changelog_connection_janitor(void *arg)
ev = &entry->event;
gf_smsg(this->name, GF_LOG_INFO, 0,
- CHANGELOG_LIB_MSG_CLEANING_BRICK_ENTRY_INFO,
- "Cleaning brick entry for brick", "brick=%s", entry->brick,
- NULL);
+ CHANGELOG_LIB_MSG_CLEANING_BRICK_ENTRY_INFO, "brick=%s",
+ entry->brick, NULL);
/* 0x0: disable rpc-clnt */
rpc_clnt_disable(RPC_PROBER(entry));
@@ -71,21 +70,19 @@ gf_changelog_connection_janitor(void *arg)
while (!list_empty(&ev->events)) {
event = list_first_entry(&ev->events, struct gf_event, list);
gf_smsg(this->name, GF_LOG_INFO, 0,
- CHANGELOG_LIB_MSG_DRAINING_EVENT_INFO, "Draining event",
- "seq=%lu", event->seq, "payload=%d", event->count, NULL);
+ CHANGELOG_LIB_MSG_DRAINING_EVENT_INFO, "seq=%lu",
+ event->seq, "payload=%d", event->count, NULL);
GF_FREE(event);
drained++;
}
gf_smsg(this->name, GF_LOG_INFO, 0,
- CHANGELOG_LIB_MSG_DRAINING_EVENT_INFO, "Drained events",
- "num=%lu", drained, NULL);
+ CHANGELOG_LIB_MSG_DRAINED_EVENT_INFO, "num=%lu", drained, NULL);
/* 0x3: freeup brick entry */
gf_smsg(this->name, GF_LOG_INFO, 0,
- CHANGELOG_LIB_MSG_FREEING_ENTRY_INFO, "freeing entry",
- "entry=%p", entry, NULL);
+ CHANGELOG_LIB_MSG_FREEING_ENTRY_INFO, "entry=%p", entry, NULL);
LOCK_DESTROY(&entry->statelock);
GF_FREE(entry);
}
@@ -112,9 +109,7 @@ gf_changelog_reborp_rpcsvc_notify(rpcsvc_t *rpc, void *mydata,
ret = sys_unlink(RPC_SOCK(entry));
if (ret != 0)
gf_smsg(this->name, GF_LOG_WARNING, errno,
- CHANGELOG_LIB_MSG_UNLINK_FAILED,
- "failed to unlink "
- "reverse socket",
+ CHANGELOG_LIB_MSG_UNLINK_FAILED, "name=reverse socket",
"path=%s", RPC_SOCK(entry), NULL);
if (entry->connected)
GF_CHANGELOG_INVOKE_CBK(this, entry->connected, entry->brick,
@@ -391,11 +386,10 @@ gf_changelog_reborp_handle_event(rpcsvc_request_t *req)
return gf_changelog_event_handler(req, this, entry);
}
-rpcsvc_actor_t gf_changelog_reborp_actors[CHANGELOG_REV_PROC_MAX] = {
+static rpcsvc_actor_t gf_changelog_reborp_actors[CHANGELOG_REV_PROC_MAX] = {
[CHANGELOG_REV_PROC_EVENT] = {"CHANGELOG EVENT HANDLER",
- CHANGELOG_REV_PROC_EVENT,
- gf_changelog_reborp_handle_event, NULL, 0,
- DRC_NA},
+ gf_changelog_reborp_handle_event, NULL,
+ CHANGELOG_REV_PROC_EVENT, DRC_NA, 0},
};
/**
@@ -404,7 +398,7 @@ rpcsvc_actor_t gf_changelog_reborp_actors[CHANGELOG_REV_PROC_MAX] = {
* and that's required to invoke the callback with the appropriate
* brick path and it's private data.
*/
-struct rpcsvc_program gf_changelog_reborp_prog = {
+static struct rpcsvc_program gf_changelog_reborp_prog = {
.progname = "LIBGFCHANGELOG REBORP",
.prognum = CHANGELOG_REV_RPC_PROCNUM,
.progver = CHANGELOG_REV_RPC_PROCVER,
@@ -413,7 +407,7 @@ struct rpcsvc_program gf_changelog_reborp_prog = {
.synctask = _gf_false,
};
-struct rpcsvc_program *gf_changelog_reborp_programs[] = {
+static struct rpcsvc_program *gf_changelog_reborp_programs[] = {
&gf_changelog_reborp_prog,
NULL,
};
diff --git a/xlators/features/changelog/lib/src/gf-changelog-rpc.h b/xlators/features/changelog/lib/src/gf-changelog-rpc.h
index 975307b99d3..5c82d6f1c08 100644
--- a/xlators/features/changelog/lib/src/gf-changelog-rpc.h
+++ b/xlators/features/changelog/lib/src/gf-changelog-rpc.h
@@ -11,7 +11,7 @@
#ifndef __GF_CHANGELOG_RPC_H
#define __GF_CHANGELOG_RPC_H
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#include "gf-changelog-helpers.h"
#include "changelog-rpc-common.h"
diff --git a/xlators/features/changelog/lib/src/gf-changelog.c b/xlators/features/changelog/lib/src/gf-changelog.c
index 42e20915922..57c3d39ef76 100644
--- a/xlators/features/changelog/lib/src/gf-changelog.c
+++ b/xlators/features/changelog/lib/src/gf-changelog.c
@@ -22,11 +22,11 @@
#endif
#include <string.h>
-#include "globals.h"
-#include "glusterfs.h"
-#include "logging.h"
-#include "defaults.h"
-#include "syncop.h"
+#include <glusterfs/globals.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/syncop.h>
#include "gf-changelog-rpc.h"
#include "gf-changelog-helpers.h"
@@ -102,8 +102,8 @@ gf_changelog_ctx_defaults_init(glusterfs_ctx_t *ctx)
if (!ctx->iobuf_pool)
goto free_pool;
- ctx->event_pool = event_pool_new(GF_CHANGELOG_EVENT_POOL_SIZE,
- GF_CHANGELOG_EVENT_THREAD_COUNT);
+ ctx->event_pool = gf_event_pool_new(GF_CHANGELOG_EVENT_POOL_SIZE,
+ GF_CHANGELOG_EVENT_THREAD_COUNT);
if (!ctx->event_pool)
goto free_pool;
@@ -237,9 +237,8 @@ gf_changelog_init_master()
{
int ret = 0;
- mem_pools_init_early();
ret = gf_changelog_init_context();
- mem_pools_init_late();
+ mem_pools_init();
return ret;
}
@@ -574,9 +573,8 @@ gf_changelog_register_generic(struct gf_brick_spec *bricks, int count,
brick = bricks;
while (count--) {
gf_smsg(this->name, GF_LOG_INFO, 0,
- CHANGELOG_LIB_MSG_NOTIFY_REGISTER_INFO, "Registering brick",
- "brick=%s", brick->brick_path, "notify_filter=%d",
- brick->filter, NULL);
+ CHANGELOG_LIB_MSG_NOTIFY_REGISTER_INFO, "brick=%s",
+ brick->brick_path, "notify_filter=%d", brick->filter, NULL);
ret = gf_changelog_register_brick(this, brick, need_order, xl);
if (ret != 0) {
diff --git a/xlators/features/changelog/lib/src/gf-history-changelog.c b/xlators/features/changelog/lib/src/gf-history-changelog.c
index c8a31ebbd73..a16219f3664 100644
--- a/xlators/features/changelog/lib/src/gf-history-changelog.c
+++ b/xlators/features/changelog/lib/src/gf-history-changelog.c
@@ -8,10 +8,10 @@
#endif
#include <string.h>
-#include "globals.h"
-#include "glusterfs.h"
-#include "logging.h"
-#include "syscall.h"
+#include <glusterfs/globals.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/syscall.h>
#include "gf-changelog-helpers.h"
#include "gf-changelog-journal.h"
@@ -79,8 +79,8 @@ gf_history_changelog_done(char *file)
ret = sys_rename(buffer, to_path);
if (ret) {
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_LIB_MSG_RENAME_FAILED, "cannot move changelog file",
- "from=%s", file, "to=%s", to_path, NULL);
+ CHANGELOG_LIB_MSG_RENAME_FAILED, "from=%s", file, "to=%s",
+ to_path, NULL);
goto out;
}
@@ -522,8 +522,7 @@ gf_changelog_consume_wrap(void *data)
_gf_true);
if (ret) {
gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_LIB_MSG_PARSE_ERROR,
- "could not parse changelog", "name=%s", ccd->changelog,
- NULL);
+ "name=%s", ccd->changelog, NULL);
goto out;
}
}
@@ -564,9 +563,6 @@ gf_history_consume(void *data)
{0},
};
gf_changelog_consume_data_t *curr = NULL;
- char thread_name[GF_THREAD_NAMEMAX] = {
- 0,
- };
hist_data = (gf_changelog_history_data_t *)data;
if (hist_data == NULL) {
@@ -612,12 +608,10 @@ gf_history_consume(void *data)
curr->retval = 0;
memset(curr->changelog, '\0', PATH_MAX);
- snprintf(thread_name, sizeof(thread_name), "clogc%03hx",
- ((iter + 1) & 0x3ff));
ret = gf_thread_create(&th_id[iter], NULL,
gf_changelog_consume_wrap, curr,
- thread_name);
+ "clogc%03hx", (iter + 1) & 0x3ff);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, ret,
CHANGELOG_LIB_MSG_THREAD_CREATION_FAILED,
@@ -647,9 +641,8 @@ gf_history_consume(void *data)
curr = &ccd[iter];
if (ccd->retval) {
publish = _gf_false;
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_LIB_MSG_PARSE_ERROR,
- "parsing error, ceased publishing...");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_LIB_MSG_PARSE_ERROR_CEASED, NULL);
continue;
}
@@ -728,7 +721,7 @@ gf_changelog_extract_min_max(const char *dname, const char *htime_dir, int *fd,
if (ret) {
ret = -1;
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_HTIME_ERROR,
- "stat() failed on htime file", "path=%s", htime_file, NULL);
+ "op=stat", "path=%s", htime_file, NULL);
goto out;
}
@@ -742,7 +735,7 @@ gf_changelog_extract_min_max(const char *dname, const char *htime_dir, int *fd,
if (*fd < 0) {
ret = -1;
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_HTIME_ERROR,
- "open() failed for htime file", "path=%s", htime_file, NULL);
+ "op=open", "path=%s", htime_file, NULL);
goto out;
}
@@ -751,17 +744,15 @@ gf_changelog_extract_min_max(const char *dname, const char *htime_dir, int *fd,
if (ret < 0) {
ret = -1;
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_LIB_MSG_GET_XATTR_FAILED,
- "error extracting max timstamp from htime file"
- "path=%s",
- htime_file, NULL);
+ CHANGELOG_LIB_MSG_GET_XATTR_FAILED, "path=%s", htime_file,
+ NULL);
goto out;
}
sscanf(x_value, "%lu:%lu", max_ts, total);
- gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_LIB_MSG_TOTAL_LOG_INFO,
- "changelogs min max", "min=%lu", *min_ts, "max=%lu", *max_ts,
- "total_changelogs=%lu", *total, NULL);
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_LIB_MSG_MIN_MAX_INFO,
+ "min=%lu", *min_ts, "max=%lu", *max_ts, "total_changelogs=%lu",
+ *total, NULL);
ret = 0;
@@ -842,15 +833,14 @@ gf_history_changelog(char *changelog_dir, unsigned long start,
goto out;
}
- gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_LIB_MSG_TOTAL_LOG_INFO,
- "Requesting historical changelogs", "start=%lu", start, "end=%lu",
- end, NULL);
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_LIB_MSG_REQUESTING_INFO,
+ "start=%lu", start, "end=%lu", end, NULL);
/* basic sanity check */
if (start > end || n_parallel <= 0) {
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_HIST_FAILED,
- "Sanity check failed", "start=%lu", start, "end=%lu", end,
- "thread_count=%d", n_parallel, NULL);
+ "start=%lu", start, "end=%lu", end, "thread_count=%d",
+ n_parallel, NULL);
ret = -1;
goto out;
}
@@ -864,7 +854,7 @@ gf_history_changelog(char *changelog_dir, unsigned long start,
dirp = sys_opendir(htime_dir);
if (dirp == NULL) {
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_HTIME_ERROR,
- "open dir on htime failed", "path=%s", htime_dir, NULL);
+ "op=opendir", "path=%s", htime_dir, NULL);
ret = -1;
goto out;
}
@@ -876,9 +866,8 @@ gf_history_changelog(char *changelog_dir, unsigned long start,
if (!entry || errno != 0) {
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_LIB_MSG_HIST_FAILED,
- "Requested changelog range is not availbale", "start=%lu",
- start, "end=%lu", end, NULL);
+ CHANGELOG_LIB_MSG_HIST_FAILED, "start=%lu", start,
+ "end=%lu", end, NULL);
ret = -2;
break;
}
@@ -916,9 +905,8 @@ gf_history_changelog(char *changelog_dir, unsigned long start,
if (gf_history_check(fd, from, start, len) != 0) {
ret = -1;
gf_smsg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_LIB_MSG_GET_TIME_ERROR,
- "wrong result for start", "start=%lu", start, "idx=%lu",
- from, NULL);
+ CHANGELOG_LIB_MSG_GET_TIME_ERROR, "for=start",
+ "start=%lu", start, "idx=%lu", from, NULL);
goto out;
}
@@ -949,9 +937,8 @@ gf_history_changelog(char *changelog_dir, unsigned long start,
if (gf_history_check(fd, to, end2, len) != 0) {
ret = -1;
gf_smsg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_LIB_MSG_GET_TIME_ERROR,
- "wrong result for end", "start=%lu", end2, "idx=%lu",
- to, NULL);
+ CHANGELOG_LIB_MSG_GET_TIME_ERROR, "for=end",
+ "start=%lu", end2, "idx=%lu", to, NULL);
goto out;
}
@@ -963,9 +950,9 @@ gf_history_changelog(char *changelog_dir, unsigned long start,
if (ret == -1)
goto out;
- gf_smsg(this->name, GF_LOG_INFO, 0,
- CHANGELOG_LIB_MSG_TOTAL_LOG_INFO, "FINAL", "from=%lu", ts1,
- "to=%lu", ts2, "changes=%lu", (to - from + 1), NULL);
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_LIB_MSG_FINAL_INFO,
+ "from=%lu", ts1, "to=%lu", ts2, "changes=%lu",
+ (to - from + 1), NULL);
hist_data = GF_CALLOC(1, sizeof(gf_changelog_history_data_t),
gf_changelog_mt_history_data_t);
@@ -1003,11 +990,9 @@ gf_history_changelog(char *changelog_dir, unsigned long start,
} else { /* end of range check */
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_LIB_MSG_HIST_FAILED,
- "Requested changelog range is not "
- "available. Retrying next HTIME",
- "start=%lu", start, "end=%lu", end, "chlog_min=%lu", min_ts,
- "chlog_max=%lu", max_ts, NULL);
+ CHANGELOG_LIB_MSG_HIST_FAILED, "start=%lu", start,
+ "end=%lu", end, "chlog_min=%lu", min_ts, "chlog_max=%lu",
+ max_ts, NULL);
}
} /* end of readdir() */
diff --git a/xlators/features/changelog/src/changelog-barrier.c b/xlators/features/changelog/src/changelog-barrier.c
index e8d742404df..0fb89ddb127 100644
--- a/xlators/features/changelog/src/changelog-barrier.c
+++ b/xlators/features/changelog/src/changelog-barrier.c
@@ -10,7 +10,7 @@
#include "changelog-helpers.h"
#include "changelog-messages.h"
-#include "call-stub.h"
+#include <glusterfs/call-stub.h>
/* Enqueue a stub*/
void
@@ -53,14 +53,14 @@ chlog_barrier_dequeue_all(xlator_t *this, struct list_head *queue)
{
call_stub_t *stub = NULL;
- gf_msg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_BARRIER_INFO,
- "Dequeuing all the changelog barriered fops");
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS,
+ NULL);
while ((stub = __chlog_barrier_dequeue(this, queue)))
call_resume(stub);
- gf_msg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_BARRIER_INFO,
- "Dequeuing changelog barriered fops is finished");
+ gf_smsg(this->name, GF_LOG_INFO, 0,
+ CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS_FINISHED, NULL);
return;
}
@@ -80,8 +80,7 @@ chlog_barrier_timeout(void *data)
INIT_LIST_HEAD(&queue);
- gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_BARRIER_ERROR,
- "Disabling changelog barrier because of the timeout.");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_BARRIER_TIMEOUT, NULL);
LOCK(&priv->lock);
{
@@ -120,8 +119,8 @@ __chlog_barrier_enable(xlator_t *this, changelog_priv_t *priv)
priv->timer = gf_timer_call_after(this->ctx, priv->timeout,
chlog_barrier_timeout, (void *)this);
if (!priv->timer) {
- gf_msg(this->name, GF_LOG_CRITICAL, 0, CHANGELOG_MSG_BARRIER_ERROR,
- "Couldn't add changelog barrier timeout event.");
+ gf_smsg(this->name, GF_LOG_CRITICAL, 0,
+ CHANGELOG_MSG_TIMEOUT_ADD_FAILED, NULL);
goto out;
}
diff --git a/xlators/features/changelog/src/changelog-encoders.h b/xlators/features/changelog/src/changelog-encoders.h
index ca42c4c4fe0..26252696d56 100644
--- a/xlators/features/changelog/src/changelog-encoders.h
+++ b/xlators/features/changelog/src/changelog-encoders.h
@@ -11,8 +11,8 @@
#ifndef _CHANGELOG_ENCODERS_H
#define _CHANGELOG_ENCODERS_H
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "changelog-helpers.h"
diff --git a/xlators/features/changelog/src/changelog-ev-handle.c b/xlators/features/changelog/src/changelog-ev-handle.c
index 3ed6ff821d9..aa94459de5a 100644
--- a/xlators/features/changelog/src/changelog-ev-handle.c
+++ b/xlators/features/changelog/src/changelog-ev-handle.c
@@ -134,6 +134,8 @@ changelog_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
changelog_clnt_t *c_clnt = NULL;
changelog_priv_t *priv = NULL;
changelog_ev_selector_t *selection = NULL;
+ uint64_t clntcnt = 0;
+ uint64_t xprtcnt = 0;
crpc = mydata;
this = crpc->this;
@@ -144,6 +146,7 @@ changelog_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
switch (event) {
case RPC_CLNT_CONNECT:
selection = &priv->ev_selection;
+ GF_ATOMIC_INC(priv->clntcnt);
LOCK(&c_clnt->wait_lock);
{
@@ -176,12 +179,23 @@ changelog_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
changelog_set_disconnect_flag(crpc, _gf_true);
}
UNLOCK(&crpc->lock);
+ LOCK(&c_clnt->active_lock);
+ {
+ list_del_init(&crpc->list);
+ }
+ UNLOCK(&c_clnt->active_lock);
break;
case RPC_CLNT_MSG:
case RPC_CLNT_DESTROY:
/* Free up mydata */
changelog_rpc_clnt_unref(crpc);
+ clntcnt = GF_ATOMIC_DEC(priv->clntcnt);
+ xprtcnt = GF_ATOMIC_GET(priv->xprtcnt);
+ if (this->cleanup_starting) {
+ if (!clntcnt && !xprtcnt)
+ changelog_process_cleanup_event(this);
+ }
break;
case RPC_CLNT_PING:
break;
@@ -211,8 +225,8 @@ changelog_ev_connector(void *data)
changelog_rpc_notify);
if (!crpc->rpc) {
gf_smsg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_RPC_CONNECT_ERROR,
- "failed to connect back", "path=%s", crpc->sock, NULL);
+ CHANGELOG_MSG_RPC_CONNECT_ERROR, "path=%s", crpc->sock,
+ NULL);
crpc->cleanup(crpc);
goto mutex_unlock;
}
@@ -364,9 +378,8 @@ changelog_ev_dispatch(void *data)
ret = rbuf_wait_for_completion(c_clnt->rbuf, opaque, _dispatcher,
c_clnt);
if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0,
- CHANGELOG_MSG_PUT_BUFFER_FAILED,
- "failed to put buffer after consumption");
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ CHANGELOG_MSG_PUT_BUFFER_FAILED, NULL);
}
return NULL;
diff --git a/xlators/features/changelog/src/changelog-ev-handle.h b/xlators/features/changelog/src/changelog-ev-handle.h
index 7e543a0edb3..cc1af58a276 100644
--- a/xlators/features/changelog/src/changelog-ev-handle.h
+++ b/xlators/features/changelog/src/changelog-ev-handle.h
@@ -11,11 +11,11 @@
#ifndef __CHANGELOG_EV_HANDLE_H
#define __CHANGELOG_EV_HANDLE_H
-#include "list.h"
-#include "xlator.h"
+#include <glusterfs/list.h>
+#include <glusterfs/xlator.h>
#include "rpc-clnt.h"
-#include "rot-buffs.h"
+#include <glusterfs/rot-buffs.h>
struct changelog_clnt;
@@ -131,4 +131,6 @@ changelog_ev_queue_connection(changelog_clnt_t *, changelog_rpc_clnt_t *);
void
changelog_ev_cleanup_connections(xlator_t *, changelog_clnt_t *);
+void
+changelog_process_cleanup_event(xlator_t *);
#endif
diff --git a/xlators/features/changelog/src/changelog-helpers.c b/xlators/features/changelog/src/changelog-helpers.c
index 9ff9115c40d..e561997d858 100644
--- a/xlators/features/changelog/src/changelog-helpers.c
+++ b/xlators/features/changelog/src/changelog-helpers.c
@@ -8,11 +8,11 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
-#include "logging.h"
-#include "iobuf.h"
-#include "syscall.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/iobuf.h>
+#include <glusterfs/syscall.h>
#include "changelog-helpers.h"
#include "changelog-encoders.h"
@@ -22,6 +22,7 @@
#include "changelog-encoders.h"
#include "changelog-rpc-common.h"
#include <pthread.h>
+#include <time.h>
static void
changelog_cleanup_free_mutex(void *arg_mutex)
@@ -41,16 +42,15 @@ changelog_thread_cleanup(xlator_t *this, pthread_t thr_id)
/* send a cancel request to the thread */
ret = pthread_cancel(thr_id);
if (ret != 0) {
- gf_msg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_MSG_PTHREAD_CANCEL_FAILED, "could not cancel thread");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_PTHREAD_CANCEL_FAILED, NULL);
goto out;
}
ret = pthread_join(thr_id, &retval);
if ((ret != 0) || (retval != PTHREAD_CANCELED)) {
- gf_msg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_MSG_PTHREAD_CANCEL_FAILED,
- "cancel request not adhered as expected");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_PTHREAD_CANCEL_FAILED, NULL);
}
out:
@@ -153,27 +153,6 @@ changelog_init_event_selection(xlator_t *this,
return 0;
}
-int
-changelog_cleanup_event_selection(xlator_t *this,
- changelog_ev_selector_t *selection)
-{
- int j = CHANGELOG_EV_SELECTION_RANGE;
-
- LOCK(&selection->reflock);
- {
- while (j--) {
- if (selection->ref[j] > 0)
- gf_msg(this->name, GF_LOG_WARNING, 0,
- CHANGELOG_MSG_CLEANUP_ON_ACTIVE_REF,
- "changelog event selection cleaning up "
- " on active references");
- }
- }
- UNLOCK(&selection->reflock);
-
- return LOCK_DESTROY(&selection->reflock);
-}
-
static void
changelog_perform_dispatch(xlator_t *this, changelog_priv_t *priv, void *mem,
size_t size)
@@ -263,8 +242,7 @@ changelog_write(int fd, char *buffer, size_t len)
}
int
-htime_update(xlator_t *this, changelog_priv_t *priv, unsigned long ts,
- char *buffer)
+htime_update(xlator_t *this, changelog_priv_t *priv, time_t ts, char *buffer)
{
char changelog_path[PATH_MAX + 1] = {
0,
@@ -277,8 +255,8 @@ htime_update(xlator_t *this, changelog_priv_t *priv, unsigned long ts,
int ret = 0;
if (priv->htime_fd == -1) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_HTIME_ERROR,
- "Htime fd not available for updation");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_HTIME_ERROR,
+ "reason=fd not available", NULL);
ret = -1;
goto out;
}
@@ -288,13 +266,13 @@ htime_update(xlator_t *this, changelog_priv_t *priv, unsigned long ts,
goto out;
}
if (changelog_write(priv->htime_fd, (void *)changelog_path, len + 1) < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_HTIME_ERROR,
- "Htime file content write failed");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_HTIME_ERROR,
+ "reason=write failed", NULL);
ret = -1;
goto out;
}
- len = snprintf(x_value, sizeof(x_value), "%lu:%d", ts,
+ len = snprintf(x_value, sizeof(x_value), "%ld:%d", ts,
priv->rollover_count);
if (len >= sizeof(x_value)) {
ret = -1;
@@ -303,12 +281,12 @@ htime_update(xlator_t *this, changelog_priv_t *priv, unsigned long ts,
if (sys_fsetxattr(priv->htime_fd, HTIME_KEY, x_value, len, XATTR_REPLACE)) {
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_HTIME_ERROR,
- "Htime xattr updation failed with XATTR_REPLACE",
+ "reason=xattr updation failed", "XATTR_REPLACE=true",
"changelog=%s", changelog_path, NULL);
if (sys_fsetxattr(priv->htime_fd, HTIME_KEY, x_value, len, 0)) {
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_HTIME_ERROR,
- "Htime xattr updation failed", "changelog=%s",
+ "reason=xattr updation failed", "changelog=%s",
changelog_path, NULL);
ret = -1;
goto out;
@@ -346,15 +324,15 @@ cl_is_empty(xlator_t *this, int fd)
ret = sys_fstat(fd, &stbuf);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSTAT_OP_FAILED,
- "Could not stat (CHANGELOG)");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSTAT_OP_FAILED,
+ NULL);
goto out;
}
ret = sys_lseek(fd, 0, SEEK_SET);
if (ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_LSEEK_OP_FAILED,
- "Could not lseek (CHANGELOG)");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_LSEEK_OP_FAILED,
+ NULL);
goto out;
}
@@ -390,8 +368,8 @@ update_path(xlator_t *this, char *cl_path)
found = strstr(cl_path, up_cl);
if (found == NULL) {
- gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_LSEEK_OP_FAILED,
- "Could not find CHANGELOG in changelog path");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PATH_NOT_FOUND,
+ NULL);
goto out;
} else {
memcpy(found, low_cl, sizeof(low_cl) - 1);
@@ -403,18 +381,22 @@ out:
}
static int
-changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv,
- unsigned long ts)
+changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv, time_t ts)
{
int ret = -1;
int notify = 0;
int cl_empty_flag = 0;
+ struct tm *gmt;
+ char yyyymmdd[40];
char ofile[PATH_MAX] = {
0,
};
char nfile[PATH_MAX] = {
0,
};
+ char nfile_dir[PATH_MAX] = {
+ 0,
+ };
changelog_event_t ev = {
0,
};
@@ -422,33 +404,37 @@ changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv,
if (priv->changelog_fd != -1) {
ret = sys_fsync(priv->changelog_fd);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_MSG_FSYNC_OP_FAILED, "fsync failed");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_FSYNC_OP_FAILED, NULL);
}
ret = cl_is_empty(this, priv->changelog_fd);
if (ret == 1) {
cl_empty_flag = 1;
} else if (ret == -1) {
/* Log error but proceed as usual */
- gf_msg(this->name, GF_LOG_WARNING, 0,
- CHANGELOG_MSG_DETECT_EMPTY_CHANGELOG_FAILED,
- "Error detecting empty changelog");
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ CHANGELOG_MSG_DETECT_EMPTY_CHANGELOG_FAILED, NULL);
}
sys_close(priv->changelog_fd);
priv->changelog_fd = -1;
}
+ /* Get GMT time. */
+ gmt = gmtime(&ts);
+
+ strftime(yyyymmdd, sizeof(yyyymmdd), "%Y/%m/%d", gmt);
+
(void)snprintf(ofile, PATH_MAX, "%s/" CHANGELOG_FILE_NAME,
priv->changelog_dir);
- (void)snprintf(nfile, PATH_MAX, "%s/" CHANGELOG_FILE_NAME ".%lu",
- priv->changelog_dir, ts);
+ (void)snprintf(nfile, PATH_MAX, "%s/%s/" CHANGELOG_FILE_NAME ".%ld",
+ priv->changelog_dir, yyyymmdd, ts);
+ (void)snprintf(nfile_dir, PATH_MAX, "%s/%s", priv->changelog_dir, yyyymmdd);
if (cl_empty_flag == 1) {
ret = sys_unlink(ofile);
if (ret) {
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_MSG_UNLINK_OP_FAILED,
- "error unlinking empty changelog", "path=%s", ofile, NULL);
+ CHANGELOG_MSG_UNLINK_OP_FAILED, "path=%s", ofile, NULL);
ret = 0; /* Error in unlinking empty changelog should
not break further changelog operation, so
reset return value to 0*/
@@ -456,13 +442,26 @@ changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv,
} else {
ret = sys_rename(ofile, nfile);
+ /* Changelog file rename gets ENOENT when parent dir doesn't exist */
+ if (errno == ENOENT) {
+ ret = mkdir_p(nfile_dir, 0600, _gf_true);
+
+ if ((ret == -1) && (EEXIST != errno)) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_MKDIR_ERROR, "%s", nfile_dir, NULL);
+ goto out;
+ }
+
+ ret = sys_rename(ofile, nfile);
+ }
+
if (ret && (errno == ENOENT)) {
ret = 0;
goto out;
}
if (ret) {
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_RENAME_ERROR,
- "error renaming", "from=%s", ofile, "to=%s", nfile, NULL);
+ "from=%s", ofile, "to=%s", nfile, NULL);
}
}
@@ -476,8 +475,8 @@ changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv,
}
ret = htime_update(this, priv, ts, nfile);
if (ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_HTIME_ERROR,
- "could not update htime file");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_HTIME_ERROR,
+ NULL);
goto out;
}
}
@@ -501,15 +500,10 @@ out:
{
if (ret) {
priv->bn.bnotify_error = _gf_true;
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_EXPLICIT_ROLLOVER_FAILED,
- "Fail snapshot because of "
- "previous errors");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_EXPLICIT_ROLLOVER_FAILED, NULL);
} else {
gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_BNOTIFY_INFO,
- "Explicit "
- "rollover changelog signaling "
- "bnotify",
"changelog=%s", nfile, NULL);
}
priv->bn.bnotify = _gf_false;
@@ -556,8 +550,8 @@ find_current_htime(int ht_dir_fd, const char *ht_dir_path, char *ht_file_bname)
cnt = scandir(ht_dir_path, &namelist, filter_cur_par_dirs, alphasort);
if (cnt < 0) {
- gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_SCAN_DIR_FAILED,
- "scandir failed");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_SCAN_DIR_FAILED,
+ NULL);
} else if (cnt > 0) {
if (snprintf(ht_file_bname, NAME_MAX, "%s",
namelist[cnt - 1]->d_name) >= NAME_MAX) {
@@ -566,16 +560,15 @@ find_current_htime(int ht_dir_fd, const char *ht_dir_path, char *ht_file_bname)
}
if (sys_fsetxattr(ht_dir_fd, HTIME_CURRENT, ht_file_bname,
strlen(ht_file_bname), 0)) {
- gf_msg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_MSG_FSETXATTR_FAILED,
- "fsetxattr failed: HTIME_CURRENT");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_FSETXATTR_FAILED, "HTIME_CURRENT", NULL);
ret = -1;
goto out;
}
if (sys_fsync(ht_dir_fd) < 0) {
- gf_msg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_MSG_FSYNC_OP_FAILED, "fsync failed");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_FSYNC_OP_FAILED, NULL);
ret = -1;
goto out;
}
@@ -596,7 +589,7 @@ out:
* returns -1 on failure or error
*/
int
-htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
+htime_open(xlator_t *this, changelog_priv_t *priv, time_t ts)
{
int ht_file_fd = -1;
int ht_dir_fd = -1;
@@ -632,7 +625,7 @@ htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
ht_dir_fd = open(ht_dir_path, O_RDONLY);
if (ht_dir_fd == -1) {
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED,
- "open failed", "path=%s", ht_dir_path, NULL);
+ "path=%s", ht_dir_path, NULL);
ret = -1;
goto out;
}
@@ -640,9 +633,8 @@ htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
size = sys_fgetxattr(ht_dir_fd, HTIME_CURRENT, ht_file_bname,
sizeof(ht_file_bname));
if (size < 0) {
- gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FGETXATTR_FAILED,
- "Error extracting"
- " HTIME_CURRENT.");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FGETXATTR_FAILED,
+ "name=HTIME_CURRENT", NULL);
/* If upgrade scenario, find the latest HTIME.TSTAMP file
* and use the same. If error, create a new HTIME.TSTAMP
@@ -650,20 +642,18 @@ htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
*/
cnt = find_current_htime(ht_dir_fd, ht_dir_path, ht_file_bname);
if (cnt <= 0) {
- gf_msg(this->name, GF_LOG_INFO, errno, CHANGELOG_MSG_HTIME_INFO,
- "HTIME_CURRENT not found. Changelog enabled"
- " before init");
+ gf_smsg(this->name, GF_LOG_INFO, errno,
+ CHANGELOG_MSG_NO_HTIME_CURRENT, NULL);
sys_close(ht_dir_fd);
return htime_create(this, priv, ts);
}
- gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_HTIME_ERROR,
- "Error extracting"
- " HTIME_CURRENT.");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_HTIME_CURRENT_ERROR, NULL);
}
- gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_HTIME_INFO,
- "HTIME_CURRENT", "path=%s", ht_file_bname, NULL);
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_HTIME_CURRENT, "path=%s",
+ ht_file_bname, NULL);
len = snprintf(ht_file_path, PATH_MAX, "%s/%s", ht_dir_path, ht_file_bname);
if ((len < 0) || (len >= PATH_MAX)) {
ret = -1;
@@ -676,7 +666,7 @@ htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
if (ht_file_fd < 0) {
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED,
- "unable to open htime file", "path=%s", ht_file_path, NULL);
+ "path=%s", ht_file_path, NULL);
ret = -1;
goto out;
}
@@ -686,8 +676,8 @@ htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
ret = sys_fstat(ht_file_fd, &stat_buf);
if (ret < 0) {
- gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_HTIME_ERROR,
- "unable to stat htime file", "path=%s", ht_file_path, NULL);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_HTIME_STAT_ERROR,
+ "path=%s", ht_file_path, NULL);
ret = -1;
goto out;
}
@@ -696,9 +686,7 @@ htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
size = sys_fgetxattr(ht_file_fd, HTIME_KEY, x_value, sizeof(x_value));
if (size < 0) {
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FGETXATTR_FAILED,
- "error extracting max"
- " timstamp from htime file",
- "path=%s", ht_file_path, NULL);
+ "name=%s", HTIME_KEY, "path=%s", ht_file_path, NULL);
ret = -1;
goto out;
}
@@ -710,14 +698,11 @@ htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
total1 = stat_buf.st_size / record_len;
if (total != total1) {
gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_TOTAL_LOG_INFO,
- "Mismatch of changelog count. "
- "INIT CASE",
"xattr_total=%lu", total, "size_total=%lu", total1, NULL);
}
- gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_TOTAL_LOG_INFO,
- "INIT CASE", "min=%lu", min_ts, "max=%lu", max_ts,
- "total_changelogs=%lu", total, NULL);
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_TOTAL_LOG_INFO, "min=%lu",
+ min_ts, "max=%lu", max_ts, "total_changelogs=%lu", total, NULL);
if (total < total1)
priv->rollover_count = total1 + 1;
@@ -734,7 +719,7 @@ out:
* returns -1 on failure or error
*/
int
-htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
+htime_create(xlator_t *this, changelog_priv_t *priv, time_t ts)
{
int ht_file_fd = -1;
int ht_dir_fd = -1;
@@ -751,15 +736,13 @@ htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
int flags = 0;
int32_t len = 0;
- gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_HTIME_INFO,
- "Changelog enable: Creating new "
- "HTIME file",
- "name=%lu", ts, NULL);
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_NEW_HTIME_FILE,
+ "name=%ld", ts, NULL);
CHANGELOG_FILL_HTIME_DIR(priv->changelog_dir, ht_dir_path);
/* get the htime file name in ht_file_path */
- len = snprintf(ht_file_path, PATH_MAX, "%s/%s.%lu", ht_dir_path,
+ len = snprintf(ht_file_path, PATH_MAX, "%s/%s.%ld", ht_dir_path,
HTIME_FILE_NAME, ts);
if ((len < 0) || (len >= PATH_MAX)) {
ret = -1;
@@ -771,23 +754,23 @@ htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
if (ht_file_fd < 0) {
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED,
- "unable to create htime file", "path=%s", ht_file_path, NULL);
+ "path=%s", ht_file_path, NULL);
ret = -1;
goto out;
}
if (sys_fsetxattr(ht_file_fd, HTIME_KEY, HTIME_INITIAL_VALUE,
sizeof(HTIME_INITIAL_VALUE) - 1, 0)) {
- gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSETXATTR_FAILED,
- "Htime xattr initialization failed");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_XATTR_INIT_FAILED, NULL);
ret = -1;
goto out;
}
ret = sys_fsync(ht_file_fd);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSYNC_OP_FAILED,
- "fsync failed");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSYNC_OP_FAILED,
+ NULL);
goto out;
}
@@ -800,26 +783,25 @@ htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts)
ht_dir_fd = open(ht_dir_path, O_RDONLY);
if (ht_dir_fd == -1) {
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED,
- "open failed", "path=%s", ht_dir_path, NULL);
+ "path=%s", ht_dir_path, NULL);
ret = -1;
goto out;
}
- (void)snprintf(ht_file_bname, sizeof(ht_file_bname), "%s.%lu",
+ (void)snprintf(ht_file_bname, sizeof(ht_file_bname), "%s.%ld",
HTIME_FILE_NAME, ts);
if (sys_fsetxattr(ht_dir_fd, HTIME_CURRENT, ht_file_bname,
strlen(ht_file_bname), 0)) {
- gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSETXATTR_FAILED,
- "fsetxattr failed:"
- " HTIME_CURRENT");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSETXATTR_FAILED,
+ " HTIME_CURRENT", NULL);
ret = -1;
goto out;
}
ret = sys_fsync(ht_dir_fd);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSYNC_OP_FAILED,
- "fsync failed");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSYNC_OP_FAILED,
+ NULL);
goto out;
}
@@ -873,7 +855,7 @@ changelog_snap_open(xlator_t *this, changelog_priv_t *priv)
fd = open(c_snap_path, flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
if (fd < 0) {
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED,
- "unable to open file", "path=%s", c_snap_path, NULL);
+ "path=%s", c_snap_path, NULL);
ret = -1;
goto out;
}
@@ -905,8 +887,8 @@ changelog_snap_logging_start(xlator_t *this, changelog_priv_t *priv)
int ret = 0;
ret = changelog_snap_open(this, priv);
- gf_msg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_SNAP_INFO,
- "Now starting to log in call path");
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_SNAP_INFO, "starting",
+ NULL);
return ret;
}
@@ -926,8 +908,8 @@ changelog_snap_logging_stop(xlator_t *this, changelog_priv_t *priv)
sys_close(priv->c_snap_fd);
priv->c_snap_fd = -1;
- gf_msg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_SNAP_INFO,
- "Stopped to log in call path");
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_SNAP_INFO, "Stopped",
+ NULL);
return ret;
}
@@ -955,9 +937,6 @@ changelog_open_journal(xlator_t *this, changelog_priv_t *priv)
fd = open(changelog_path, flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
if (fd < 0) {
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED,
- "unable to open/create changelog file."
- " change-logging will be"
- " inactive",
"path=%s", changelog_path, NULL);
goto out;
}
@@ -980,8 +959,8 @@ out:
}
int
-changelog_start_next_change(xlator_t *this, changelog_priv_t *priv,
- unsigned long ts, gf_boolean_t finale)
+changelog_start_next_change(xlator_t *this, changelog_priv_t *priv, time_t ts,
+ gf_boolean_t finale)
{
int ret = -1;
@@ -1002,21 +981,12 @@ changelog_entry_length()
return sizeof(changelog_log_data_t);
}
-int
+void
changelog_fill_rollover_data(changelog_log_data_t *cld, gf_boolean_t is_last)
{
- struct timeval tv = {
- 0,
- };
-
cld->cld_type = CHANGELOG_TYPE_ROLLOVER;
-
- if (gettimeofday(&tv, NULL))
- return -1;
-
- cld->cld_roll_time = (unsigned long)tv.tv_sec;
+ cld->cld_roll_time = gf_time();
cld->cld_finale = is_last;
- return 0;
}
int
@@ -1074,11 +1044,10 @@ changelog_snap_handle_ascii_change(xlator_t *this, changelog_log_data_t *cld)
ret = changelog_snap_write_change(priv, buffer, off);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_WRITE_FAILED,
- "error writing csnap to disk");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_WRITE_FAILED,
+ "csnap", NULL);
}
- gf_msg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_SNAP_INFO,
- "Successfully wrote to csnap");
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_WROTE_TO_CSNAP, NULL);
ret = 0;
out:
return ret;
@@ -1095,9 +1064,8 @@ changelog_handle_change(xlator_t *this, changelog_priv_t *priv,
ret = changelog_start_next_change(this, priv, cld->cld_roll_time,
cld->cld_finale);
if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_GET_TIME_OP_FAILED,
- "Problem rolling over changelog(s)");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_GET_TIME_OP_FAILED, NULL);
goto out;
}
@@ -1111,16 +1079,16 @@ changelog_handle_change(xlator_t *this, changelog_priv_t *priv,
if (CHANGELOG_TYPE_IS_FSYNC(cld->cld_type)) {
ret = sys_fsync(priv->changelog_fd);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_MSG_FSYNC_OP_FAILED, "fsync failed");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_FSYNC_OP_FAILED, NULL);
}
goto out;
}
ret = priv->ce->encode(this, cld);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_WRITE_FAILED,
- "error writing changelog to disk");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_WRITE_FAILED,
+ "changelog", NULL);
}
out:
@@ -1143,6 +1111,7 @@ changelog_local_init(xlator_t *this, inode_t *inode, uuid_t gfid,
gf_msg_callingfn(this->name, GF_LOG_WARNING, 0,
CHANGELOG_MSG_INODE_NOT_FOUND,
"inode needed for version checking !!!");
+
goto out;
}
@@ -1211,7 +1180,7 @@ changelog_drain_black_fops(xlator_t *this, changelog_priv_t *priv)
ret = pthread_mutex_lock(&priv->dm.drain_black_mutex);
if (ret)
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PTHREAD_ERROR,
- "pthread error", "error=%d", ret, NULL);
+ "error=%d", ret, NULL);
while (priv->dm.black_fop_cnt > 0) {
gf_msg_debug(this->name, 0, "Conditional wait on black fops: %ld",
priv->dm.black_fop_cnt);
@@ -1220,14 +1189,14 @@ changelog_drain_black_fops(xlator_t *this, changelog_priv_t *priv)
&priv->dm.drain_black_mutex);
if (ret)
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_MSG_PTHREAD_COND_WAIT_FAILED,
- "pthread cond wait failed", "error=%d", ret, NULL);
+ CHANGELOG_MSG_PTHREAD_COND_WAIT_FAILED, "error=%d", ret,
+ NULL);
}
priv->dm.drain_wait_black = _gf_false;
ret = pthread_mutex_unlock(&priv->dm.drain_black_mutex);
if (ret)
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PTHREAD_ERROR,
- "pthread error", "error=%d", ret, NULL);
+ "error=%d", ret, NULL);
pthread_cleanup_pop(0);
gf_msg_debug(this->name, 0, "Woke up: Conditional wait on black fops");
}
@@ -1247,7 +1216,7 @@ changelog_drain_white_fops(xlator_t *this, changelog_priv_t *priv)
ret = pthread_mutex_lock(&priv->dm.drain_white_mutex);
if (ret)
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PTHREAD_ERROR,
- "pthread error", "error=%d", ret, NULL);
+ "error=%d", ret, NULL);
while (priv->dm.white_fop_cnt > 0) {
gf_msg_debug(this->name, 0, "Conditional wait on white fops : %ld",
priv->dm.white_fop_cnt);
@@ -1256,14 +1225,14 @@ changelog_drain_white_fops(xlator_t *this, changelog_priv_t *priv)
&priv->dm.drain_white_mutex);
if (ret)
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_MSG_PTHREAD_COND_WAIT_FAILED,
- "pthread cond wait failed", "error=%d", ret, NULL);
+ CHANGELOG_MSG_PTHREAD_COND_WAIT_FAILED, "error=%d", ret,
+ NULL);
}
priv->dm.drain_wait_white = _gf_false;
ret = pthread_mutex_unlock(&priv->dm.drain_white_mutex);
if (ret)
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PTHREAD_ERROR,
- "pthread error", "error=%d", ret, NULL);
+ "error=%d", ret, NULL);
pthread_cleanup_pop(0);
gf_msg_debug(this->name, 0, "Woke up: Conditional wait on white fops");
}
@@ -1292,7 +1261,7 @@ changelog_rollover(void *data)
while (1) {
(void)pthread_testcancel();
- tv.tv_sec = time(NULL) + priv->rollover_time;
+ tv.tv_sec = gf_time() + priv->rollover_time;
tv.tv_nsec = 0;
ret = 0; /* Reset ret to zero */
@@ -1315,12 +1284,12 @@ changelog_rollover(void *data)
pthread_cleanup_pop(0);
if (ret == 0) {
- gf_msg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_BARRIER_INFO,
- "Explicit wakeup on barrier notify");
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_BARRIER_INFO,
+ NULL);
priv->explicit_rollover = _gf_true;
} else if (ret && ret != ETIMEDOUT) {
- gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_SELECT_FAILED,
- "pthread_cond_timedwait failed");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_SELECT_FAILED, NULL);
continue;
} else if (ret && ret == ETIMEDOUT) {
gf_msg_debug(this->name, 0, "Wokeup on timeout");
@@ -1373,13 +1342,7 @@ changelog_rollover(void *data)
if (priv->explicit_rollover == _gf_true)
sleep(1);
- ret = changelog_fill_rollover_data(&cld, _gf_false);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_GET_TIME_OP_FAILED,
- "failed to fill rollover data");
- continue;
- }
+ changelog_fill_rollover_data(&cld, _gf_false);
_mask_cancellation();
@@ -1427,9 +1390,8 @@ changelog_fsync_thread(void *data)
ret = changelog_inject_single_event(this, priv, &cld);
if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_INJECT_FSYNC_FAILED,
- "failed to inject fsync event");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_INJECT_FSYNC_FAILED, NULL);
_unmask_cancellation();
}
@@ -1851,23 +1813,21 @@ changelog_fill_entry_buf(call_frame_t *frame, xlator_t *this, loc_t *loc,
parent = inode_parent(loc->inode, 0, 0);
if (!parent) {
gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_INODE_NOT_FOUND,
- "Parent inode not found", "gfid=%s",
- uuid_utoa(loc->inode->gfid), NULL);
+ "type=parent", "gfid=%s", uuid_utoa(loc->inode->gfid), NULL);
goto err;
}
CHANGELOG_INIT_NOCHECK(this, *local, loc->inode, loc->inode->gfid, 5);
if (!(*local)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_LOCAL_INIT_FAILED,
- "changelog local"
- " initiatilization failed");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_LOCAL_INIT_FAILED,
+ NULL);
goto err;
}
co = changelog_get_usable_buffer(*local);
if (!co) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_NO_MEMORY,
- "Failed to get buffer");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_GET_BUFFER_FAILED,
+ NULL);
goto err;
}
diff --git a/xlators/features/changelog/src/changelog-helpers.h b/xlators/features/changelog/src/changelog-helpers.h
index 10d457e8cf5..38fa7590c32 100644
--- a/xlators/features/changelog/src/changelog-helpers.h
+++ b/xlators/features/changelog/src/changelog-helpers.h
@@ -11,14 +11,14 @@
#ifndef _CHANGELOG_HELPERS_H
#define _CHANGELOG_HELPERS_H
-#include "locking.h"
-#include "timer.h"
+#include <glusterfs/locking.h>
+#include <glusterfs/timer.h>
#include "pthread.h"
-#include "iobuf.h"
-#include "rot-buffs.h"
+#include <glusterfs/iobuf.h>
+#include <glusterfs/rot-buffs.h>
#include "changelog-misc.h"
-#include "call-stub.h"
+#include <glusterfs/call-stub.h>
#include "rpcsvc.h"
#include "changelog-ev-handle.h"
@@ -31,7 +31,7 @@
*/
typedef struct changelog_log_data {
/* rollover related */
- unsigned long cld_roll_time;
+ time_t cld_roll_time;
/* reopen changelog? */
gf_boolean_t cld_finale;
@@ -97,12 +97,6 @@ struct changelog_encoder {
typedef struct changelog_time_slice {
/**
- * just in case we need nanosecond granularity some day.
- * field is unused as of now (maybe we'd need it later).
- */
- struct timeval tv_start;
-
- /**
* version of changelog file, incremented each time changes
* rollover.
*/
@@ -190,8 +184,12 @@ typedef struct changelog_ev_selector {
/* changelog's private structure */
struct changelog_priv {
+ /* changelog journalling */
gf_boolean_t active;
+ /* changelog live notifications */
+ gf_boolean_t rpc_active;
+
/* to generate unique socket file per brick */
char *changelog_brick;
@@ -307,6 +305,24 @@ struct changelog_priv {
/* glusterfind dependency to capture paths on deleted entries*/
gf_boolean_t capture_del_path;
+
+ /* Save total no. of listners */
+ gf_atomic_t listnercnt;
+
+ /* Save total no. of xprt are associated with listner */
+ gf_atomic_t xprtcnt;
+
+ /* Save xprt list */
+ struct list_head xprt_list;
+
+ /* Save total no. of client connection */
+ gf_atomic_t clntcnt;
+
+ /* Save cleanup brick in victim */
+ xlator_t *victim;
+
+ /* Status to save cleanup notify status */
+ gf_boolean_t notify_down;
};
struct changelog_local {
@@ -401,11 +417,11 @@ changelog_local_t *
changelog_local_init(xlator_t *this, inode_t *inode, uuid_t gfid,
int xtra_records, gf_boolean_t update_flag);
int
-changelog_start_next_change(xlator_t *this, changelog_priv_t *priv,
- unsigned long ts, gf_boolean_t finale);
+changelog_start_next_change(xlator_t *this, changelog_priv_t *priv, time_t ts,
+ gf_boolean_t finale);
int
changelog_open_journal(xlator_t *this, changelog_priv_t *priv);
-int
+void
changelog_fill_rollover_data(changelog_log_data_t *cld, gf_boolean_t is_last);
int
changelog_inject_single_event(xlator_t *this, changelog_priv_t *priv,
@@ -429,12 +445,11 @@ changelog_fsync_thread(void *data);
int
changelog_forget(xlator_t *this, inode_t *inode);
int
-htime_update(xlator_t *this, changelog_priv_t *priv, unsigned long ts,
- char *buffer);
+htime_update(xlator_t *this, changelog_priv_t *priv, time_t ts, char *buffer);
int
-htime_open(xlator_t *this, changelog_priv_t *priv, unsigned long ts);
+htime_open(xlator_t *this, changelog_priv_t *priv, time_t ts);
int
-htime_create(xlator_t *this, changelog_priv_t *priv, unsigned long ts);
+htime_create(xlator_t *this, changelog_priv_t *priv, time_t ts);
/* Geo-Rep snapshot dependency changes */
void
@@ -492,8 +507,6 @@ changelog_deselect_event(xlator_t *, changelog_ev_selector_t *, unsigned int);
int
changelog_init_event_selection(xlator_t *, changelog_ev_selector_t *);
int
-changelog_cleanup_event_selection(xlator_t *, changelog_ev_selector_t *);
-int
changelog_ev_selected(xlator_t *, changelog_ev_selector_t *, unsigned int);
void
changelog_dispatch_event(xlator_t *, changelog_priv_t *, changelog_event_t *);
@@ -656,8 +669,8 @@ resolve_pargfid_to_path(xlator_t *this, const uuid_t gfid, char **path,
#define CHANGELOG_NOT_ON_THEN_GOTO(priv, ret, label) \
do { \
if (!priv->active) { \
- gf_msg(this->name, GF_LOG_WARNING, 0, CHANGELOG_MSG_NOT_ACTIVE, \
- "Changelog is not active, return success"); \
+ gf_smsg(this->name, GF_LOG_WARNING, 0, \
+ CHANGELOG_MSG_CHANGELOG_NOT_ACTIVE, NULL); \
ret = 0; \
goto label; \
} \
@@ -668,7 +681,7 @@ resolve_pargfid_to_path(xlator_t *this, const uuid_t gfid, char **path,
do { \
if (ret) { \
gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_PTHREAD_ERROR, \
- "pthread error", "error=%d", ret, NULL); \
+ "error=%d", ret, NULL); \
ret = -1; \
goto label; \
} \
@@ -679,7 +692,7 @@ resolve_pargfid_to_path(xlator_t *this, const uuid_t gfid, char **path,
do { \
if (ret) { \
gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_PTHREAD_ERROR, \
- "pthread error", "error=%d", ret, NULL); \
+ "error=%d", ret, NULL); \
ret = -1; \
flag = _gf_true; \
goto label; \
@@ -691,7 +704,7 @@ resolve_pargfid_to_path(xlator_t *this, const uuid_t gfid, char **path,
do { \
if (ret) { \
gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_PTHREAD_ERROR, \
- "pthread error", "error=%d", ret, NULL); \
+ "error=%d", ret, NULL); \
ret = -1; \
pthread_mutex_unlock(&mutex); \
goto label; \
diff --git a/xlators/features/changelog/src/changelog-mem-types.h b/xlators/features/changelog/src/changelog-mem-types.h
index 1e3786c6298..a2d8a9cbe93 100644
--- a/xlators/features/changelog/src/changelog-mem-types.h
+++ b/xlators/features/changelog/src/changelog-mem-types.h
@@ -11,7 +11,7 @@
#ifndef _CHANGELOG_MEM_TYPES_H
#define _CHANGELOG_MEM_TYPES_H
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_changelog_mem_types {
gf_changelog_mt_priv_t = gf_common_mt_end + 1,
diff --git a/xlators/features/changelog/src/changelog-messages.h b/xlators/features/changelog/src/changelog-messages.h
index dbf133ec836..cb0e16c85d8 100644
--- a/xlators/features/changelog/src/changelog-messages.h
+++ b/xlators/features/changelog/src/changelog-messages.h
@@ -11,7 +11,7 @@
#ifndef _CHANGELOG_MESSAGES_H_
#define _CHANGELOG_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
@@ -24,7 +24,7 @@
*/
GLFS_MSGID(
- CHANGELOG, CHANGELOG_MSG_OPEN_FAILED, CHANGELOG_MSG_NO_MEMORY,
+ CHANGELOG, CHANGELOG_MSG_OPEN_FAILED, CHANGELOG_MSG_BARRIER_FOP_FAILED,
CHANGELOG_MSG_VOL_MISCONFIGURED, CHANGELOG_MSG_RENAME_ERROR,
CHANGELOG_MSG_READ_ERROR, CHANGELOG_MSG_HTIME_ERROR,
CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED,
@@ -37,11 +37,11 @@ GLFS_MSGID(
CHANGELOG_MSG_FSYNC_OP_FAILED, CHANGELOG_MSG_TOTAL_LOG_INFO,
CHANGELOG_MSG_SNAP_INFO, CHANGELOG_MSG_SELECT_FAILED,
CHANGELOG_MSG_FCNTL_FAILED, CHANGELOG_MSG_BNOTIFY_INFO,
- CHANGELOG_MSG_ENTRY_BUF_INFO, CHANGELOG_MSG_NOT_ACTIVE,
+ CHANGELOG_MSG_ENTRY_BUF_INFO, CHANGELOG_MSG_CHANGELOG_NOT_ACTIVE,
CHANGELOG_MSG_LOCAL_INIT_FAILED, CHANGELOG_MSG_NOTIFY_REGISTER_FAILED,
CHANGELOG_MSG_PROGRAM_NAME_REG_FAILED, CHANGELOG_MSG_HANDLE_PROBE_ERROR,
CHANGELOG_MSG_SET_FD_CONTEXT, CHANGELOG_MSG_FREEUP_FAILED,
- CHANGELOG_MSG_HTIME_INFO, CHANGELOG_MSG_RPC_SUBMIT_REPLY_FAILED,
+ CHANGELOG_MSG_RECONFIGURE, CHANGELOG_MSG_RPC_SUBMIT_REPLY_FAILED,
CHANGELOG_MSG_RPC_BUILD_ERROR, CHANGELOG_MSG_RPC_CONNECT_ERROR,
CHANGELOG_MSG_RPC_START_ERROR, CHANGELOG_MSG_BUFFER_STARVATION_ERROR,
CHANGELOG_MSG_SCAN_DIR_FAILED, CHANGELOG_MSG_FSETXATTR_FAILED,
@@ -52,6 +52,121 @@ GLFS_MSGID(
CHANGELOG_MSG_FSTAT_OP_FAILED, CHANGELOG_MSG_LSEEK_OP_FAILED,
CHANGELOG_MSG_STRSTR_OP_FAILED, CHANGELOG_MSG_UNLINK_OP_FAILED,
CHANGELOG_MSG_DETECT_EMPTY_CHANGELOG_FAILED,
- CHANGELOG_MSG_READLINK_OP_FAILED, CHANGELOG_MSG_EXPLICIT_ROLLOVER_FAILED);
+ CHANGELOG_MSG_READLINK_OP_FAILED, CHANGELOG_MSG_EXPLICIT_ROLLOVER_FAILED,
+ CHANGELOG_MSG_RPCSVC_NOTIFY_FAILED, CHANGELOG_MSG_MEMORY_INIT_FAILED,
+ CHANGELOG_MSG_NO_MEMORY, CHANGELOG_MSG_HTIME_STAT_ERROR,
+ CHANGELOG_MSG_HTIME_CURRENT_ERROR, CHANGELOG_MSG_BNOTIFY_COND_INFO,
+ CHANGELOG_MSG_NO_HTIME_CURRENT, CHANGELOG_MSG_HTIME_CURRENT,
+ CHANGELOG_MSG_NEW_HTIME_FILE, CHANGELOG_MSG_MKDIR_ERROR,
+ CHANGELOG_MSG_PATH_NOT_FOUND, CHANGELOG_MSG_XATTR_INIT_FAILED,
+ CHANGELOG_MSG_WROTE_TO_CSNAP, CHANGELOG_MSG_UNUSED_0,
+ CHANGELOG_MSG_GET_BUFFER_FAILED, CHANGELOG_MSG_BARRIER_STATE_NOTIFY,
+ CHANGELOG_MSG_BARRIER_DISABLED, CHANGELOG_MSG_BARRIER_ALREADY_DISABLED,
+ CHANGELOG_MSG_BARRIER_ON_ERROR, CHANGELOG_MSG_BARRIER_ENABLE,
+ CHANGELOG_MSG_BARRIER_KEY_NOT_FOUND, CHANGELOG_MSG_ERROR_IN_DICT_GET,
+ CHANGELOG_MSG_UNUSED_1, CHANGELOG_MSG_UNUSED_2,
+ CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS,
+ CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS_FINISHED,
+ CHANGELOG_MSG_BARRIER_TIMEOUT, CHANGELOG_MSG_TIMEOUT_ADD_FAILED,
+ CHANGELOG_MSG_CLEANUP_ALREADY_SET);
+#define CHANGELOG_MSG_BARRIER_FOP_FAILED_STR \
+ "failed to barrier FOPs, disabling changelog barrier"
+#define CHANGELOG_MSG_MEMORY_INIT_FAILED_STR "memory accounting init failed"
+#define CHANGELOG_MSG_NO_MEMORY_STR "failed to create local memory pool"
+#define CHANGELOG_MSG_ENTRY_BUF_INFO_STR \
+ "Entry cannot be captured for gfid, Capturing DATA entry."
+#define CHANGELOG_MSG_PTHREAD_ERROR_STR "pthread error"
+#define CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED_STR "pthread_mutex_init failed"
+#define CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED_STR "pthread_cond_init failed"
+#define CHANGELOG_MSG_HTIME_ERROR_STR "failed to update HTIME file"
+#define CHANGELOG_MSG_HTIME_STAT_ERROR_STR "unable to stat htime file"
+#define CHANGELOG_MSG_HTIME_CURRENT_ERROR_STR "Error extracting HTIME_CURRENT."
+#define CHANGELOG_MSG_UNLINK_OP_FAILED_STR "error unlinking empty changelog"
+#define CHANGELOG_MSG_RENAME_ERROR_STR "error renaming"
+#define CHANGELOG_MSG_MKDIR_ERROR_STR "unable to create directory"
+#define CHANGELOG_MSG_BNOTIFY_INFO_STR \
+ "Explicit rollover changelog signaling bnotify"
+#define CHANGELOG_MSG_BNOTIFY_COND_INFO_STR "Woke up: bnotify conditional wait"
+#define CHANGELOG_MSG_RECONFIGURE_STR "Reconfigure: Changelog Enable"
+#define CHANGELOG_MSG_NO_HTIME_CURRENT_STR \
+ "HTIME_CURRENT not found. Changelog enabled before init"
+#define CHANGELOG_MSG_HTIME_CURRENT_STR "HTIME_CURRENT"
+#define CHANGELOG_MSG_NEW_HTIME_FILE_STR \
+ "Changelog enable: Creating new HTIME file"
+#define CHANGELOG_MSG_FGETXATTR_FAILED_STR "fgetxattr failed"
+#define CHANGELOG_MSG_TOTAL_LOG_INFO_STR "changelog info"
+#define CHANGELOG_MSG_PTHREAD_COND_WAIT_FAILED_STR "pthread cond wait failed"
+#define CHANGELOG_MSG_INODE_NOT_FOUND_STR "inode not found"
+#define CHANGELOG_MSG_READLINK_OP_FAILED_STR \
+ "could not read the link from the gfid handle"
+#define CHANGELOG_MSG_OPEN_FAILED_STR "unable to open file"
+#define CHANGELOG_MSG_RPC_CONNECT_ERROR_STR "failed to connect back"
+#define CHANGELOG_MSG_BUFFER_STARVATION_ERROR_STR \
+ "Failed to get buffer for RPC dispatch"
+#define CHANGELOG_MSG_PTHREAD_CANCEL_FAILED_STR "could not cancel thread"
+#define CHANGELOG_MSG_FSTAT_OP_FAILED_STR "Could not stat (CHANGELOG)"
+#define CHANGELOG_MSG_LSEEK_OP_FAILED_STR "Could not lseek (changelog)"
+#define CHANGELOG_MSG_PATH_NOT_FOUND_STR \
+ "Could not find CHANGELOG in changelog path"
+#define CHANGELOG_MSG_FSYNC_OP_FAILED_STR "fsync failed"
+#define CHANGELOG_MSG_DETECT_EMPTY_CHANGELOG_FAILED_STR \
+ "Error detecting empty changelog"
+#define CHANGELOG_MSG_EXPLICIT_ROLLOVER_FAILED_STR \
+ "Fail snapshot because of previous errors"
+#define CHANGELOG_MSG_SCAN_DIR_FAILED_STR "scandir failed"
+#define CHANGELOG_MSG_FSETXATTR_FAILED_STR "fsetxattr failed"
+#define CHANGELOG_MSG_XATTR_INIT_FAILED_STR "Htime xattr initialization failed"
+#define CHANGELOG_MSG_SNAP_INFO_STR "log in call path"
+#define CHANGELOG_MSG_WRITE_FAILED_STR "error writing to disk"
+#define CHANGELOG_MSG_WROTE_TO_CSNAP_STR "Successfully wrote to csnap"
+#define CHANGELOG_MSG_GET_TIME_OP_FAILED_STR "Problem rolling over changelog(s)"
+#define CHANGELOG_MSG_BARRIER_INFO_STR "Explicit wakeup on barrier notify"
+#define CHANGELOG_MSG_SELECT_FAILED_STR "pthread_cond_timedwait failed"
+#define CHANGELOG_MSG_INJECT_FSYNC_FAILED_STR "failed to inject fsync event"
+#define CHANGELOG_MSG_LOCAL_INIT_FAILED_STR \
+ "changelog local initialization failed"
+#define CHANGELOG_MSG_GET_BUFFER_FAILED_STR "Failed to get buffer"
+#define CHANGELOG_MSG_SET_FD_CONTEXT_STR \
+ "could not set fd context(for release cbk)"
+#define CHANGELOG_MSG_DICT_GET_FAILED_STR "Barrier failed"
+#define CHANGELOG_MSG_BARRIER_STATE_NOTIFY_STR "Barrier notification"
+#define CHANGELOG_MSG_BARRIER_ERROR_STR \
+ "Received another barrier off notification while already off"
+#define CHANGELOG_MSG_BARRIER_DISABLED_STR "disabled changelog barrier"
+#define CHANGELOG_MSG_BARRIER_ALREADY_DISABLED_STR \
+ "Changelog barrier already disabled"
+#define CHANGELOG_MSG_BARRIER_ON_ERROR_STR \
+ "Received another barrier on notification when last one is not served yet"
+#define CHANGELOG_MSG_BARRIER_ENABLE_STR "Enabled changelog barrier"
+#define CHANGELOG_MSG_BARRIER_KEY_NOT_FOUND_STR "barrier key not found"
+#define CHANGELOG_MSG_ERROR_IN_DICT_GET_STR \
+ "Something went wrong in dict_get_str_boolean"
+#define CHANGELOG_MSG_DIR_OPTIONS_NOT_SET_STR "changelog-dir option is not set"
+#define CHANGELOG_MSG_FREEUP_FAILED_STR "could not cleanup bootstrapper"
+#define CHANGELOG_MSG_CHILD_MISCONFIGURED_STR \
+ "translator needs a single subvolume"
+#define CHANGELOG_MSG_VOL_MISCONFIGURED_STR \
+ "dangling volume. please check volfile"
+#define CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS_STR \
+ "Dequeuing all the changelog barriered fops"
+#define CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS_FINISHED_STR \
+ "Dequeuing changelog barriered fops is finished"
+#define CHANGELOG_MSG_BARRIER_TIMEOUT_STR \
+ "Disabling changelog barrier because of the timeout"
+#define CHANGELOG_MSG_TIMEOUT_ADD_FAILED_STR \
+ "Couldn't add changelog barrier timeout event"
+#define CHANGELOG_MSG_RPC_BUILD_ERROR_STR "failed to build rpc options"
+#define CHANGELOG_MSG_NOTIFY_REGISTER_FAILED_STR "failed to register notify"
+#define CHANGELOG_MSG_RPC_START_ERROR_STR "failed to start rpc"
+#define CHANGELOG_MSG_CREATE_FRAME_FAILED_STR "failed to create frame"
+#define CHANGELOG_MSG_RPC_SUBMIT_REPLY_FAILED_STR "failed to serialize reply"
+#define CHANGELOG_MSG_PROGRAM_NAME_REG_FAILED_STR "cannot register program"
+#define CHANGELOG_MSG_CHANGELOG_NOT_ACTIVE_STR \
+ "Changelog is not active, return success"
+#define CHANGELOG_MSG_PUT_BUFFER_FAILED_STR \
+ "failed to put buffer after consumption"
+#define CHANGELOG_MSG_CLEANUP_ALREADY_SET_STR \
+ "cleanup_starting flag is already set for xl"
+#define CHANGELOG_MSG_HANDLE_PROBE_ERROR_STR "xdr decoding error"
#endif /* !_CHANGELOG_MESSAGES_H_ */
diff --git a/xlators/features/changelog/src/changelog-misc.h b/xlators/features/changelog/src/changelog-misc.h
index 04d1bdeba03..e2addc09414 100644
--- a/xlators/features/changelog/src/changelog-misc.h
+++ b/xlators/features/changelog/src/changelog-misc.h
@@ -11,8 +11,8 @@
#ifndef _CHANGELOG_MISC_H
#define _CHANGELOG_MISC_H
-#include "glusterfs.h"
-#include "common-utils.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/common-utils.h>
#define CHANGELOG_MAX_TYPE 4
#define CHANGELOG_FILE_NAME "CHANGELOG"
diff --git a/xlators/features/changelog/src/changelog-rpc-common.c b/xlators/features/changelog/src/changelog-rpc-common.c
index 91d6581836a..125246a17e1 100644
--- a/xlators/features/changelog/src/changelog-rpc-common.c
+++ b/xlators/features/changelog/src/changelog-rpc-common.c
@@ -11,7 +11,7 @@
#include "changelog-rpc-common.h"
#include "changelog-messages.h"
-#include "syscall.h"
+#include <glusterfs/syscall.h>
/**
*****************************************************
Client Interface
@@ -28,7 +28,7 @@ changelog_rpc_poller(void *arg)
{
xlator_t *this = arg;
- (void)event_dispatch(this->ctx->event_pool);
+ (void)gf_event_dispatch(this->ctx->event_pool);
return NULL;
}
@@ -47,10 +47,10 @@ changelog_rpc_client_init(xlator_t *this, void *cbkdata, char *sockfile,
if (!options)
goto error_return;
- ret = rpc_transport_unix_options_build(&options, sockfile, 0);
+ ret = rpc_transport_unix_options_build(options, sockfile, 0);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_BUILD_ERROR,
- "failed to build rpc options");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_BUILD_ERROR,
+ NULL);
goto dealloc_dict;
}
@@ -60,19 +60,19 @@ changelog_rpc_client_init(xlator_t *this, void *cbkdata, char *sockfile,
ret = rpc_clnt_register_notify(rpc, fn, cbkdata);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_NOTIFY_REGISTER_FAILED,
- "failed to register notify");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_NOTIFY_REGISTER_FAILED, NULL);
goto dealloc_rpc_clnt;
}
ret = rpc_clnt_start(rpc);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_START_ERROR,
- "failed to start rpc");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_START_ERROR,
+ NULL);
goto dealloc_rpc_clnt;
}
+ dict_unref(options);
return rpc;
dealloc_rpc_clnt:
@@ -164,8 +164,8 @@ changelog_invoke_rpc(xlator_t *this, struct rpc_clnt *rpc,
frame = create_frame(this, this->ctx->pool);
if (!frame) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_CREATE_FRAME_FAILED,
- "failed to create frame");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_CREATE_FRAME_FAILED,
+ NULL);
goto error_return;
}
@@ -238,8 +238,8 @@ changelog_rpc_sumbit_reply(rpcsvc_request_t *req, void *arg,
iob = __changelog_rpc_serialize_reply(req, arg, &iov, xdrproc);
if (!iob)
- gf_msg("", GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_SUBMIT_REPLY_FAILED,
- "failed to serialize reply");
+ gf_smsg("", GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_SUBMIT_REPLY_FAILED,
+ NULL);
else
iobref_add(iobref, iob);
@@ -260,6 +260,10 @@ changelog_rpc_server_destroy(xlator_t *this, rpcsvc_t *rpc, char *sockfile,
rpcsvc_listener_t *listener = NULL;
rpcsvc_listener_t *next = NULL;
struct rpcsvc_program *prog = NULL;
+ rpc_transport_t *trans = NULL;
+
+ if (!rpc)
+ return;
while (*progs) {
prog = *progs;
@@ -269,22 +273,25 @@ changelog_rpc_server_destroy(xlator_t *this, rpcsvc_t *rpc, char *sockfile,
list_for_each_entry_safe(listener, next, &rpc->listeners, list)
{
- rpcsvc_listener_destroy(listener);
+ if (listener->trans) {
+ trans = listener->trans;
+ rpc_transport_disconnect(trans, _gf_false);
+ }
}
(void)rpcsvc_unregister_notify(rpc, fn, this);
- sys_unlink(sockfile);
- if (rpc->rxpool) {
- mem_pool_destroy(rpc->rxpool);
- rpc->rxpool = NULL;
- }
/* TODO Avoid freeing rpc object in case of brick multiplex
after freeing rpc object svc->rpclock corrupted and it takes
more time to detach a brick
*/
- if (!this->cleanup_starting)
+ if (!this->cleanup_starting) {
+ if (rpc->rxpool) {
+ mem_pool_destroy(rpc->rxpool);
+ rpc->rxpool = NULL;
+ }
GF_FREE(rpc);
+ }
}
rpcsvc_t *
@@ -299,22 +306,25 @@ changelog_rpc_server_init(xlator_t *this, char *sockfile, void *cbkdata,
if (!cbkdata)
cbkdata = this;
- ret = rpcsvc_transport_unix_options_build(&options, sockfile);
+ options = dict_new();
+ if (!options)
+ return NULL;
+
+ ret = rpcsvc_transport_unix_options_build(options, sockfile);
if (ret)
goto dealloc_dict;
rpc = rpcsvc_init(this, this->ctx, options, 8);
if (rpc == NULL) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_START_ERROR,
- "failed to init rpc");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_START_ERROR,
+ NULL);
goto dealloc_dict;
}
ret = rpcsvc_register_notify(rpc, fn, cbkdata);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_NOTIFY_REGISTER_FAILED,
- "failed to register notify function");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_NOTIFY_REGISTER_FAILED, NULL);
goto dealloc_rpc;
}
@@ -328,11 +338,10 @@ changelog_rpc_server_init(xlator_t *this, char *sockfile, void *cbkdata,
prog = *progs;
ret = rpcsvc_program_register(rpc, prog, _gf_false);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_PROGRAM_NAME_REG_FAILED,
- "cannot register program "
- "(name: %s, prognum: %d, pogver: %d)",
- prog->progname, prog->prognum, prog->progver);
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_PROGRAM_NAME_REG_FAILED, "name%s",
+ prog->progname, "prognum=%d", prog->prognum, "pogver=%d",
+ prog->progver, NULL);
goto dealloc_rpc;
}
diff --git a/xlators/features/changelog/src/changelog-rpc-common.h b/xlators/features/changelog/src/changelog-rpc-common.h
index 2d3f06e60c0..4d9aa2c694b 100644
--- a/xlators/features/changelog/src/changelog-rpc-common.h
+++ b/xlators/features/changelog/src/changelog-rpc-common.h
@@ -13,8 +13,8 @@
#include "rpcsvc.h"
#include "rpc-clnt.h"
-#include "gf-event.h"
-#include "call-stub.h"
+#include <glusterfs/gf-event.h>
+#include <glusterfs/call-stub.h>
#include "changelog-xdr.h"
#include "xdr-generic.h"
diff --git a/xlators/features/changelog/src/changelog-rpc.c b/xlators/features/changelog/src/changelog-rpc.c
index 828f85e8e45..440b88091a6 100644
--- a/xlators/features/changelog/src/changelog-rpc.c
+++ b/xlators/features/changelog/src/changelog-rpc.c
@@ -8,12 +8,12 @@
cases as published by the Free Software Foundation.
*/
-#include "syscall.h"
+#include <glusterfs/syscall.h>
#include "changelog-rpc.h"
#include "changelog-mem-types.h"
#include "changelog-ev-handle.h"
-struct rpcsvc_program *changelog_programs[];
+static struct rpcsvc_program *changelog_programs[];
static void
changelog_cleanup_dispatchers(xlator_t *this, changelog_priv_t *priv, int count)
@@ -43,9 +43,6 @@ changelog_cleanup_rpc_threads(xlator_t *this, changelog_priv_t *priv)
/** terminate dispatcher thread(s) */
changelog_cleanup_dispatchers(this, priv, priv->nr_dispatchers);
- /* TODO: what about pending and waiting connections? */
- changelog_ev_cleanup_connections(this, conn);
-
/* destroy locks */
ret = pthread_mutex_destroy(&conn->pending_lock);
if (ret != 0)
@@ -72,9 +69,6 @@ changelog_init_rpc_threads(xlator_t *this, changelog_priv_t *priv, rbuf_t *rbuf,
int j = 0;
int ret = 0;
changelog_clnt_t *conn = NULL;
- char thread_name[GF_THREAD_NAMEMAX] = {
- 0,
- };
conn = &priv->connections;
@@ -114,9 +108,9 @@ changelog_init_rpc_threads(xlator_t *this, changelog_priv_t *priv, rbuf_t *rbuf,
/* spawn dispatcher threads */
for (; j < nr_dispatchers; j++) {
- snprintf(thread_name, sizeof(thread_name), "clogd%03hx", (j & 0x3ff));
ret = gf_thread_create(&priv->ev_dispatcher[j], NULL,
- changelog_ev_dispatch, conn, thread_name);
+ changelog_ev_dispatch, conn, "clogd%03hx",
+ j & 0x3ff);
if (ret != 0) {
changelog_cleanup_dispatchers(this, priv, j);
break;
@@ -147,48 +141,146 @@ int
changelog_rpcsvc_notify(rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
void *data)
{
+ xlator_t *this = NULL;
+ rpc_transport_t *trans = NULL;
+ rpc_transport_t *xprt = NULL;
+ rpc_transport_t *xp_next = NULL;
+ changelog_priv_t *priv = NULL;
+ uint64_t listnercnt = 0;
+ uint64_t xprtcnt = 0;
+ uint64_t clntcnt = 0;
+ rpcsvc_listener_t *listener = NULL;
+ rpcsvc_listener_t *next = NULL;
+ gf_boolean_t listner_found = _gf_false;
+ socket_private_t *sockpriv = NULL;
+
+ if (!xl || !data || !rpc) {
+ gf_msg_callingfn("changelog", GF_LOG_WARNING, 0,
+ CHANGELOG_MSG_RPCSVC_NOTIFY_FAILED,
+ "Calling rpc_notify without initializing");
+ goto out;
+ }
+
+ this = xl;
+ trans = data;
+ priv = this->private;
+
+ if (!priv) {
+ gf_msg_callingfn("changelog", GF_LOG_WARNING, 0,
+ CHANGELOG_MSG_RPCSVC_NOTIFY_FAILED,
+ "Calling rpc_notify without priv initializing");
+ goto out;
+ }
+
+ if (event == RPCSVC_EVENT_ACCEPT) {
+ GF_ATOMIC_INC(priv->xprtcnt);
+ LOCK(&priv->lock);
+ {
+ list_add_tail(&trans->list, &priv->xprt_list);
+ }
+ UNLOCK(&priv->lock);
+ goto out;
+ }
+
+ if (event == RPCSVC_EVENT_DISCONNECT) {
+ list_for_each_entry_safe(listener, next, &rpc->listeners, list)
+ {
+ if (listener && listener->trans) {
+ if (listener->trans == trans) {
+ listnercnt = GF_ATOMIC_DEC(priv->listnercnt);
+ listner_found = _gf_true;
+ rpcsvc_listener_destroy(listener);
+ }
+ }
+ }
+
+ if (listnercnt > 0) {
+ goto out;
+ }
+ if (listner_found) {
+ LOCK(&priv->lock);
+ list_for_each_entry_safe(xprt, xp_next, &priv->xprt_list, list)
+ {
+ sockpriv = (socket_private_t *)(xprt->private);
+ gf_log("changelog", GF_LOG_INFO,
+ "Send disconnect"
+ " on socket %d",
+ sockpriv->sock);
+ rpc_transport_disconnect(xprt, _gf_false);
+ }
+ UNLOCK(&priv->lock);
+ goto out;
+ }
+ LOCK(&priv->lock);
+ {
+ list_del_init(&trans->list);
+ }
+ UNLOCK(&priv->lock);
+
+ xprtcnt = GF_ATOMIC_DEC(priv->xprtcnt);
+ clntcnt = GF_ATOMIC_GET(priv->clntcnt);
+ if (!xprtcnt && !clntcnt) {
+ changelog_process_cleanup_event(this);
+ }
+ }
+
+out:
return 0;
}
void
+changelog_process_cleanup_event(xlator_t *this)
+{
+ gf_boolean_t cleanup_notify = _gf_false;
+ changelog_priv_t *priv = NULL;
+ char sockfile[UNIX_PATH_MAX] = {
+ 0,
+ };
+
+ if (!this)
+ return;
+ priv = this->private;
+ if (!priv)
+ return;
+
+ LOCK(&priv->lock);
+ {
+ cleanup_notify = priv->notify_down;
+ priv->notify_down = _gf_true;
+ }
+ UNLOCK(&priv->lock);
+
+ if (priv->victim && !cleanup_notify) {
+ default_notify(this, GF_EVENT_PARENT_DOWN, priv->victim);
+
+ if (priv->rpc) {
+ /* sockfile path could have been saved to avoid this */
+ CHANGELOG_MAKE_SOCKET_PATH(priv->changelog_brick, sockfile,
+ UNIX_PATH_MAX);
+ sys_unlink(sockfile);
+ (void)rpcsvc_unregister_notify(priv->rpc, changelog_rpcsvc_notify,
+ this);
+ if (priv->rpc->rxpool) {
+ mem_pool_destroy(priv->rpc->rxpool);
+ priv->rpc->rxpool = NULL;
+ }
+ GF_FREE(priv->rpc);
+ priv->rpc = NULL;
+ }
+ }
+}
+
+void
changelog_destroy_rpc_listner(xlator_t *this, changelog_priv_t *priv)
{
char sockfile[UNIX_PATH_MAX] = {
0,
};
- changelog_clnt_t *c_clnt = &priv->connections;
- changelog_rpc_clnt_t *crpc = NULL;
- int nofconn = 0;
/* sockfile path could have been saved to avoid this */
CHANGELOG_MAKE_SOCKET_PATH(priv->changelog_brick, sockfile, UNIX_PATH_MAX);
changelog_rpc_server_destroy(this, priv->rpc, sockfile,
changelog_rpcsvc_notify, changelog_programs);
-
- /* TODO Below approach is not perfect to wait for cleanup
- all active connections without this code brick process
- can be crash in case of brick multiplexing if any in-progress
- request process on rpc by changelog xlator after
- cleanup resources
- */
-
- if (c_clnt) {
- do {
- nofconn = 0;
- LOCK(&c_clnt->active_lock);
- list_for_each_entry(crpc, &c_clnt->active, list) { nofconn++; }
- UNLOCK(&c_clnt->active_lock);
- LOCK(&c_clnt->wait_lock);
- list_for_each_entry(crpc, &c_clnt->waitq, list) { nofconn++; }
- UNLOCK(&c_clnt->wait_lock);
- pthread_mutex_lock(&c_clnt->pending_lock);
- list_for_each_entry(crpc, &c_clnt->pending, list) { nofconn++; }
- pthread_mutex_unlock(&c_clnt->pending_lock);
-
- } while (nofconn); /* Wait for all connection cleanup */
- }
-
- (void)changelog_cleanup_rpc_threads(this, priv);
}
rpcsvc_t *
@@ -287,16 +379,15 @@ changelog_handle_probe(rpcsvc_request_t *req)
this = req->trans->xl;
if (this->cleanup_starting) {
- gf_msg(this->name, GF_LOG_DEBUG, 0, CHANGELOG_MSG_HANDLE_PROBE_ERROR,
- "cleanup_starting flag is already set for xl");
+ gf_smsg(this->name, GF_LOG_DEBUG, 0, CHANGELOG_MSG_CLEANUP_ALREADY_SET,
+ NULL);
return 0;
}
ret = xdr_to_generic(req->msg[0], &rpc_req,
(xdrproc_t)xdr_changelog_probe_req);
if (ret < 0) {
- gf_msg("", GF_LOG_ERROR, 0, CHANGELOG_MSG_HANDLE_PROBE_ERROR,
- "xdr decoding error");
+ gf_smsg("", GF_LOG_ERROR, 0, CHANGELOG_MSG_HANDLE_PROBE_ERROR, NULL);
req->rpc_err = GARBAGE_ARGS;
goto handle_xdr_error;
}
@@ -328,13 +419,13 @@ submit_rpc:
* RPC declarations
*/
-rpcsvc_actor_t changelog_svc_actors[CHANGELOG_RPC_PROC_MAX] = {
+static rpcsvc_actor_t changelog_svc_actors[CHANGELOG_RPC_PROC_MAX] = {
[CHANGELOG_RPC_PROBE_FILTER] = {"CHANGELOG PROBE FILTER",
- CHANGELOG_RPC_PROBE_FILTER,
- changelog_handle_probe, NULL, 0, DRC_NA},
+ changelog_handle_probe, NULL,
+ CHANGELOG_RPC_PROBE_FILTER, DRC_NA, 0},
};
-struct rpcsvc_program changelog_svc_prog = {
+static struct rpcsvc_program changelog_svc_prog = {
.progname = CHANGELOG_RPC_PROGNAME,
.prognum = CHANGELOG_RPC_PROGNUM,
.progver = CHANGELOG_RPC_PROGVER,
@@ -343,7 +434,7 @@ struct rpcsvc_program changelog_svc_prog = {
.synctask = _gf_true,
};
-struct rpcsvc_program *changelog_programs[] = {
+static struct rpcsvc_program *changelog_programs[] = {
&changelog_svc_prog,
NULL,
};
diff --git a/xlators/features/changelog/src/changelog-rpc.h b/xlators/features/changelog/src/changelog-rpc.h
index 8002cea5091..b1707565249 100644
--- a/xlators/features/changelog/src/changelog-rpc.h
+++ b/xlators/features/changelog/src/changelog-rpc.h
@@ -11,7 +11,7 @@
#ifndef __CHANGELOG_RPC_H
#define __CHANGELOG_RPC_H
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#include "changelog-helpers.h"
/* one time */
diff --git a/xlators/features/changelog/src/changelog-rt.c b/xlators/features/changelog/src/changelog-rt.c
index 968c76b8b20..841545ae359 100644
--- a/xlators/features/changelog/src/changelog-rt.c
+++ b/xlators/features/changelog/src/changelog-rt.c
@@ -8,9 +8,9 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
-#include "logging.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/logging.h>
#include "changelog-rt.h"
#include "changelog-mem-types.h"
diff --git a/xlators/features/changelog/src/changelog-rt.h b/xlators/features/changelog/src/changelog-rt.h
index df0d5b03487..28b9827d85b 100644
--- a/xlators/features/changelog/src/changelog-rt.h
+++ b/xlators/features/changelog/src/changelog-rt.h
@@ -11,8 +11,8 @@
#ifndef _CHANGELOG_RT_H
#define _CHANGELOG_RT_H
-#include "locking.h"
-#include "timer.h"
+#include <glusterfs/locking.h>
+#include <glusterfs/timer.h>
#include "pthread.h"
#include "changelog-helpers.h"
diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c
index 35a523316ed..6a6e5af859e 100644
--- a/xlators/features/changelog/src/changelog.c
+++ b/xlators/features/changelog/src/changelog.c
@@ -8,11 +8,11 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
-#include "syscall.h"
-#include "logging.h"
-#include "iobuf.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/iobuf.h>
#include "changelog-rt.h"
@@ -34,6 +34,12 @@ static struct changelog_bootstrap cb_bootstrap[] = {
},
};
+static int
+changelog_init_rpc(xlator_t *this, changelog_priv_t *priv);
+
+static int
+changelog_init(xlator_t *this, changelog_priv_t *priv);
+
/* Entry operations - TYPE III */
/**
@@ -149,9 +155,8 @@ changelog_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
goto out;
}
if (barrier_enabled && !stub) {
- gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, CHANGELOG_MSG_NO_MEMORY,
- "Failed to barrier FOPs, disabling changelog barrier",
- "fop=rmdir", NULL);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,
+ CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=rmdir", NULL);
chlog_barrier_dequeue_all(this, &queue);
}
@@ -298,9 +303,8 @@ changelog_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
goto out;
}
if (barrier_enabled && !stub) {
- gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, CHANGELOG_MSG_NO_MEMORY,
- "Failed to barrier FOPs, disabling changelog barrier",
- "fop=unlink", NULL);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,
+ CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=unlink", NULL);
chlog_barrier_dequeue_all(this, &queue);
}
@@ -418,9 +422,8 @@ changelog_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
goto out;
}
if (barrier_enabled && !stub) {
- gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, CHANGELOG_MSG_NO_MEMORY,
- "Failed to barrier FOPs, disabling changelog barrier",
- "fop=rename", NULL);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,
+ CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=rename", NULL);
chlog_barrier_dequeue_all(this, &queue);
}
/* changelog barrier */
@@ -531,8 +534,7 @@ changelog_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
}
if (barrier_enabled && !stub) {
- gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_NO_MEMORY,
- "Failed to barrier FOPs, disabling changelog barrier",
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_BARRIER_FOP_FAILED,
"fop=link", NULL);
chlog_barrier_dequeue_all(this, &queue);
}
@@ -660,9 +662,8 @@ changelog_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
}
if (barrier_enabled && !stub) {
- gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, CHANGELOG_MSG_NO_MEMORY,
- "Failed to barrier FOPs, disabling changelog barrier",
- "fop=mkdir", NULL);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,
+ CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=mkdir", NULL);
chlog_barrier_dequeue_all(this, &queue);
}
@@ -782,9 +783,8 @@ changelog_symlink(call_frame_t *frame, xlator_t *this, const char *linkname,
}
if (barrier_enabled && !stub) {
- gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, CHANGELOG_MSG_NO_MEMORY,
- "Failed to barrier FOPs, disabling changelog barrier",
- "fop=symlink", NULL);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,
+ CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=symlink", NULL);
chlog_barrier_dequeue_all(this, &queue);
}
@@ -929,9 +929,8 @@ changelog_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
}
if (barrier_enabled && !stub) {
- gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, CHANGELOG_MSG_NO_MEMORY,
- "Failed to barrier FOPs, disabling changelog barrier",
- "fop=mknod", NULL);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,
+ CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=mknod", NULL);
chlog_barrier_dequeue_all(this, &queue);
}
@@ -972,8 +971,8 @@ changelog_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
CHANGELOG_OP_TYPE_RELEASE)) {
ret = fd_ctx_set(fd, this, (uint64_t)(long)0x1);
if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, CHANGELOG_MSG_SET_FD_CONTEXT,
- "could not set fd context (for release cbk)");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, CHANGELOG_MSG_SET_FD_CONTEXT,
+ NULL);
}
changelog_update(this, priv, local, CHANGELOG_TYPE_ENTRY);
@@ -1083,9 +1082,8 @@ changelog_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
}
if (barrier_enabled && !stub) {
- gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, CHANGELOG_MSG_NO_MEMORY,
- "Failed to barrier FOPs, disabling changelog barrier",
- "fop=create", NULL);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,
+ CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=create", NULL);
chlog_barrier_dequeue_all(this, &queue);
}
@@ -1388,9 +1386,6 @@ changelog_handle_virtual_xattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
ret = changelog_fill_entry_buf(frame, this, loc, &local);
if (ret) {
gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_ENTRY_BUF_INFO,
- "Entry cannot be"
- " captured for gfid, Capturing DATA"
- " entry.",
"gfid=%s", uuid_utoa(loc->inode->gfid), NULL);
goto unwind;
}
@@ -1806,8 +1801,8 @@ changelog_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
CHANGELOG_OP_TYPE_RELEASE)) {
ret = fd_ctx_set(fd, this, (uint64_t)(long)0x1);
if (ret)
- gf_msg(this->name, GF_LOG_WARNING, 0, CHANGELOG_MSG_SET_FD_CONTEXT,
- "could not set fd context (for release cbk)");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, CHANGELOG_MSG_SET_FD_CONTEXT,
+ NULL);
}
unwind:
@@ -2004,6 +1999,15 @@ notify(xlator_t *this, int event, void *data, ...)
struct list_head queue = {
0,
};
+ uint64_t xprtcnt = 0;
+ uint64_t clntcnt = 0;
+ changelog_clnt_t *conn = NULL;
+ gf_boolean_t cleanup_notify = _gf_false;
+ char sockfile[UNIX_PATH_MAX] = {
+ 0,
+ };
+ rpcsvc_listener_t *listener = NULL;
+ rpcsvc_listener_t *next = NULL;
INIT_LIST_HEAD(&queue);
@@ -2011,6 +2015,50 @@ notify(xlator_t *this, int event, void *data, ...)
if (!priv)
goto out;
+ if (event == GF_EVENT_PARENT_DOWN) {
+ priv->victim = data;
+ gf_log(this->name, GF_LOG_INFO,
+ "cleanup changelog rpc connection of brick %s",
+ priv->victim->name);
+
+ if (priv->rpc_active) {
+ this->cleanup_starting = 1;
+ changelog_destroy_rpc_listner(this, priv);
+ conn = &priv->connections;
+ if (conn)
+ changelog_ev_cleanup_connections(this, conn);
+ xprtcnt = GF_ATOMIC_GET(priv->xprtcnt);
+ clntcnt = GF_ATOMIC_GET(priv->clntcnt);
+ if (!xprtcnt && !clntcnt) {
+ LOCK(&priv->lock);
+ {
+ cleanup_notify = priv->notify_down;
+ priv->notify_down = _gf_true;
+ }
+ UNLOCK(&priv->lock);
+ if (priv->rpc) {
+ list_for_each_entry_safe(listener, next,
+ &priv->rpc->listeners, list)
+ {
+ if (listener->trans) {
+ rpc_transport_unref(listener->trans);
+ }
+ }
+ rpcsvc_destroy(priv->rpc);
+ priv->rpc = NULL;
+ }
+ CHANGELOG_MAKE_SOCKET_PATH(priv->changelog_brick, sockfile,
+ UNIX_PATH_MAX);
+ sys_unlink(sockfile);
+ if (!cleanup_notify)
+ default_notify(this, GF_EVENT_PARENT_DOWN, data);
+ }
+ } else {
+ default_notify(this, GF_EVENT_PARENT_DOWN, data);
+ }
+ goto out;
+ }
+
if (event == GF_EVENT_TRANSLATOR_OP) {
dict = data;
@@ -2018,15 +2066,15 @@ notify(xlator_t *this, int event, void *data, ...)
switch (barrier) {
case DICT_ERROR:
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_DICT_GET_FAILED,
- "Barrier dict_get_str_boolean failed");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_DICT_GET_FAILED, "dict_get_str_boolean",
+ NULL);
ret = -1;
goto out;
case BARRIER_OFF:
- gf_msg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_BARRIER_INFO,
- "Barrier off notification");
+ gf_smsg(this->name, GF_LOG_INFO, 0,
+ CHANGELOG_MSG_BARRIER_STATE_NOTIFY, "off", NULL);
CHANGELOG_NOT_ON_THEN_GOTO(priv, ret, out);
LOCK(&priv->c_snap_lock);
@@ -2043,10 +2091,8 @@ notify(xlator_t *this, int event, void *data, ...)
UNLOCK(&priv->bflags.lock);
if (ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_BARRIER_ERROR,
- "Received another barrier off"
- " notification while already off");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_BARRIER_ERROR, NULL);
goto out;
}
@@ -2064,13 +2110,11 @@ notify(xlator_t *this, int event, void *data, ...)
*/
if (ret == 0) {
chlog_barrier_dequeue_all(this, &queue);
- gf_msg(this->name, GF_LOG_INFO, 0,
- CHANGELOG_MSG_BARRIER_INFO,
- "Disabled changelog barrier");
+ gf_smsg(this->name, GF_LOG_INFO, 0,
+ CHANGELOG_MSG_BARRIER_DISABLED, NULL);
} else {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_BARRIER_ERROR,
- "Changelog barrier already disabled");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_BARRIER_ALREADY_DISABLED, NULL);
}
LOCK(&priv->bflags.lock);
@@ -2082,8 +2126,8 @@ notify(xlator_t *this, int event, void *data, ...)
goto out;
case BARRIER_ON:
- gf_msg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_BARRIER_INFO,
- "Barrier on notification");
+ gf_smsg(this->name, GF_LOG_INFO, 0,
+ CHANGELOG_MSG_BARRIER_STATE_NOTIFY, "on", NULL);
CHANGELOG_NOT_ON_THEN_GOTO(priv, ret, out);
LOCK(&priv->c_snap_lock);
@@ -2102,11 +2146,8 @@ notify(xlator_t *this, int event, void *data, ...)
UNLOCK(&priv->bflags.lock);
if (ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_BARRIER_ERROR,
- "Received another barrier on"
- "notification when last one is"
- "not served yet");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_BARRIER_ON_ERROR, NULL);
goto out;
}
@@ -2129,14 +2170,14 @@ notify(xlator_t *this, int event, void *data, ...)
goto out;
}
- gf_msg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_BARRIER_INFO,
- "Enabled changelog barrier");
+ gf_smsg(this->name, GF_LOG_INFO, 0,
+ CHANGELOG_MSG_BARRIER_ENABLE, NULL);
ret = changelog_barrier_notify(priv, buf);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_WRITE_FAILED,
- "Explicit roll over: write failed");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_WRITE_FAILED, "Explicit roll over",
+ NULL);
changelog_barrier_cleanup(this, priv, &queue);
ret = -1;
goto out;
@@ -2160,21 +2201,20 @@ notify(xlator_t *this, int event, void *data, ...)
}
ret1 = pthread_mutex_unlock(&priv->bn.bnotify_mutex);
CHANGELOG_PTHREAD_ERROR_HANDLE_1(ret1, out, bclean_req);
- gf_msg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_BNOTIFY_INFO,
- "Woke up: bnotify conditional wait");
+ gf_smsg(this->name, GF_LOG_INFO, 0,
+ CHANGELOG_MSG_BNOTIFY_COND_INFO, NULL);
goto out;
case DICT_DEFAULT:
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_DICT_GET_FAILED, "barrier key not found");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_BARRIER_KEY_NOT_FOUND, NULL);
ret = -1;
goto out;
default:
- gf_msg(this->name, GF_LOG_ERROR, EINVAL,
- CHANGELOG_MSG_DICT_GET_FAILED,
- "Something went bad in dict_get_str_boolean");
+ gf_smsg(this->name, GF_LOG_ERROR, EINVAL,
+ CHANGELOG_MSG_ERROR_IN_DICT_GET, NULL);
ret = -1;
goto out;
}
@@ -2200,9 +2240,8 @@ mem_acct_init(xlator_t *this)
ret = xlator_mem_acct_init(this, gf_changelog_mt_end + 1);
if (ret != 0) {
- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, CHANGELOG_MSG_NO_MEMORY,
- "Memory accounting"
- " init failed");
+ gf_smsg(this->name, GF_LOG_WARNING, ENOMEM,
+ CHANGELOG_MSG_MEMORY_INIT_FAILED, NULL);
return ret;
}
@@ -2213,23 +2252,11 @@ static int
changelog_init(xlator_t *this, changelog_priv_t *priv)
{
int i = 0;
- int ret = -1;
- struct timeval tv = {
- 0,
- };
+ int ret = 0;
changelog_log_data_t cld = {
0,
};
- ret = gettimeofday(&tv, NULL);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_MSG_GET_TIME_OP_FAILED, "gettimeofday() failure");
- goto out;
- }
-
- priv->slice.tv_start = tv;
-
priv->maps[CHANGELOG_TYPE_DATA] = "D ";
priv->maps[CHANGELOG_TYPE_METADATA] = "M ";
priv->maps[CHANGELOG_TYPE_METADATA_XATTR] = "M ";
@@ -2248,9 +2275,7 @@ changelog_init(xlator_t *this, changelog_priv_t *priv)
* in case there was an encoding change. so... things are kept
* simple here.
*/
- ret = changelog_fill_rollover_data(&cld, _gf_false);
- if (ret)
- goto out;
+ changelog_fill_rollover_data(&cld, _gf_false);
ret = htime_open(this, priv, cld.cld_roll_time);
/* call htime open with cld's rollover_time */
@@ -2288,8 +2313,8 @@ changelog_barrier_pthread_init(xlator_t *this, changelog_priv_t *priv)
if ((ret = pthread_mutex_init(&priv->bn.bnotify_mutex, NULL)) != 0) {
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED,
- "bnotify pthread_mutex_init failed", "ret=%d", ret, NULL);
+ CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED, "name=bnotify",
+ "ret=%d", ret, NULL);
ret = -1;
goto out;
}
@@ -2297,8 +2322,8 @@ changelog_barrier_pthread_init(xlator_t *this, changelog_priv_t *priv)
if ((ret = pthread_cond_init(&priv->bn.bnotify_cond, NULL)) != 0) {
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED,
- "bnotify pthread_cond_init failed", "ret=%d", ret, NULL);
+ CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED, "name=bnotify",
+ "ret=%d", ret, NULL);
ret = -1;
goto out;
}
@@ -2306,8 +2331,8 @@ changelog_barrier_pthread_init(xlator_t *this, changelog_priv_t *priv)
if ((ret = pthread_mutex_init(&priv->dm.drain_black_mutex, NULL)) != 0) {
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED,
- "drain_black pthread_mutex_init failed", "ret=%d", ret, NULL);
+ CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED, "name=drain_black",
+ "ret=%d", ret, NULL);
ret = -1;
goto out;
}
@@ -2315,8 +2340,8 @@ changelog_barrier_pthread_init(xlator_t *this, changelog_priv_t *priv)
if ((ret = pthread_cond_init(&priv->dm.drain_black_cond, NULL)) != 0) {
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED,
- "drain_black pthread_cond_init failed", "ret=%d", ret, NULL);
+ CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED, "name=drain_black",
+ "ret=%d", ret, NULL);
ret = -1;
goto out;
}
@@ -2324,8 +2349,8 @@ changelog_barrier_pthread_init(xlator_t *this, changelog_priv_t *priv)
if ((ret = pthread_mutex_init(&priv->dm.drain_white_mutex, NULL)) != 0) {
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED,
- "drain_white pthread_mutex_init failed", "ret=%d", ret, NULL);
+ CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED, "name=drain_white",
+ "ret=%d", ret, NULL);
ret = -1;
goto out;
}
@@ -2333,8 +2358,8 @@ changelog_barrier_pthread_init(xlator_t *this, changelog_priv_t *priv)
if ((ret = pthread_cond_init(&priv->dm.drain_white_cond, NULL)) != 0) {
gf_smsg(this->name, GF_LOG_ERROR, errno,
- CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED,
- "drain_white pthread_cond_init failed", "ret=%d", ret, NULL);
+ CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED, "name=drain_white",
+ "ret=%d", ret, NULL);
ret = -1;
goto out;
}
@@ -2343,7 +2368,7 @@ changelog_barrier_pthread_init(xlator_t *this, changelog_priv_t *priv)
if ((pthread_mutex_init(&priv->cr.lock, NULL)) != 0) {
gf_smsg(this->name, GF_LOG_ERROR, errno,
CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED,
- "changelog_rollover lock init failed", "ret=%d", ret, NULL);
+ "name=changelog_rollover", "ret=%d", ret, NULL);
ret = -1;
goto out;
}
@@ -2394,6 +2419,22 @@ changelog_barrier_pthread_destroy(changelog_priv_t *priv)
LOCK_DESTROY(&priv->bflags.lock);
}
+static void
+changelog_cleanup_rpc(xlator_t *this, changelog_priv_t *priv)
+{
+ /* terminate rpc server */
+ if (!this->cleanup_starting)
+ changelog_destroy_rpc_listner(this, priv);
+
+ (void)changelog_cleanup_rpc_threads(this, priv);
+ /* cleanup rot buffs */
+ rbuf_dtor(priv->rbuf);
+
+ /* cleanup poller thread */
+ if (priv->poller)
+ (void)changelog_thread_cleanup(this, priv->poller);
+}
+
int
reconfigure(xlator_t *this, dict_t *options)
{
@@ -2402,6 +2443,9 @@ reconfigure(xlator_t *this, dict_t *options)
changelog_priv_t *priv = NULL;
gf_boolean_t active_earlier = _gf_true;
gf_boolean_t active_now = _gf_true;
+ gf_boolean_t rpc_active_earlier = _gf_true;
+ gf_boolean_t rpc_active_now = _gf_true;
+ gf_boolean_t iniate_rpc = _gf_false;
changelog_time_slice_t *slice = NULL;
changelog_log_data_t cld = {
0,
@@ -2412,9 +2456,6 @@ reconfigure(xlator_t *this, dict_t *options)
char csnap_dir[PATH_MAX] = {
0,
};
- struct timeval tv = {
- 0,
- };
uint32_t timeout = 0;
priv = this->private;
@@ -2423,14 +2464,15 @@ reconfigure(xlator_t *this, dict_t *options)
ret = -1;
active_earlier = priv->active;
+ rpc_active_earlier = priv->rpc_active;
/* first stop the rollover and the fsync thread */
changelog_cleanup_helper_threads(this, priv);
GF_OPTION_RECONF("changelog-dir", tmp, options, str, out);
if (!tmp) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_DIR_OPTIONS_NOT_SET,
- "\"changelog-dir\" option is not set");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_DIR_OPTIONS_NOT_SET,
+ NULL);
goto out;
}
@@ -2456,6 +2498,29 @@ reconfigure(xlator_t *this, dict_t *options)
goto out;
GF_OPTION_RECONF("changelog", active_now, options, bool, out);
+ GF_OPTION_RECONF("changelog-notification", rpc_active_now, options, bool,
+ out);
+
+ /* If journalling is enabled, enable rpc notifications */
+ if (active_now && !active_earlier) {
+ if (!rpc_active_earlier)
+ iniate_rpc = _gf_true;
+ }
+
+ if (rpc_active_now && !rpc_active_earlier) {
+ iniate_rpc = _gf_true;
+ }
+
+ /* TODO: Disable of changelog-notifications is not supported for now
+ * as there is no clean way of cleaning up of rpc resources
+ */
+
+ if (iniate_rpc) {
+ ret = changelog_init_rpc(this, priv);
+ if (ret)
+ goto out;
+ priv->rpc_active = _gf_true;
+ }
/**
* changelog_handle_change() handles changes that could possibly
@@ -2482,9 +2547,7 @@ reconfigure(xlator_t *this, dict_t *options)
out);
if (active_now || active_earlier) {
- ret = changelog_fill_rollover_data(&cld, !active_now);
- if (ret)
- goto out;
+ changelog_fill_rollover_data(&cld, !active_now);
slice = &priv->slice;
@@ -2501,15 +2564,9 @@ reconfigure(xlator_t *this, dict_t *options)
if (active_now) {
if (!active_earlier) {
- gf_msg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_HTIME_INFO,
- "Reconfigure: Changelog Enable");
- if (gettimeofday(&tv, NULL)) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CHANGELOG_MSG_HTIME_ERROR, "unable to fetch htime");
- ret = -1;
- goto out;
- }
- htime_create(this, priv, tv.tv_sec);
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_RECONFIGURE,
+ NULL);
+ htime_create(this, priv, gf_time());
}
ret = changelog_spawn_helper_threads(this, priv);
}
@@ -2534,8 +2591,7 @@ changelog_freeup_options(xlator_t *this, changelog_priv_t *priv)
ret = priv->cb->dtor(this, &priv->cd);
if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_FREEUP_FAILED,
- "could not cleanup bootstrapper");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_FREEUP_FAILED, NULL);
GF_FREE(priv->changelog_brick);
GF_FREE(priv->changelog_dir);
}
@@ -2587,6 +2643,7 @@ changelog_init_options(xlator_t *this, changelog_priv_t *priv)
goto dealloc_2;
GF_OPTION_INIT("changelog", priv->active, bool, dealloc_2);
+ GF_OPTION_INIT("changelog-notification", priv->rpc_active, bool, dealloc_2);
GF_OPTION_INIT("capture-del-path", priv->capture_del_path, bool, dealloc_2);
GF_OPTION_INIT("op-mode", tmp, str, dealloc_2);
@@ -2625,20 +2682,6 @@ error_return:
return -1;
}
-static void
-changelog_cleanup_rpc(xlator_t *this, changelog_priv_t *priv)
-{
- /* terminate rpc server */
- changelog_destroy_rpc_listner(this, priv);
-
- /* cleanup rot buffs */
- rbuf_dtor(priv->rbuf);
-
- /* cleanup poller thread */
- if (priv->poller)
- (void)changelog_thread_cleanup(this, priv->poller);
-}
-
static int
changelog_init_rpc(xlator_t *this, changelog_priv_t *priv)
{
@@ -2679,14 +2722,14 @@ init(xlator_t *this)
GF_VALIDATE_OR_GOTO("changelog", this, error_return);
if (!this->children || this->children->next) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_CHILD_MISCONFIGURED,
- "translator needs a single subvolume");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_CHILD_MISCONFIGURED,
+ NULL);
goto error_return;
}
if (!this->parents) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_VOL_MISCONFIGURED,
- "dangling volume. please check volfile");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_VOL_MISCONFIGURED,
+ NULL);
goto error_return;
}
@@ -2696,13 +2739,18 @@ init(xlator_t *this)
this->local_pool = mem_pool_new(changelog_local_t, 64);
if (!this->local_pool) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, CHANGELOG_MSG_NO_MEMORY,
- "failed to create local memory pool");
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, CHANGELOG_MSG_NO_MEMORY,
+ NULL);
goto cleanup_priv;
}
LOCK_INIT(&priv->lock);
LOCK_INIT(&priv->c_snap_lock);
+ GF_ATOMIC_INIT(priv->listnercnt, 0);
+ GF_ATOMIC_INIT(priv->clntcnt, 0);
+ GF_ATOMIC_INIT(priv->xprtcnt, 0);
+ INIT_LIST_HEAD(&priv->xprt_list);
+ priv->htime_fd = -1;
ret = changelog_init_options(this, priv);
if (ret)
@@ -2730,10 +2778,13 @@ init(xlator_t *this)
INIT_LIST_HEAD(&priv->queue);
priv->barrier_enabled = _gf_false;
- /* RPC ball rolling.. */
- ret = changelog_init_rpc(this, priv);
- if (ret)
- goto cleanup_barrier;
+ if (priv->rpc_active || priv->active) {
+ /* RPC ball rolling.. */
+ ret = changelog_init_rpc(this, priv);
+ if (ret)
+ goto cleanup_barrier;
+ priv->rpc_active = _gf_true;
+ }
ret = changelog_init(this, priv);
if (ret)
@@ -2745,13 +2796,16 @@ init(xlator_t *this)
return 0;
cleanup_rpc:
- changelog_cleanup_rpc(this, priv);
+ if (priv->rpc_active) {
+ changelog_cleanup_rpc(this, priv);
+ }
cleanup_barrier:
changelog_barrier_pthread_destroy(priv);
cleanup_options:
changelog_freeup_options(this, priv);
cleanup_mempool:
mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
cleanup_priv:
GF_FREE(priv);
error_return:
@@ -2770,9 +2824,11 @@ fini(xlator_t *this)
priv = this->private;
if (priv) {
- /* terminate RPC server/threads */
- changelog_cleanup_rpc(this, priv);
-
+ if (priv->active || priv->rpc_active) {
+ /* terminate RPC server/threads */
+ changelog_cleanup_rpc(this, priv);
+ GF_FREE(priv->ev_dispatcher);
+ }
/* call barrier_disable to cancel timer */
if (priv->barrier_enabled)
__chlog_barrier_disable(this, &queue);
@@ -2841,6 +2897,13 @@ struct volume_options options[] = {
.flags = OPT_FLAG_SETTABLE,
.level = OPT_STATUS_BASIC,
.tags = {"journal", "georep", "glusterfind"}},
+ {.key = {"changelog-notification"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "enable/disable changelog live notification",
+ .op_version = {3},
+ .level = OPT_STATUS_BASIC,
+ .tags = {"bitrot", "georep"}},
{.key = {"changelog-brick"},
.type = GF_OPTION_TYPE_PATH,
.description = "brick path to generate unique socket file name."
@@ -2910,3 +2973,17 @@ struct volume_options options[] = {
.tags = {"journal", "glusterfind"}},
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "changelog",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/changetimerecorder/Makefile.am b/xlators/features/changetimerecorder/Makefile.am
deleted file mode 100644
index a985f42a877..00000000000
--- a/xlators/features/changetimerecorder/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = src
-
-CLEANFILES =
diff --git a/xlators/features/changetimerecorder/src/Makefile.am b/xlators/features/changetimerecorder/src/Makefile.am
deleted file mode 100644
index 620017e3309..00000000000
--- a/xlators/features/changetimerecorder/src/Makefile.am
+++ /dev/null
@@ -1,26 +0,0 @@
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
-
-# changetimerecorder can only get build when libgfdb is enabled
-if BUILD_GFDB
- xlator_LTLIBRARIES = changetimerecorder.la
-endif
-
-changetimerecorder_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-
-changetimerecorder_la_SOURCES = changetimerecorder.c \
- ctr-helper.c ctr-xlator-ctx.c
-
-changetimerecorder_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la\
- $(top_builddir)/libglusterfs/src/gfdb/libgfdb.la
-
-noinst_HEADERS = ctr-messages.h changetimerecorder.h ctr_mem_types.h \
- ctr-helper.h ctr-xlator-ctx.h
-
-AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
- -I$(top_srcdir)/libglusterfs/src/gfdb \
- -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
- -DDATADIR=\"$(localstatedir)\"
-
-AM_CFLAGS = -Wall $(GF_CFLAGS) $(SQLITE_CFLAGS)
-
-CLEANFILES =
diff --git a/xlators/features/changetimerecorder/src/changetimerecorder.c b/xlators/features/changetimerecorder/src/changetimerecorder.c
deleted file mode 100644
index 3c63c43ee3d..00000000000
--- a/xlators/features/changetimerecorder/src/changetimerecorder.c
+++ /dev/null
@@ -1,2357 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-#include <ctype.h>
-#include <sys/uio.h>
-
-#include "gfdb_sqlite3.h"
-#include "ctr-helper.h"
-#include "ctr-messages.h"
-#include "syscall.h"
-
-#include "changetimerecorder.h"
-#include "tier-ctr-interface.h"
-
-/*******************************inode forget***********************************/
-int
-ctr_forget(xlator_t *this, inode_t *inode)
-{
- fini_ctr_xlator_ctx(this, inode);
- return 0;
-}
-
-/************************** Look up heal **************************************/
-/*
-Problem: The CTR xlator records file meta (heat/hardlinks)
-into the data. This works fine for files which are created
-after ctr xlator is switched ON. But for files which were
-created before CTR xlator is ON, CTR xlator is not able to
-record either of the meta i.e heat or hardlinks. Thus making
-those files immune to promotions/demotions.
-
-Solution: The solution that is implemented in this patch is
-do ctr-db heal of all those pre-existent files, using named lookup.
-For this purpose we use the inode-xlator context variable option
-in gluster.
-The inode-xlator context variable for ctr xlator will have the
-following,
- a. A Lock for the context variable
- b. A hardlink list: This list represents the successful looked
- up hardlinks.
-These are the scenarios when the hardlink list is updated:
-1) Named-Lookup: Whenever a named lookup happens on a file, in the
- wind path we copy all required hardlink and inode information to
- ctr_db_record structure, which resides in the frame->local variable.
- We don't update the database in wind. During the unwind, we read the
- information from the ctr_db_record and ,
- Check if the inode context variable is created, if not we create it.
- Check if the hard link is there in the hardlink list.
- If its not there we add it to the list and send a update to the
- database using libgfdb.
- Please note: The database transaction can fail(and we ignore) as there
- already might be a record in the db. This update to the db is to heal
- if its not there.
- If its there in the list we ignore it.
-2) Inode Forget: Whenever an inode forget hits we clear the hardlink list in
- the inode context variable and delete the inode context variable.
- Please note: An inode forget may happen for two reason,
- a. when the inode is delete.
- b. the in-memory inode is evicted from the inode table due to cache limits.
-3) create: whenever a create happens we create the inode context variable and
- add the hardlink. The database updation is done as usual by ctr.
-4) link: whenever a hardlink is created for the inode, we create the inode
- context variable, if not present, and add the hardlink to the list.
-5) unlink: whenever a unlink happens we delete the hardlink from the list.
-6) mknod: same as create.
-7) rename: whenever a rename happens we update the hardlink in list. if the
- hardlink was not present for updation, we add the hardlink to the list.
-
-What is pending:
-1) This solution will only work for named lookups.
-2) We don't track afr-self-heal/dht-rebalancer traffic for healing.
-
-*/
-
-/* This function does not write anything to the db,
- * just created the local variable
- * for the frame and sets values for the ctr_db_record */
-static int
-ctr_lookup_wind(call_frame_t *frame, xlator_t *this,
- gf_ctr_inode_context_t *ctr_inode_cx)
-{
- int ret = -1;
- gf_ctr_private_t *_priv = NULL;
- gf_ctr_local_t *ctr_local = NULL;
-
- GF_ASSERT(frame);
- GF_ASSERT(frame->root);
- GF_ASSERT(this);
- IS_CTR_INODE_CX_SANE(ctr_inode_cx);
-
- _priv = this->private;
- GF_ASSERT(_priv);
-
- if (_priv->ctr_record_wind && ctr_inode_cx->ia_type != IA_IFDIR) {
- frame->local = init_ctr_local_t(this);
- if (!frame->local) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND,
- "WIND: Error while creating ctr local");
- goto out;
- };
- ctr_local = frame->local;
- /*Definitely no internal fops will reach here*/
- ctr_local->is_internal_fop = _gf_false;
- /*Don't record counters*/
- CTR_DB_REC(ctr_local).do_record_counters = _gf_false;
- /*Don't record time at all*/
- CTR_DB_REC(ctr_local).do_record_times = _gf_false;
-
- /* Copy gfid into db record*/
- gf_uuid_copy(CTR_DB_REC(ctr_local).gfid, *(ctr_inode_cx->gfid));
-
- /* Set fop_path and fop_type, required by libgfdb to make
- * decision while inserting the record */
- CTR_DB_REC(ctr_local).gfdb_fop_path = ctr_inode_cx->fop_path;
- CTR_DB_REC(ctr_local).gfdb_fop_type = ctr_inode_cx->fop_type;
-
- /* Copy hard link info*/
- gf_uuid_copy(CTR_DB_REC(ctr_local).pargfid,
- *((NEW_LINK_CX(ctr_inode_cx))->pargfid));
- if (snprintf(CTR_DB_REC(ctr_local).file_name,
- sizeof(CTR_DB_REC(ctr_local).file_name), "%s",
- NEW_LINK_CX(ctr_inode_cx)->basename) >=
- sizeof(CTR_DB_REC(ctr_local).file_name)) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND,
- "WIND: Error copying filename of ctr local");
- goto out;
- }
- /* Since we are in lookup we can ignore errors while
- * Inserting in the DB, because there may be many
- * to write to the DB attempts for healing.
- * We don't want to log all failed attempts and
- * bloat the log*/
- ctr_local->gfdb_db_record.ignore_errors = _gf_true;
- }
-
- ret = 0;
-
-out:
-
- if (ret) {
- free_ctr_local(ctr_local);
- frame->local = NULL;
- }
-
- return ret;
-}
-
-/* This function inserts the ctr_db_record populated by ctr_lookup_wind
- * in to the db. It also destroys the frame->local created by ctr_lookup_wind */
-static int
-ctr_lookup_unwind(call_frame_t *frame, xlator_t *this)
-{
- int ret = -1;
- gf_ctr_private_t *_priv = NULL;
- gf_ctr_local_t *ctr_local = NULL;
-
- GF_ASSERT(frame);
- GF_ASSERT(this);
-
- _priv = this->private;
- GF_ASSERT(_priv);
-
- GF_ASSERT(_priv->_db_conn);
-
- ctr_local = frame->local;
-
- if (ctr_local && (ctr_local->ia_inode_type != IA_IFDIR)) {
- ret = insert_record(_priv->_db_conn, &ctr_local->gfdb_db_record);
- if (ret == -1) {
- gf_msg(this->name,
- _gfdb_log_level(GF_LOG_ERROR,
- ctr_local->gfdb_db_record.ignore_errors),
- 0, CTR_MSG_FILL_CTR_LOCAL_ERROR_UNWIND,
- "UNWIND: Error filling ctr local");
- goto out;
- }
- }
- ret = 0;
-out:
- free_ctr_local(ctr_local);
- frame->local = NULL;
- return ret;
-}
-
-/******************************************************************************
- *
- * FOPS HANDLING BELOW
- *
- * ***************************************************************************/
-
-/****************************LOOKUP********************************************/
-
-int32_t
-ctr_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *dict, struct iatt *postparent)
-{
- int ret = -1;
- ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
- gf_ctr_local_t *ctr_local = NULL;
- ctr_heal_ret_val_t ret_val = CTR_CTX_ERROR;
- gf_boolean_t _is_heal_needed = _gf_false;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
-
- /* if the lookup failed lookup don't do anything*/
- if (op_ret == -1) {
- gf_msg_trace(this->name, 0, "lookup failed with %s",
- strerror(op_errno));
- goto out;
- }
-
- /* Ignore directory lookups */
- if (inode->ia_type == IA_IFDIR) {
- goto out;
- }
-
- /* if frame local was not set by the ctr_lookup()
- * so don't so anything*/
- if (!frame->local) {
- goto out;
- }
-
- /* if the lookup is for dht link donot record*/
- if (dht_is_linkfile(buf, dict)) {
- gf_msg_trace(this->name, 0,
- "Ignoring Lookup "
- "for dht link file");
- goto out;
- }
-
- ctr_local = frame->local;
- /*Assign the proper inode type*/
- ctr_local->ia_inode_type = inode->ia_type;
-
- /* Copy gfid directly from inode */
- gf_uuid_copy(CTR_DB_REC(ctr_local).gfid, inode->gfid);
-
- /* Checking if gfid and parent gfid is valid */
- if (gf_uuid_is_null(CTR_DB_REC(ctr_local).gfid) ||
- gf_uuid_is_null(CTR_DB_REC(ctr_local).pargfid)) {
- gf_msg_trace(this->name, 0, "Invalid GFID");
- goto out;
- }
-
- /* if its a first entry
- * then mark the ctr_record for create
- * A create will attempt a file and a hard link created in the db*/
- ctr_xlator_ctx = get_ctr_xlator_ctx(this, inode);
- if (!ctr_xlator_ctx) {
- /* This marks inode heal */
- CTR_DB_REC(ctr_local).gfdb_fop_type = GFDB_FOP_CREATE_WRITE;
- _is_heal_needed = _gf_true;
- }
-
- /* Copy the correct gfid from resolved inode */
- gf_uuid_copy(CTR_DB_REC(ctr_local).gfid, inode->gfid);
-
- /* Add hard link to the list */
- ret_val = add_hard_link_ctx(frame, this, inode);
- if (ret_val == CTR_CTX_ERROR) {
- gf_msg_trace(this->name, 0, "Failed adding hardlink to list");
- goto out;
- }
- /* If inode needs healing then heal the hardlink also */
- else if (ret_val & CTR_TRY_INODE_HEAL) {
- /* This marks inode heal */
- CTR_DB_REC(ctr_local).gfdb_fop_type = GFDB_FOP_CREATE_WRITE;
- _is_heal_needed = _gf_true;
- }
- /* If hardlink needs healing */
- else if (ret_val & CTR_TRY_HARDLINK_HEAL) {
- _is_heal_needed = _gf_true;
- }
-
- /* If lookup heal needed */
- if (!_is_heal_needed)
- goto out;
-
- /* FINALLY HEAL : Inserts the ctr_db_record populated by ctr_lookup_wind
- * in to the db. It also destroys the frame->local
- * created by ctr_lookup_wind */
- ret = ctr_lookup_unwind(frame, this);
- if (ret) {
- gf_msg_trace(this->name, 0, "Failed healing/inserting link");
- }
-
-out:
- free_ctr_local((gf_ctr_local_t *)frame->local);
- frame->local = NULL;
-
- STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, dict,
- postparent);
-
- return 0;
-}
-
-int32_t
-ctr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- gf_ctr_inode_context_t ctr_inode_cx;
- gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
- gf_ctr_link_context_t ctr_link_cx;
- gf_ctr_link_context_t *_link_cx = &ctr_link_cx;
- int ret = -1;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
-
- GF_ASSERT(frame);
- GF_ASSERT(frame->root);
-
- /* Don't handle nameless lookups*/
- if (!loc->parent || !loc->name)
- goto out;
-
- /*fill ctr link context*/
- FILL_CTR_LINK_CX(_link_cx, loc->parent->gfid, loc->name, out);
-
- /* Fill ctr inode context*/
- /* IA_IFREG : We assume its a file in the wind
- * but in the unwind we are sure what the inode is a file
- * or directory
- * gfid: we are just filling loc->gfid which is not correct.
- * In unwind we fill the correct gfid for successful lookup*/
- FILL_CTR_INODE_CONTEXT(_inode_cx, IA_IFREG, loc->gfid, _link_cx, NULL,
- GFDB_FOP_DENTRY_WRITE, GFDB_FOP_WIND);
-
- /* Create the frame->local and populate ctr_db_record
- * No writing to the db yet */
- ret = ctr_lookup_wind(frame, this, _inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_LINK_WIND_FAILED,
- "Failed to insert link wind");
- }
-
-out:
- STACK_WIND(frame, ctr_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, xdata);
- return 0;
-}
-
-/****************************WRITEV********************************************/
-int32_t
-ctr_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- int ret = -1;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
-
- ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_WRITEV_UNWIND_FAILED,
- "Failed to insert writev unwind");
- }
-
-out:
- ctr_free_frame_local(frame);
-
- STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
-
- return 0;
-}
-
-int32_t
-ctr_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
- int32_t count, off_t off, uint32_t flags, struct iobref *iobref,
- dict_t *xdata)
-{
- int ret = -1;
- gf_ctr_inode_context_t ctr_inode_cx;
- gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
-
- /*Fill ctr inode context*/
- FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL,
- NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
-
- /*record into the database*/
- ret = ctr_insert_wind(frame, this, _inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_WRITEV_WIND_FAILED,
- "Failed to insert writev wind");
- }
-
-out:
- STACK_WIND(frame, ctr_writev_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev, fd, vector, count, off, flags,
- iobref, xdata);
-
- return 0;
-}
-
-/******************************setattr*****************************************/
-
-int32_t
-ctr_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preop_stbuf,
- struct iatt *postop_stbuf, dict_t *xdata)
-{
- int ret = -1;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
-
- ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_INSERT_SETATTR_UNWIND_FAILED,
- "Failed to insert setattr unwind");
- }
-
-out:
- ctr_free_frame_local(frame);
-
- STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, preop_stbuf,
- postop_stbuf, xdata);
-
- return 0;
-}
-
-int32_t
-ctr_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
- int32_t valid, dict_t *xdata)
-{
- int ret = -1;
- gf_ctr_inode_context_t ctr_inode_cx;
- gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
- CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out);
-
- /*Fill ctr inode context*/
- FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, loc->inode->gfid,
- NULL, NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
-
- /*record into the database*/
- ret = ctr_insert_wind(frame, this, _inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_SETATTR_WIND_FAILED,
- "Failed to insert setattr wind");
- }
-out:
-
- STACK_WIND(frame, ctr_setattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
-
- return 0;
-}
-
-/*************************** fsetattr ***************************************/
-int32_t
-ctr_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preop_stbuf,
- struct iatt *postop_stbuf, dict_t *xdata)
-{
- int ret = -1;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
-
- ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_INSERT_SETATTR_UNWIND_FAILED,
- "Failed to insert fsetattr unwind");
- }
-
-out:
- ctr_free_frame_local(frame);
-
- STACK_UNWIND_STRICT(fsetattr, frame, op_ret, op_errno, preop_stbuf,
- postop_stbuf, xdata);
-
- return 0;
-}
-
-int32_t
-ctr_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
- int32_t valid, dict_t *xdata)
-{
- int ret = -1;
- gf_ctr_inode_context_t ctr_inode_cx;
- gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
- CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out);
-
- /*Fill ctr inode context*/
- FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL,
- NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
-
- /*record into the database*/
- ret = ctr_insert_wind(frame, this, _inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_SETATTR_WIND_FAILED,
- "Failed to insert fsetattr wind");
- }
-out:
- STACK_WIND(frame, ctr_fsetattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
-
- return 0;
-}
-/****************************fremovexattr************************************/
-
-int32_t
-ctr_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- int ret = -1;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
-
- ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_INSERT_FREMOVEXATTR_UNWIND_FAILED,
- "Failed to insert fremovexattr unwind");
- }
-
-out:
- ctr_free_frame_local(frame);
-
- STACK_UNWIND_STRICT(fremovexattr, frame, op_ret, op_errno, xdata);
-
- return 0;
-}
-
-int32_t
-ctr_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
-{
- int ret = -1;
- gf_ctr_inode_context_t ctr_inode_cx;
- gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
- CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out);
-
- /*Fill ctr inode context*/
- FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL,
- NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
-
- /*record into the database*/
- ret = ctr_insert_wind(frame, this, _inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_INSERT_FREMOVEXATTR_WIND_FAILED,
- "Failed to insert fremovexattr wind");
- }
-
-out:
- STACK_WIND(frame, ctr_fremovexattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
- return 0;
-}
-
-/****************************removexattr*************************************/
-
-int32_t
-ctr_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- int ret = -1;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
- CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
-
- ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_INSERT_REMOVEXATTR_UNWIND_FAILED,
- "Failed to insert removexattr unwind");
- }
-
-out:
- ctr_free_frame_local(frame);
-
- STACK_UNWIND_STRICT(removexattr, frame, op_ret, op_errno, xdata);
-
- return 0;
-}
-
-int32_t
-ctr_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
-{
- int ret = -1;
- gf_ctr_inode_context_t ctr_inode_cx;
- gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
- CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out);
-
- /*Fill ctr inode context*/
- FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, loc->inode->gfid,
- NULL, NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
-
- /*record into the database*/
- ret = ctr_insert_wind(frame, this, _inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_INSERT_REMOVEXATTR_WIND_FAILED,
- "Failed to insert removexattr wind");
- }
-
-out:
- STACK_WIND(frame, ctr_removexattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
- return 0;
-}
-
-/****************************truncate****************************************/
-
-int32_t
-ctr_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- int ret = -1;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
-
- ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_INSERT_TRUNCATE_UNWIND_FAILED,
- "Failed to insert truncate unwind");
- }
-
-out:
- ctr_free_frame_local(frame);
-
- STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
-
- return 0;
-}
-
-int32_t
-ctr_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
- dict_t *xdata)
-{
- int ret = -1;
- gf_ctr_inode_context_t ctr_inode_cx;
- gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
-
- /*Fill ctr inode context*/
- FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, loc->inode->gfid,
- NULL, NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
-
- /*record into the database*/
- ret = ctr_insert_wind(frame, this, _inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_TRUNCATE_WIND_FAILED,
- "Failed to insert truncate wind");
- }
-out:
- STACK_WIND(frame, ctr_truncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
- return 0;
-}
-
-/****************************ftruncate***************************************/
-
-int32_t
-ctr_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- int ret = -1;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
-
- ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_INSERT_FTRUNCATE_UNWIND_FAILED,
- "Failed to insert ftruncate unwind");
- }
-
-out:
- ctr_free_frame_local(frame);
-
- STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
-
- return 0;
-}
-
-int32_t
-ctr_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- dict_t *xdata)
-{
- int ret = -1;
- gf_ctr_inode_context_t ctr_inode_cx;
- gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
-
- /*Fill ctr inode context*/
- FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL,
- NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
-
- /*record into the database*/
- ret = ctr_insert_wind(frame, this, _inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_INSERT_FTRUNCATE_WIND_FAILED,
- "Failed to insert ftruncate wind");
- }
-
-out:
- STACK_WIND(frame, ctr_ftruncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
- return 0;
-}
-
-/****************************rename******************************************/
-int32_t
-ctr_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent,
- dict_t *xdata)
-{
- int ret = -1;
- uint32_t remaining_links = -1;
- gf_ctr_local_t *ctr_local = NULL;
- gfdb_fop_type_t fop_type = GFDB_FOP_INVALID_OP;
- gfdb_fop_path_t fop_path = GFDB_FOP_INVALID;
-
- GF_ASSERT(frame);
- GF_ASSERT(this);
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
-
- ret = ctr_insert_unwind(frame, this, GFDB_FOP_DENTRY_WRITE,
- GFDB_FOP_UNWIND);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_RENAME_UNWIND_FAILED,
- "Failed to insert rename unwind");
- goto out;
- }
-
- if (!xdata)
- goto out;
- /*
- *
- * Extracting GF_RESPONSE_LINK_COUNT_XDATA from POSIX Xlator
- * This is only set when we are overwriting hardlinks.
- *
- * */
- ret = dict_get_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA,
- &remaining_links);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_GET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED,
- "Failed to getting GF_RESPONSE_LINK_COUNT_XDATA");
- remaining_links = -1;
- goto out;
- }
-
- ctr_local = frame->local;
- if (!ctr_local) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_NULL_LOCAL,
- "ctr_local is NULL.");
- goto out;
- }
-
- /* This is not the only link */
- if (remaining_links > 1) {
- fop_type = GFDB_FOP_DENTRY_WRITE;
- fop_path = GFDB_FOP_UNDEL;
- }
- /* Last link that was deleted */
- else if (remaining_links == 1) {
- fop_type = GFDB_FOP_DENTRY_WRITE;
- fop_path = GFDB_FOP_UNDEL_ALL;
- } else {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_RENAME_UNWIND_FAILED,
- "Invalid link count from posix");
- goto out;
- }
-
- ret = ctr_delete_hard_link_from_db(
- this, CTR_DB_REC(ctr_local).old_gfid, CTR_DB_REC(ctr_local).pargfid,
- CTR_DB_REC(ctr_local).file_name, fop_type, fop_path);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_UNLINK_UNWIND_FAILED,
- "Failed to delete records of %s",
- CTR_DB_REC(ctr_local).old_file_name);
- }
-
-out:
- ctr_free_frame_local(frame);
-
- STACK_UNWIND_STRICT(rename, frame, op_ret, op_errno, buf, preoldparent,
- postoldparent, prenewparent, postnewparent, xdata);
-
- return 0;
-}
-
-int32_t
-ctr_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
-{
- int ret = -1;
- gf_ctr_inode_context_t ctr_inode_cx;
- gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
- gf_ctr_link_context_t new_link_cx, old_link_cx;
- gf_ctr_link_context_t *_nlink_cx = &new_link_cx;
- gf_ctr_link_context_t *_olink_cx = &old_link_cx;
- int is_dict_created = 0;
- ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
-
- /*Fill old link context*/
- FILL_CTR_LINK_CX(_olink_cx, oldloc->pargfid, oldloc->name, out);
-
- /*Fill new link context*/
- FILL_CTR_LINK_CX(_nlink_cx, newloc->pargfid, newloc->name, out);
-
- /*Fill ctr inode context*/
- FILL_CTR_INODE_CONTEXT(_inode_cx, oldloc->inode->ia_type,
- oldloc->inode->gfid, _nlink_cx, _olink_cx,
- GFDB_FOP_DENTRY_WRITE, GFDB_FOP_WIND);
-
- /* If the rename is a overwrite of hardlink
- * rename ("file1", "file2")
- * file1 is hardlink for gfid say 00000000-0000-0000-0000-00000000000A
- * file2 is hardlink for gfid say 00000000-0000-0000-0000-00000000000B
- * so we are saving file2 gfid in old_gfid so that we delete entries
- * from the db during rename callback if the fop is successful
- * */
- if (newloc->inode) {
- /* This is the GFID from where the newloc hardlink will be
- * unlinked */
- _inode_cx->old_gfid = &newloc->inode->gfid;
- }
-
- /* Is a metatdata fop */
- _inode_cx->is_metadata_fop = _gf_true;
-
- /*record into the database*/
- ret = ctr_insert_wind(frame, this, _inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_RENAME_WIND_FAILED,
- "Failed to insert rename wind");
- } else {
- /* We are doing updation of hard link in inode context in wind
- * As we don't get the "inode" in the call back for rename */
- ret = update_hard_link_ctx(frame, this, oldloc->inode);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_UPDATE_HARDLINK_FAILED,
- "Failed "
- "updating hard link in ctr inode context");
- goto out;
- }
-
- /* If the newloc has an inode. i.e acquiring hardlink of an
- * exisitng file i.e overwritting a file.
- * */
- if (newloc->inode) {
- /* Getting the ctr inode context variable for
- * inode whose hardlink will be acquired during
- * the rename
- * */
- ctr_xlator_ctx = get_ctr_xlator_ctx(this, newloc->inode);
- if (!ctr_xlator_ctx) {
- /* Since there is no ctr inode context
- * so nothing more to do */
- ret = 0;
- goto out;
- }
-
- /* Deleting hardlink from context variable */
- ret = ctr_delete_hard_link(this, ctr_xlator_ctx, newloc->pargfid,
- newloc->name);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_DELETE_HARDLINK_FAILED,
- "Failed to delete hard link");
- goto out;
- }
-
- /* Requesting for number of hardlinks on the newloc
- * inode from POSIX.
- * */
- is_dict_created = set_posix_link_request(this, &xdata);
- if (is_dict_created == -1) {
- ret = -1;
- goto out;
- }
- }
- }
-
-out:
- STACK_WIND(frame, ctr_rename_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
-
- if (is_dict_created == 1) {
- dict_unref(xdata);
- }
-
- return 0;
-}
-
-/****************************unlink******************************************/
-int32_t
-ctr_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- int ret = -1;
- uint32_t remaining_links = -1;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
-
- if (!xdata)
- goto out;
-
- /*
- *
- * Extracting GF_RESPONSE_LINK_COUNT_XDATA from POSIX Xlator
- *
- * */
- ret = dict_get_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA,
- &remaining_links);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_GET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED,
- "Failed to getting GF_RESPONSE_LINK_COUNT_XDATA");
- remaining_links = -1;
- }
-
- /*This is not the only link*/
- if (remaining_links != 1) {
- ret = ctr_insert_unwind(frame, this, GFDB_FOP_DENTRY_WRITE,
- GFDB_FOP_UNDEL);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_INSERT_UNLINK_UNWIND_FAILED,
- "Failed to insert unlink unwind");
- }
- }
- /*Last link that was deleted*/
- else if (remaining_links == 1) {
- ret = ctr_insert_unwind(frame, this, GFDB_FOP_DENTRY_WRITE,
- GFDB_FOP_UNDEL_ALL);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_INSERT_UNLINK_UNWIND_FAILED,
- "Failed to insert unlink unwind");
- }
- }
-
-out:
- ctr_free_frame_local(frame);
-
- STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent,
- xdata);
-
- return 0;
-}
-
-int32_t
-ctr_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
- dict_t *xdata)
-{
- int ret = -1;
- gf_ctr_inode_context_t ctr_inode_cx;
- gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
- gf_ctr_link_context_t ctr_link_cx;
- gf_ctr_link_context_t *_link_cx = &ctr_link_cx;
- gf_boolean_t is_xdata_created = _gf_false;
- struct iatt dummy_stat = {0};
-
- GF_ASSERT(frame);
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
-
- /*Fill link context*/
- FILL_CTR_LINK_CX(_link_cx, loc->pargfid, loc->name, out);
-
- /*Fill ctr inode context*/
- FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, loc->inode->gfid,
- _link_cx, NULL, GFDB_FOP_DENTRY_WRITE,
- GFDB_FOP_WDEL);
-
- /*Internal FOP*/
- _inode_cx->is_internal_fop = is_internal_fop(frame, xdata);
-
- /* Is a metadata FOP */
- _inode_cx->is_metadata_fop = _gf_true;
-
- /* If its a internal FOP and dht link file donot record*/
- if (_inode_cx->is_internal_fop && dht_is_linkfile(&dummy_stat, xdata)) {
- goto out;
- }
-
- /*record into the database*/
- ret = ctr_insert_wind(frame, this, _inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_UNLINK_UNWIND_FAILED,
- "Failed to insert unlink wind");
- } else {
- /* We are doing delete of hard link in inode context in wind
- * As we don't get the "inode" in the call back for rename */
- ret = delete_hard_link_ctx(frame, this, loc->inode);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_DELETE_HARDLINK_FAILED,
- "Failed "
- "deleting hard link from ctr inode context");
- }
- }
-
- /*
- *
- * Sending GF_REQUEST_LINK_COUNT_XDATA
- * to POSIX Xlator to send link count in unwind path
- *
- * */
- /*create xdata if NULL*/
- if (!xdata) {
- xdata = dict_new();
- is_xdata_created = (xdata) ? _gf_true : _gf_false;
- }
- if (!xdata) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_XDATA_NULL,
- "xdata is NULL :Cannot send "
- "GF_REQUEST_LINK_COUNT_XDATA to posix");
- goto out;
- }
-
- ret = dict_set_int32(xdata, GF_REQUEST_LINK_COUNT_XDATA, 1);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_SET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED,
- "Failed setting GF_REQUEST_LINK_COUNT_XDATA");
- if (is_xdata_created) {
- dict_unref(xdata);
- }
- goto out;
- }
-
-out:
- STACK_WIND(frame, ctr_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
-
- if (is_xdata_created)
- dict_unref(xdata);
-
- return 0;
-}
-
-/****************************fsync******************************************/
-int32_t
-ctr_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
- dict_t *xdata)
-{
- int ret = -1;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
-
- ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_FSYNC_UNWIND_FAILED,
- "Failed to insert fsync unwind");
- }
-
-out:
- ctr_free_frame_local(frame);
-
- STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
-
- return 0;
-}
-
-int32_t
-ctr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
- dict_t *xdata)
-{
- int ret = -1;
- gf_ctr_inode_context_t ctr_inode_cx;
- gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
-
- /*Fill ctr inode context*/
- FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL,
- NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
-
- /*record into the database*/
- ret = ctr_insert_wind(frame, this, _inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_FSYNC_WIND_FAILED,
- "Failed to insert fsync wind");
- }
-
-out:
- STACK_WIND(frame, ctr_fsync_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync, fd, flags, xdata);
- return 0;
-}
-
-/****************************setxattr****************************************/
-
-int
-ctr_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- int ret = -1;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
-
- ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_FSYNC_UNWIND_FAILED,
- "Failed to insert setxattr unwind");
- }
-
-out:
- ctr_free_frame_local(frame);
-
- STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, xdata);
-
- return 0;
-}
-
-int
-ctr_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr,
- int flags, dict_t *xdata)
-{
- int ret = -1;
- gf_ctr_inode_context_t ctr_inode_cx;
- gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
- CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out);
-
- /*Fill ctr inode context*/
- FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, loc->inode->gfid,
- NULL, NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
-
- /*record into the database*/
- ret = ctr_insert_wind(frame, this, _inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_SETATTR_WIND_FAILED,
- "Failed to insert setxattr wind");
- }
-
-out:
- STACK_WIND(frame, ctr_setxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr, loc, xattr, flags, xdata);
- return 0;
-}
-/**************************** fsetxattr *************************************/
-int32_t
-ctr_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- int ret = -1;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
-
- ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_FSYNC_UNWIND_FAILED,
- "Failed to insert fsetxattr unwind");
- }
-
-out:
- ctr_free_frame_local(frame);
-
- STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, xdata);
-
- return 0;
-}
-
-int32_t
-ctr_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int32_t flags, dict_t *xdata)
-{
- int ret = -1;
- gf_ctr_inode_context_t ctr_inode_cx;
- gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
- CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out);
-
- /*Fill ctr inode context*/
- FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL,
- NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND);
-
- /*record into the database*/
- ret = ctr_insert_wind(frame, this, _inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_SETATTR_WIND_FAILED,
- "Failed to insert fsetxattr wind");
- }
-
-out:
- STACK_WIND(frame, ctr_fsetxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
- return 0;
-}
-/****************************mknod*******************************************/
-
-int32_t
-ctr_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
-{
- int ret = -1;
- ctr_heal_ret_val_t ret_val = CTR_CTX_ERROR;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
-
- /* Add hard link to the list */
- ret_val = add_hard_link_ctx(frame, this, inode);
- if (ret_val == CTR_CTX_ERROR) {
- gf_msg_trace(this->name, 0, "Failed adding hard link");
- }
-
- ret = ctr_insert_unwind(frame, this, GFDB_FOP_CREATE_WRITE,
- GFDB_FOP_UNWIND);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_MKNOD_UNWIND_FAILED,
- "Failed to insert mknod unwind");
- }
-
-out:
- ctr_free_frame_local(frame);
-
- STACK_UNWIND_STRICT(mknod, frame, op_ret, op_errno, inode, buf, preparent,
- postparent, xdata);
-
- return 0;
-}
-
-int
-ctr_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev, mode_t umask, dict_t *xdata)
-{
- int ret = -1;
- gf_ctr_inode_context_t ctr_inode_cx;
- gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
- gf_ctr_link_context_t ctr_link_cx;
- gf_ctr_link_context_t *_link_cx = &ctr_link_cx;
- uuid_t gfid = {
- 0,
- };
- uuid_t *ptr_gfid = &gfid;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
-
- GF_ASSERT(frame);
- GF_ASSERT(frame->root);
-
- /*get gfid from xdata dict*/
- ret = dict_get_gfuuid(xdata, "gfid-req", &gfid);
- if (ret) {
- gf_msg_debug(this->name, 0, "failed to get gfid from dict");
- goto out;
- }
-
- /*fill ctr link context*/
- FILL_CTR_LINK_CX(_link_cx, loc->pargfid, loc->name, out);
-
- /*Fill ctr inode context*/
- FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, *ptr_gfid, _link_cx,
- NULL, GFDB_FOP_CREATE_WRITE, GFDB_FOP_WIND);
-
- /*record into the database*/
- ret = ctr_insert_wind(frame, this, _inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_MKNOD_WIND_FAILED,
- "Failed to insert mknod wind");
- }
-
-out:
- STACK_WIND(frame, ctr_mknod_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata);
- return 0;
-}
-
-/****************************create******************************************/
-int
-ctr_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
-{
- int ret = -1;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
-
- ret = add_hard_link_ctx(frame, this, inode);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_ADD_HARDLINK_FAILED,
- "Failed adding hard link");
- }
-
- ret = ctr_insert_unwind(frame, this, GFDB_FOP_CREATE_WRITE,
- GFDB_FOP_UNWIND);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_CREATE_UNWIND_FAILED,
- "Failed to insert create unwind");
- }
-
-out:
- ctr_free_frame_local(frame);
-
- STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, stbuf,
- preparent, postparent, xdata);
-
- return 0;
-}
-
-int
-ctr_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
-{
- int ret = -1;
- gf_ctr_inode_context_t ctr_inode_cx;
- gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
- gf_ctr_link_context_t ctr_link_cx;
- gf_ctr_link_context_t *_link_cx = &ctr_link_cx;
- uuid_t gfid = {
- 0,
- };
- uuid_t *ptr_gfid = &gfid;
- struct iatt dummy_stat = {0};
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
-
- GF_ASSERT(frame);
- GF_ASSERT(frame->root);
-
- /*Get GFID from Xdata dict*/
- ret = dict_get_gfuuid(xdata, "gfid-req", &gfid);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_GET_GFID_FROM_DICT_FAILED,
- "failed to get gfid from dict");
- goto out;
- }
-
- /*fill ctr link context*/
- FILL_CTR_LINK_CX(_link_cx, loc->pargfid, loc->name, out);
-
- /*Fill ctr inode context*/
- FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, *ptr_gfid, _link_cx,
- NULL, GFDB_FOP_CREATE_WRITE, GFDB_FOP_WIND);
-
- /*Internal FOP*/
- _inode_cx->is_internal_fop = is_internal_fop(frame, xdata);
-
- /* If its a internal FOP and dht link file donot record*/
- if (_inode_cx->is_internal_fop && dht_is_linkfile(&dummy_stat, xdata)) {
- goto out;
- }
-
- /*record into the database*/
- ret = ctr_insert_wind(frame, this, &ctr_inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_CREATE_WIND_FAILED,
- "Failed to insert create wind");
- }
-out:
- STACK_WIND(frame, ctr_create_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
- xdata);
- return 0;
-}
-
-/****************************link********************************************/
-
-int
-ctr_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
-{
- int ret = -1;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
-
- /* Add hard link to the list */
- ret = add_hard_link_ctx(frame, this, inode);
- if (ret) {
- gf_msg_trace(this->name, 0, "Failed adding hard link");
- }
-
- ret = ctr_insert_unwind(frame, this, GFDB_FOP_DENTRY_WRITE,
- GFDB_FOP_UNWIND);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_CREATE_UNWIND_FAILED,
- "Failed to insert create unwind");
- }
-
-out:
- ctr_free_frame_local(frame);
-
- STACK_UNWIND_STRICT(link, frame, op_ret, op_errno, inode, stbuf, preparent,
- postparent, xdata);
- return 0;
-}
-
-int
-ctr_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
-{
- int ret = -1;
- gf_ctr_inode_context_t ctr_inode_cx;
- gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
- gf_ctr_link_context_t ctr_link_cx;
- gf_ctr_link_context_t *_link_cx = &ctr_link_cx;
- struct iatt dummy_stat = {0};
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
-
- GF_ASSERT(frame);
- GF_ASSERT(frame->root);
-
- /*fill ctr link context*/
- FILL_CTR_LINK_CX(_link_cx, newloc->pargfid, newloc->name, out);
-
- /*Fill ctr inode context*/
- FILL_CTR_INODE_CONTEXT(_inode_cx, oldloc->inode->ia_type,
- oldloc->inode->gfid, _link_cx, NULL,
- GFDB_FOP_DENTRY_WRITE, GFDB_FOP_WIND);
-
- /*Internal FOP*/
- _inode_cx->is_internal_fop = is_internal_fop(frame, xdata);
-
- /* Is a metadata fop */
- _inode_cx->is_metadata_fop = _gf_true;
-
- /* If its a internal FOP and dht link file donot record*/
- if (_inode_cx->is_internal_fop && dht_is_linkfile(&dummy_stat, xdata)) {
- goto out;
- }
-
- /*record into the database*/
- ret = ctr_insert_wind(frame, this, _inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_LINK_WIND_FAILED,
- "Failed to insert link wind");
- }
-
-out:
- STACK_WIND(frame, ctr_link_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
- return 0;
-}
-
-/******************************readv*****************************************/
-int
-ctr_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, struct iovec *vector, int count, struct iatt *stbuf,
- struct iobref *iobref, dict_t *xdata)
-{
- int ret = -1;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out);
-
- ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_READ, GFDB_FOP_UNWIND);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_CREATE_UNWIND_FAILED,
- "Failed to insert create unwind");
- }
-
-out:
- ctr_free_frame_local(frame);
-
- STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, vector, count, stbuf,
- iobref, xdata);
- return 0;
-}
-
-int
-ctr_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t off,
- uint32_t flags, dict_t *xdata)
-{
- int ret = -1;
- gf_ctr_inode_context_t ctr_inode_cx;
- gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx;
-
- CTR_IS_DISABLED_THEN_GOTO(this, out);
- CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out);
-
- /*Fill ctr inode context*/
- FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL,
- NULL, GFDB_FOP_INODE_READ, GFDB_FOP_WIND);
-
- /*record into the database*/
- ret = ctr_insert_wind(frame, this, _inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_READV_WIND_FAILED,
- "Failed to insert readv wind");
- }
-
-out:
- STACK_WIND(frame, ctr_readv_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv, fd, size, off, flags, xdata);
- return 0;
-}
-
-/*******************************ctr_ipc****************************************/
-
-/*This is the call back function per record/file from data base*/
-static int
-ctr_db_query_callback(gfdb_query_record_t *gfdb_query_record, void *args)
-{
- int ret = -1;
- ctr_query_cbk_args_t *query_cbk_args = args;
-
- GF_VALIDATE_OR_GOTO("ctr", query_cbk_args, out);
-
- ret = gfdb_write_query_record(query_cbk_args->query_fd, gfdb_query_record);
- if (ret) {
- gf_msg("ctr", GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
- "Failed to write to query file");
- goto out;
- }
-
- query_cbk_args->count++;
-
- ret = 0;
-out:
- return ret;
-}
-
-/* This function does all the db queries related to tiering and
- * generates/populates new/existing query file
- * inputs:
- * xlator_t *this : CTR Translator
- * void *conn_node : Database connection
- * char *query_file: the query file that needs to be updated
- * gfdb_ipc_ctr_params_t *ipc_ctr_params: the query parameters
- * Return:
- * On success 0
- * On failure -1
- * */
-int
-ctr_db_query(xlator_t *this, void *conn_node, char *query_file,
- gfdb_ipc_ctr_params_t *ipc_ctr_params)
-{
- int ret = -1;
- ctr_query_cbk_args_t query_cbk_args = {0};
-
- GF_VALIDATE_OR_GOTO("ctr", this, out);
- GF_VALIDATE_OR_GOTO(this->name, conn_node, out);
- GF_VALIDATE_OR_GOTO(this->name, query_file, out);
- GF_VALIDATE_OR_GOTO(this->name, ipc_ctr_params, out);
-
- /*Query for eligible files from db*/
- query_cbk_args.query_fd = open(query_file, O_WRONLY | O_CREAT | O_APPEND,
- S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
- if (query_cbk_args.query_fd < 0) {
- gf_msg(this->name, GF_LOG_ERROR, errno, CTR_MSG_FATAL_ERROR,
- "Failed to open query file %s", query_file);
- goto out;
- }
- if (!ipc_ctr_params->is_promote) {
- if (ipc_ctr_params->emergency_demote) {
- /* emergency demotion mode */
- ret = find_all(conn_node, ctr_db_query_callback,
- (void *)&query_cbk_args,
- ipc_ctr_params->query_limit);
- } else {
- if (ipc_ctr_params->write_freq_threshold == 0 &&
- ipc_ctr_params->read_freq_threshold == 0) {
- ret = find_unchanged_for_time(conn_node, ctr_db_query_callback,
- (void *)&query_cbk_args,
- &ipc_ctr_params->time_stamp);
- } else {
- ret = find_unchanged_for_time_freq(
- conn_node, ctr_db_query_callback, (void *)&query_cbk_args,
- &ipc_ctr_params->time_stamp,
- ipc_ctr_params->write_freq_threshold,
- ipc_ctr_params->read_freq_threshold, _gf_false);
- }
- }
- } else {
- if (ipc_ctr_params->write_freq_threshold == 0 &&
- ipc_ctr_params->read_freq_threshold == 0) {
- ret = find_recently_changed_files(conn_node, ctr_db_query_callback,
- (void *)&query_cbk_args,
- &ipc_ctr_params->time_stamp);
- } else {
- ret = find_recently_changed_files_freq(
- conn_node, ctr_db_query_callback, (void *)&query_cbk_args,
- &ipc_ctr_params->time_stamp,
- ipc_ctr_params->write_freq_threshold,
- ipc_ctr_params->read_freq_threshold, _gf_false);
- }
- }
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
- "FATAL: query from db failed");
- goto out;
- }
-
- ret = clear_files_heat(conn_node);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
- "FATAL: Failed to clear db entries");
- goto out;
- }
-
- ret = 0;
-out:
-
- if (!ret)
- ret = query_cbk_args.count;
-
- if (query_cbk_args.query_fd >= 0) {
- sys_close(query_cbk_args.query_fd);
- query_cbk_args.query_fd = -1;
- }
-
- return ret;
-}
-
-void *
-ctr_compact_thread(void *args)
-{
- int ret = -1;
- void *db_conn = NULL;
-
- xlator_t *this = NULL;
- gf_ctr_private_t *priv = NULL;
- gf_boolean_t compact_active = _gf_false;
- gf_boolean_t compact_mode_switched = _gf_false;
-
- this = (xlator_t *)args;
-
- GF_VALIDATE_OR_GOTO("ctr", this, out);
-
- priv = this->private;
-
- db_conn = priv->_db_conn;
- compact_active = priv->compact_active;
- compact_mode_switched = priv->compact_mode_switched;
-
- gf_msg("ctr-compact", GF_LOG_INFO, 0, CTR_MSG_SET, "Starting compaction");
-
- ret = compact_db(db_conn, compact_active, compact_mode_switched);
-
- if (ret) {
- gf_msg("ctr-compact", GF_LOG_ERROR, 0, CTR_MSG_SET,
- "Failed to perform the compaction");
- }
-
- ret = pthread_mutex_lock(&priv->compact_lock);
-
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
- "Failed to acquire lock");
- goto out;
- }
-
- /* We are done compaction on this brick. Set all flags to false */
- priv->compact_active = _gf_false;
- priv->compact_mode_switched = _gf_false;
-
- ret = pthread_mutex_unlock(&priv->compact_lock);
-
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
- "Failed to release lock");
- goto out;
- }
-
-out:
- return NULL;
-}
-
-int
-ctr_ipc_helper(xlator_t *this, dict_t *in_dict, dict_t *out_dict)
-{
- int ret = -1;
- char *ctr_ipc_ops = NULL;
- gf_ctr_private_t *priv = NULL;
- char *db_version = NULL;
- char *db_param_key = NULL;
- char *db_param = NULL;
- char *query_file = NULL;
- gfdb_ipc_ctr_params_t *ipc_ctr_params = NULL;
- int result = 0;
- pthread_t compact_thread;
-
- GF_VALIDATE_OR_GOTO("ctr", this, out);
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- priv = this->private;
- GF_VALIDATE_OR_GOTO(this->name, priv->_db_conn, out);
- GF_VALIDATE_OR_GOTO(this->name, in_dict, out);
- GF_VALIDATE_OR_GOTO(this->name, out_dict, out);
-
- GET_DB_PARAM_FROM_DICT(this->name, in_dict, GFDB_IPC_CTR_KEY, ctr_ipc_ops,
- out);
-
- /*if its a db clear operation */
- if (strncmp(ctr_ipc_ops, GFDB_IPC_CTR_CLEAR_OPS,
- SLEN(GFDB_IPC_CTR_CLEAR_OPS)) == 0) {
- ret = clear_files_heat(priv->_db_conn);
- if (ret)
- goto out;
-
- } /* if its a query operation, in which case its query + clear db*/
- else if (strncmp(ctr_ipc_ops, GFDB_IPC_CTR_QUERY_OPS,
- SLEN(GFDB_IPC_CTR_QUERY_OPS)) == 0) {
- ret = dict_get_str(in_dict, GFDB_IPC_CTR_GET_QFILE_PATH, &query_file);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
- "Failed extracting query file path");
- goto out;
- }
-
- ret = dict_get_bin(in_dict, GFDB_IPC_CTR_GET_QUERY_PARAMS,
- (void *)&ipc_ctr_params);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
- "Failed extracting query parameters");
- goto out;
- }
-
- ret = ctr_db_query(this, priv->_db_conn, query_file, ipc_ctr_params);
-
- ret = dict_set_int32(out_dict, GFDB_IPC_CTR_RET_QUERY_COUNT, ret);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
- "Failed setting query reply");
- goto out;
- }
-
- } /* if its a query for db version */
- else if (strncmp(ctr_ipc_ops, GFDB_IPC_CTR_GET_DB_VERSION_OPS,
- SLEN(GFDB_IPC_CTR_GET_DB_VERSION_OPS)) == 0) {
- ret = get_db_version(priv->_db_conn, &db_version);
- if (ret == -1 || !db_version) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
- "Failed extracting db version ");
- goto out;
- }
-
- SET_DB_PARAM_TO_DICT(this->name, out_dict, GFDB_IPC_CTR_RET_DB_VERSION,
- db_version, ret, error);
-
- } /* if its a query for a db setting */
- else if (strncmp(ctr_ipc_ops, GFDB_IPC_CTR_GET_DB_PARAM_OPS,
- SLEN(GFDB_IPC_CTR_GET_DB_PARAM_OPS)) == 0) {
- ret = dict_get_str(in_dict, GFDB_IPC_CTR_GET_DB_KEY, &db_param_key);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
- "Failed extracting db param key");
- goto out;
- }
-
- ret = get_db_params(priv->_db_conn, db_param_key, &db_param);
- if (ret == -1 || !db_param) {
- goto out;
- }
-
- SET_DB_PARAM_TO_DICT(this->name, out_dict, db_param_key, db_param, ret,
- error);
- } /* if its an attempt to compact the database */
- else if (strncmp(ctr_ipc_ops, GFDB_IPC_CTR_SET_COMPACT_PRAGMA,
- SLEN(GFDB_IPC_CTR_SET_COMPACT_PRAGMA)) == 0) {
- ret = pthread_mutex_lock(&priv->compact_lock);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
- "Failed to acquire lock for compaction");
- goto out;
- }
-
- if ((priv->compact_active || priv->compact_mode_switched)) {
- /* Compaction in progress. LEAVE */
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
- "Compaction already in progress.");
- pthread_mutex_unlock(&priv->compact_lock);
- goto out;
- }
- /* At this point, we should be the only one on the brick */
- /* compacting */
-
- /* Grab the arguments from the dictionary */
- ret = dict_get_int32(in_dict, "compact_active", &result);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
- "Failed to get compaction type");
- goto out;
- }
-
- if (result) {
- priv->compact_active = _gf_true;
- }
-
- ret = dict_get_int32(in_dict, "compact_mode_switched", &result);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
- "Failed to see if compaction switched");
- goto out;
- }
-
- if (result) {
- priv->compact_mode_switched = _gf_true;
- gf_msg("ctr-compact", GF_LOG_TRACE, 0, CTR_MSG_SET,
- "Pre-thread: Compact mode switch is true");
- } else {
- gf_msg("ctr-compact", GF_LOG_TRACE, 0, CTR_MSG_SET,
- "Pre-thread: Compact mode switch is false");
- }
-
- ret = pthread_mutex_unlock(&priv->compact_lock);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
- "Failed to release lock for compaction");
- goto out;
- }
-
- ret = gf_thread_create(&compact_thread, NULL, ctr_compact_thread,
- (void *)this, "ctrcomp");
-
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
- "Failed to spawn compaction thread");
- goto out;
- }
-
- goto out;
- } /* default case */
- else {
- goto out;
- }
-
- ret = 0;
- goto out;
-error:
- GF_FREE(db_param_key);
- GF_FREE(db_param);
- GF_FREE(db_version);
-out:
- return ret;
-}
-
-/* IPC Call from tier migrator to clear the heat on the DB */
-int32_t
-ctr_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *in_dict)
-{
- int ret = -1;
- gf_ctr_private_t *priv = NULL;
- dict_t *out_dict = NULL;
-
- GF_ASSERT(this);
- priv = this->private;
- GF_ASSERT(priv);
- GF_ASSERT(priv->_db_conn);
- GF_VALIDATE_OR_GOTO(this->name, in_dict, wind);
-
- if (op != GF_IPC_TARGET_CTR)
- goto wind;
-
- out_dict = dict_new();
- if (!out_dict) {
- goto out;
- }
-
- ret = ctr_ipc_helper(this, in_dict, out_dict);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET,
- "Failed in ctr_ipc_helper");
- }
-out:
-
- STACK_UNWIND_STRICT(ipc, frame, ret, 0, out_dict);
-
- if (out_dict)
- dict_unref(out_dict);
-
- return 0;
-
-wind:
- STACK_WIND(frame, default_ipc_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ipc, op, in_dict);
-
- return 0;
-}
-
-/* Call to initialize db for ctr xlator while ctr is enabled */
-int32_t
-initialize_ctr_resource(xlator_t *this, gf_ctr_private_t *priv)
-{
- int ret_db = -1;
- dict_t *params_dict = NULL;
-
- if (!priv)
- goto error;
-
- /* For compaction */
- priv->compact_active = _gf_false;
- priv->compact_mode_switched = _gf_false;
- ret_db = pthread_mutex_init(&priv->compact_lock, NULL);
-
- if (ret_db) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
- "FATAL: Failed initializing compaction mutex");
- goto error;
- }
-
- params_dict = dict_new();
- if (!params_dict) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INIT_DB_PARAMS_FAILED,
- "DB Params cannot initialized!");
- goto error;
- }
-
- /*Extract db params options*/
- ret_db = extract_db_params(this, params_dict, priv->gfdb_db_type);
- if (ret_db) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_EXTRACT_DB_PARAM_OPTIONS_FAILED,
- "Failed extracting db params options");
- goto error;
- }
-
- /*Create a memory pool for ctr xlator*/
- this->local_pool = mem_pool_new(gf_ctr_local_t, 64);
- if (!this->local_pool) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_CREATE_LOCAL_MEMORY_POOL_FAILED,
- "failed to create local memory pool");
- goto error;
- }
-
- /*Initialize Database Connection*/
- priv->_db_conn = init_db(params_dict, priv->gfdb_db_type);
- if (!priv->_db_conn) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
- "FATAL: Failed initializing data base");
- goto error;
- }
-
- ret_db = 0;
- goto out;
-
-error:
- if (this)
- mem_pool_destroy(this->local_pool);
-
- if (priv) {
- GF_FREE(priv->ctr_db_path);
- }
- GF_FREE(priv);
- ret_db = -1;
-out:
- if (params_dict)
- dict_unref(params_dict);
-
- return ret_db;
-}
-
-/******************************************************************************/
-int
-reconfigure(xlator_t *this, dict_t *options)
-{
- char *temp_str = NULL;
- int ret = 0;
- gf_ctr_private_t *priv = NULL;
-
- priv = this->private;
-
- if (dict_get_str(options, "changetimerecorder.frequency", &temp_str)) {
- gf_msg(this->name, GF_LOG_TRACE, 0, CTR_MSG_SET, "set");
- }
-
- GF_OPTION_RECONF("ctr-enabled", priv->enabled, options, bool, out);
- if (!priv->enabled) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_INFO, 0, CTR_MSG_XLATOR_DISABLED,
- "CTR Xlator is not enabled so skip ctr reconfigure");
- goto out;
- }
-
- /* If ctr is enabled after skip init for ctr xlator then call
- initialize_ctr_resource during reconfigure phase to allocate resources
- for xlator
- */
- if (priv->enabled && !priv->_db_conn) {
- ret = initialize_ctr_resource(this, priv);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
- "FATAL: Failed ctr initialize resource");
- goto out;
- }
- }
-
- GF_OPTION_RECONF("record-counters", priv->ctr_record_counter, options, bool,
- out);
-
- GF_OPTION_RECONF("ctr-record-metadata-heat", priv->ctr_record_metadata_heat,
- options, bool, out);
-
- GF_OPTION_RECONF("ctr_link_consistency", priv->ctr_link_consistency,
- options, bool, out);
-
- GF_OPTION_RECONF("ctr_lookupheal_inode_timeout",
- priv->ctr_lookupheal_inode_timeout, options, uint64, out);
-
- GF_OPTION_RECONF("ctr_lookupheal_link_timeout",
- priv->ctr_lookupheal_link_timeout, options, uint64, out);
-
- GF_OPTION_RECONF("record-exit", priv->ctr_record_unwind, options, bool,
- out);
-
- GF_OPTION_RECONF("record-entry", priv->ctr_record_wind, options, bool, out);
-
- /* If database is sqlite */
- if (priv->gfdb_db_type == GFDB_SQLITE3) {
- /* AUTOCHECKPOINT */
- if (dict_get_str(options, GFDB_SQL_PARAM_WAL_AUTOCHECK, &temp_str) ==
- 0) {
- ret = set_db_params(priv->_db_conn, "wal_autocheckpoint", temp_str);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_SET_VALUE_TO_SQL_PARAM_FAILED,
- "Failed to set %s", GFDB_SQL_PARAM_WAL_AUTOCHECK);
- }
- }
-
- /* CACHE_SIZE */
- if (dict_get_str(options, GFDB_SQL_PARAM_CACHE_SIZE, &temp_str) == 0) {
- ret = set_db_params(priv->_db_conn, "cache_size", temp_str);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_SET_VALUE_TO_SQL_PARAM_FAILED,
- "Failed to set %s", GFDB_SQL_PARAM_CACHE_SIZE);
- }
- }
- }
-
- ret = 0;
-
-out:
-
- return ret;
-}
-
-/****************************init********************************************/
-
-int32_t
-init(xlator_t *this)
-{
- gf_ctr_private_t *priv = NULL;
- int ret_db = -1;
-
- if (!this) {
- gf_msg("ctr", GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
- "FATAL: ctr this is not initialized");
- return -1;
- }
-
- if (!this->children || this->children->next) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
- "FATAL: ctr should have exactly one child");
- return -1;
- }
-
- if (!this->parents) {
- gf_msg(this->name, GF_LOG_WARNING, 0, CTR_MSG_DANGLING_VOLUME,
- "dangling volume. check volfile ");
- }
-
- priv = GF_CALLOC(1, sizeof(*priv), gf_ctr_mt_private_t);
- if (!priv) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, CTR_MSG_CALLOC_FAILED,
- "Calloc did not work!!!");
- return -1;
- }
-
- /*Default values for the translator*/
- priv->ctr_record_wind = _gf_true;
- priv->ctr_record_unwind = _gf_false;
- priv->ctr_hot_brick = _gf_false;
- priv->gfdb_db_type = GFDB_SQLITE3;
- priv->gfdb_sync_type = GFDB_DB_SYNC;
- priv->_db_conn = NULL;
- priv->ctr_lookupheal_link_timeout = CTR_DEFAULT_HARDLINK_EXP_PERIOD;
- priv->ctr_lookupheal_inode_timeout = CTR_DEFAULT_INODE_EXP_PERIOD;
-
- /*Extract ctr xlator options*/
- ret_db = extract_ctr_options(this, priv);
- if (ret_db) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_EXTRACT_CTR_XLATOR_OPTIONS_FAILED,
- "Failed extracting ctr xlator options");
- GF_FREE(priv);
- return -1;
- }
-
- if (!priv->enabled) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_INFO, 0, CTR_MSG_XLATOR_DISABLED,
- "CTR Xlator is not enabled so skip ctr init");
- goto out;
- }
-
- ret_db = initialize_ctr_resource(this, priv);
- if (ret_db) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR,
- "FATAL: Failed ctr initialize resource");
- return -1;
- }
-
-out:
- this->private = (void *)priv;
- return 0;
-}
-
-int
-notify(xlator_t *this, int event, void *data, ...)
-{
- gf_ctr_private_t *priv = NULL;
- int ret = 0;
-
- priv = this->private;
-
- if (!priv)
- goto out;
-
- ret = default_notify(this, event, data);
-
-out:
- return ret;
-}
-
-int32_t
-mem_acct_init(xlator_t *this)
-{
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO("ctr", this, out);
-
- ret = xlator_mem_acct_init(this, gf_ctr_mt_end + 1);
-
- if (ret != 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_MEM_ACC_INIT_FAILED,
- "Memory accounting init"
- "failed");
- return ret;
- }
-out:
- return ret;
-}
-
-void
-fini(xlator_t *this)
-{
- gf_ctr_private_t *priv = NULL;
-
- priv = this->private;
-
- if (priv && priv->enabled) {
- if (fini_db(priv->_db_conn)) {
- gf_msg(this->name, GF_LOG_WARNING, 0, CTR_MSG_CLOSE_DB_CONN_FAILED,
- "Failed closing "
- "db connection");
- }
-
- if (priv->_db_conn)
- priv->_db_conn = NULL;
-
- GF_FREE(priv->ctr_db_path);
- if (pthread_mutex_destroy(&priv->compact_lock)) {
- gf_msg(this->name, GF_LOG_WARNING, 0, CTR_MSG_CLOSE_DB_CONN_FAILED,
- "Failed to "
- "destroy the compaction mutex");
- }
- }
- GF_FREE(priv);
- mem_pool_destroy(this->local_pool);
- this->local_pool = NULL;
-
- return;
-}
-
-struct xlator_fops fops = {
- /*lookup*/
- .lookup = ctr_lookup,
- /*write fops */
- .mknod = ctr_mknod,
- .create = ctr_create,
- .truncate = ctr_truncate,
- .ftruncate = ctr_ftruncate,
- .setxattr = ctr_setxattr,
- .fsetxattr = ctr_fsetxattr,
- .removexattr = ctr_removexattr,
- .fremovexattr = ctr_fremovexattr,
- .unlink = ctr_unlink,
- .link = ctr_link,
- .rename = ctr_rename,
- .writev = ctr_writev,
- .setattr = ctr_setattr,
- .fsetattr = ctr_fsetattr,
- /*read fops*/
- .readv = ctr_readv,
- /* IPC call*/
- .ipc = ctr_ipc};
-
-struct xlator_cbks cbks = {.forget = ctr_forget};
-
-struct volume_options options[] = {
- {.key =
- {
- "ctr-enabled",
- },
- .type = GF_OPTION_TYPE_BOOL,
- .value = {"on", "off"},
- .default_value = "off",
- .description = "Enables the CTR",
- .flags = OPT_FLAG_SETTABLE},
- {.key = {"record-entry"},
- .type = GF_OPTION_TYPE_BOOL,
- .value = {"on", "off"},
- .default_value = "on"},
- {.key = {"record-exit"},
- .type = GF_OPTION_TYPE_BOOL,
- .value = {"on", "off"},
- .default_value = "off"},
- {.key = {"record-counters"},
- .type = GF_OPTION_TYPE_BOOL,
- .value = {"on", "off"},
- .default_value = "off",
- .op_version = {GD_OP_VERSION_3_7_0},
- .flags = OPT_FLAG_SETTABLE,
- .tags = {}},
- {.key = {"ctr-record-metadata-heat"},
- .type = GF_OPTION_TYPE_BOOL,
- .value = {"on", "off"},
- .default_value = "off",
- .flags = OPT_FLAG_SETTABLE,
- .op_version = {GD_OP_VERSION_3_7_0},
- .tags = {}},
- {.key = {"ctr_link_consistency"},
- .type = GF_OPTION_TYPE_BOOL,
- .value = {"on", "off"},
- .default_value = "off",
- .flags = OPT_FLAG_SETTABLE,
- .op_version = {GD_OP_VERSION_3_7_0},
- .tags = {}},
- {.key = {"ctr_lookupheal_link_timeout"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "300",
- .flags = OPT_FLAG_SETTABLE,
- .op_version = {GD_OP_VERSION_3_7_2},
- .tags = {}},
- {.key = {"ctr_lookupheal_inode_timeout"},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "300",
- .flags = OPT_FLAG_SETTABLE,
- .op_version = {GD_OP_VERSION_3_7_2},
- .tags = {}},
- {.key = {"hot-brick"},
- .type = GF_OPTION_TYPE_BOOL,
- .value = {"on", "off"},
- .default_value = "off"},
- {.key = {"db-type"},
- .type = GF_OPTION_TYPE_STR,
- .value = {"hashfile", "rocksdb", "changelog", "sqlite3", "hyperdex"},
- .default_value = "sqlite3",
- .op_version = {GD_OP_VERSION_3_7_0},
- .flags = OPT_FLAG_SETTABLE,
- .tags = {}},
- {.key = {"db-sync"},
- .type = GF_OPTION_TYPE_STR,
- .value = {"sync", "async"},
- .default_value = "sync"},
- {.key = {"db-path"}, .type = GF_OPTION_TYPE_PATH},
- {.key = {"db-name"}, .type = GF_OPTION_TYPE_STR},
- {.key = {GFDB_SQL_PARAM_SYNC},
- .type = GF_OPTION_TYPE_STR,
- .value = {"off", "normal", "full"},
- .default_value = "normal"},
- {.key = {GFDB_SQL_PARAM_JOURNAL_MODE},
- .type = GF_OPTION_TYPE_STR,
- .value = {"delete", "truncate", "persist", "memory", "wal", "off"},
- .default_value = "wal",
- .flags = OPT_FLAG_SETTABLE,
- .op_version = {GD_OP_VERSION_3_7_0},
- .tags = {}},
- {.key = {GFDB_SQL_PARAM_AUTO_VACUUM},
- .type = GF_OPTION_TYPE_STR,
- .value = {"off", "full", "incr"},
- .default_value = "off",
- .flags = OPT_FLAG_SETTABLE,
- .op_version = {GD_OP_VERSION_3_7_0},
- .tags = {}},
- {.key = {GFDB_SQL_PARAM_WAL_AUTOCHECK},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "25000",
- .flags = OPT_FLAG_SETTABLE,
- .op_version = {GD_OP_VERSION_3_7_0},
- .tags = {}},
- {.key = {GFDB_SQL_PARAM_CACHE_SIZE},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "12500",
- .flags = OPT_FLAG_SETTABLE,
- .op_version = {GD_OP_VERSION_3_7_0},
- .tags = {}},
- {.key = {GFDB_SQL_PARAM_PAGE_SIZE},
- .type = GF_OPTION_TYPE_INT,
- .default_value = "4096",
- .flags = OPT_FLAG_SETTABLE,
- .op_version = {GD_OP_VERSION_3_7_0},
- .tags = {}},
- {.key = {NULL}},
-};
diff --git a/xlators/features/changetimerecorder/src/changetimerecorder.h b/xlators/features/changetimerecorder/src/changetimerecorder.h
deleted file mode 100644
index 2a8bbd18c5b..00000000000
--- a/xlators/features/changetimerecorder/src/changetimerecorder.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- Copyright (c) 2006-2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __CTR_H
-#define __CTR_H
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "common-utils.h"
-#include "ctr_mem_types.h"
-#include "ctr-helper.h"
-
-#endif /* __CTR_H */
diff --git a/xlators/features/changetimerecorder/src/ctr-helper.c b/xlators/features/changetimerecorder/src/ctr-helper.c
deleted file mode 100644
index e1e65735cef..00000000000
--- a/xlators/features/changetimerecorder/src/ctr-helper.c
+++ /dev/null
@@ -1,293 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include "gfdb_sqlite3.h"
-#include "ctr-helper.h"
-#include "ctr-messages.h"
-
-/*******************************************************************************
- *
- * Fill unwind into db record
- *
- ******************************************************************************/
-int
-fill_db_record_for_unwind(xlator_t *this, gf_ctr_local_t *ctr_local,
- gfdb_fop_type_t fop_type, gfdb_fop_path_t fop_path)
-{
- int ret = -1;
- gfdb_time_t *ctr_uwtime = NULL;
- gf_ctr_private_t *_priv = NULL;
-
- GF_ASSERT(this);
- _priv = this->private;
- GF_ASSERT(_priv);
-
- GF_ASSERT(ctr_local);
-
- /*If not unwind path error*/
- if (!isunwindpath(fop_path)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_WRONG_FOP_PATH,
- "Wrong fop_path. Should be unwind");
- goto out;
- }
-
- ctr_uwtime = &CTR_DB_REC(ctr_local).gfdb_unwind_change_time;
- CTR_DB_REC(ctr_local).gfdb_fop_path = fop_path;
- CTR_DB_REC(ctr_local).gfdb_fop_type = fop_type;
-
- ret = gettimeofday(ctr_uwtime, NULL);
- if (ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, errno,
- CTR_MSG_FILL_UNWIND_TIME_REC_ERROR,
- "Error "
- "filling unwind time record %s",
- strerror(errno));
- goto out;
- }
-
- /* Special case i.e if its a tier rebalance
- * + cold tier brick
- * + its a create/mknod FOP
- * we record unwind time as zero */
- if (ctr_local->client_pid == GF_CLIENT_PID_TIER_DEFRAG &&
- (!_priv->ctr_hot_brick) && isdentrycreatefop(fop_type)) {
- memset(ctr_uwtime, 0, sizeof(*ctr_uwtime));
- }
- ret = 0;
-out:
- return ret;
-}
-
-/*******************************************************************************
- *
- * Fill wind into db record
- *
- ******************************************************************************/
-int
-fill_db_record_for_wind(xlator_t *this, gf_ctr_local_t *ctr_local,
- gf_ctr_inode_context_t *ctr_inode_cx)
-{
- int ret = -1;
- gfdb_time_t *ctr_wtime = NULL;
- gf_ctr_private_t *_priv = NULL;
-
- GF_ASSERT(this);
- _priv = this->private;
- GF_ASSERT(_priv);
- GF_ASSERT(ctr_local);
- IS_CTR_INODE_CX_SANE(ctr_inode_cx);
-
- /*if not wind path error!*/
- if (!iswindpath(ctr_inode_cx->fop_path)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_WRONG_FOP_PATH,
- "Wrong fop_path. Should be wind");
- goto out;
- }
-
- ctr_wtime = &CTR_DB_REC(ctr_local).gfdb_wind_change_time;
- CTR_DB_REC(ctr_local).gfdb_fop_path = ctr_inode_cx->fop_path;
- CTR_DB_REC(ctr_local).gfdb_fop_type = ctr_inode_cx->fop_type;
- CTR_DB_REC(ctr_local).link_consistency = _priv->ctr_link_consistency;
-
- ret = gettimeofday(ctr_wtime, NULL);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, errno,
- CTR_MSG_FILL_UNWIND_TIME_REC_ERROR,
- "Error filling wind time record %s", strerror(errno));
- goto out;
- }
-
- /* Special case i.e if its a tier rebalance
- * + cold tier brick
- * + its a create/mknod FOP
- * we record wind time as zero */
- if (ctr_local->client_pid == GF_CLIENT_PID_TIER_DEFRAG &&
- (!_priv->ctr_hot_brick) && isdentrycreatefop(ctr_inode_cx->fop_type)) {
- memset(ctr_wtime, 0, sizeof(*ctr_wtime));
- }
-
- /* Copy gfid into db record */
- gf_uuid_copy(CTR_DB_REC(ctr_local).gfid, *(ctr_inode_cx->gfid));
-
- /* Copy older gfid if any */
- if (ctr_inode_cx->old_gfid &&
- (!gf_uuid_is_null(*(ctr_inode_cx->old_gfid)))) {
- gf_uuid_copy(CTR_DB_REC(ctr_local).old_gfid, *(ctr_inode_cx->old_gfid));
- }
-
- /*Hard Links*/
- if (isdentryfop(ctr_inode_cx->fop_type)) {
- /*new link fop*/
- if (NEW_LINK_CX(ctr_inode_cx)) {
- gf_uuid_copy(CTR_DB_REC(ctr_local).pargfid,
- *((NEW_LINK_CX(ctr_inode_cx))->pargfid));
- strcpy(CTR_DB_REC(ctr_local).file_name,
- NEW_LINK_CX(ctr_inode_cx)->basename);
- }
- /*rename fop*/
- if (OLD_LINK_CX(ctr_inode_cx)) {
- gf_uuid_copy(CTR_DB_REC(ctr_local).old_pargfid,
- *((OLD_LINK_CX(ctr_inode_cx))->pargfid));
- strcpy(CTR_DB_REC(ctr_local).old_file_name,
- OLD_LINK_CX(ctr_inode_cx)->basename);
- }
- }
-
- ret = 0;
-out:
- /*On error roll back and clean the record*/
- if (ret == -1) {
- CLEAR_CTR_DB_RECORD(ctr_local);
- }
- return ret;
-}
-
-/******************************************************************************
- *
- * CTR xlator init related functions
- *
- *
- * ****************************************************************************/
-static int
-extract_sql_params(xlator_t *this, dict_t *params_dict)
-{
- int ret = -1;
- char *db_path = NULL;
- char *db_name = NULL;
- char *db_full_path = NULL;
-
- GF_ASSERT(this);
- GF_ASSERT(params_dict);
-
- /*Extract the path of the db*/
- db_path = NULL;
- GET_DB_PARAM_FROM_DICT_DEFAULT(this->name, this->options, "db-path",
- db_path, "/var/run/gluster/");
-
- /*Extract the name of the db*/
- db_name = NULL;
- GET_DB_PARAM_FROM_DICT_DEFAULT(this->name, this->options, "db-name",
- db_name, "gf_ctr_db.db");
-
- /*Construct full path of the db*/
- ret = gf_asprintf(&db_full_path, "%s/%s", db_path, db_name);
- if (ret < 0) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0,
- CTR_MSG_CONSTRUCT_DB_PATH_FAILED,
- "Construction of full db path failed!");
- goto out;
- }
-
- /*Setting the SQL DB Path*/
- SET_DB_PARAM_TO_DICT(this->name, params_dict, GFDB_SQL_PARAM_DBPATH,
- db_full_path, ret, out);
-
- /*Extract rest of the sql params*/
- ret = gfdb_set_sql_params(this->name, this->options, params_dict);
- if (ret) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0,
- CTR_MSG_SET_VALUE_TO_SQL_PARAM_FAILED,
- "Failed setting values to sql param dict!");
- }
-
- ret = 0;
-
-out:
- if (ret)
- GF_FREE(db_full_path);
- return ret;
-}
-
-int
-extract_db_params(xlator_t *this, dict_t *params_dict, gfdb_db_type_t db_type)
-{
- int ret = -1;
-
- GF_ASSERT(this);
- GF_ASSERT(params_dict);
-
- switch (db_type) {
- case GFDB_SQLITE3:
- ret = extract_sql_params(this, params_dict);
- if (ret)
- goto out;
- break;
- case GFDB_ROCKS_DB:
- case GFDB_HYPERDEX:
- case GFDB_HASH_FILE_STORE:
- case GFDB_INVALID_DB:
- case GFDB_DB_END:
- goto out;
- }
- ret = 0;
-out:
- return ret;
-}
-
-int
-extract_ctr_options(xlator_t *this, gf_ctr_private_t *_priv)
-{
- int ret = -1;
- char *_val_str = NULL;
-
- GF_ASSERT(this);
- GF_ASSERT(_priv);
-
- /*Checking if the CTR Translator is enabled. By default its disabled*/
- _priv->enabled = _gf_false;
- GF_OPTION_INIT("ctr-enabled", _priv->enabled, bool, out);
- if (!_priv->enabled) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_INFO, 0, CTR_MSG_XLATOR_DISABLED,
- "CTR Xlator is disabled.");
- ret = 0;
- goto out;
- }
-
- /*Extract db type*/
- GF_OPTION_INIT("db-type", _val_str, str, out);
- _priv->gfdb_db_type = gf_string2gfdbdbtype(_val_str);
-
- /*Extract flag for record on wind*/
- GF_OPTION_INIT("record-entry", _priv->ctr_record_wind, bool, out);
-
- /*Extract flag for record on unwind*/
- GF_OPTION_INIT("record-exit", _priv->ctr_record_unwind, bool, out);
-
- /*Extract flag for record on counters*/
- GF_OPTION_INIT("record-counters", _priv->ctr_record_counter, bool, out);
-
- /* Extract flag for record metadata heat */
- GF_OPTION_INIT("ctr-record-metadata-heat", _priv->ctr_record_metadata_heat,
- bool, out);
-
- /*Extract flag for link consistency*/
- GF_OPTION_INIT("ctr_link_consistency", _priv->ctr_link_consistency, bool,
- out);
-
- /*Extract ctr_lookupheal_inode_timeout */
- GF_OPTION_INIT("ctr_lookupheal_inode_timeout",
- _priv->ctr_lookupheal_inode_timeout, uint64, out);
-
- /*Extract ctr_lookupheal_link_timeout*/
- GF_OPTION_INIT("ctr_lookupheal_link_timeout",
- _priv->ctr_lookupheal_link_timeout, uint64, out);
-
- /*Extract flag for hot tier brick*/
- GF_OPTION_INIT("hot-brick", _priv->ctr_hot_brick, bool, out);
-
- /*Extract flag for sync mode*/
- GF_OPTION_INIT("db-sync", _val_str, str, out);
- _priv->gfdb_sync_type = gf_string2gfdbdbsync(_val_str);
-
- ret = 0;
-
-out:
- return ret;
-}
diff --git a/xlators/features/changetimerecorder/src/ctr-helper.h b/xlators/features/changetimerecorder/src/ctr-helper.h
deleted file mode 100644
index 3268c9d2fb9..00000000000
--- a/xlators/features/changetimerecorder/src/ctr-helper.h
+++ /dev/null
@@ -1,854 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __CTR_HELPER_H
-#define __CTR_HELPER_H
-
-#include "xlator.h"
-#include "ctr_mem_types.h"
-#include "iatt.h"
-#include "glusterfs.h"
-#include "xlator.h"
-#include "defaults.h"
-#include "logging.h"
-#include "common-utils.h"
-#include <time.h>
-#include <sys/time.h>
-#include <pthread.h>
-
-#include "gfdb_data_store.h"
-#include "ctr-xlator-ctx.h"
-#include "ctr-messages.h"
-
-#define CTR_DEFAULT_HARDLINK_EXP_PERIOD 300 /* Five mins */
-#define CTR_DEFAULT_INODE_EXP_PERIOD 300 /* Five mins */
-
-typedef struct ctr_query_cbk_args {
- int query_fd;
- int count;
-} ctr_query_cbk_args_t;
-
-/*CTR Xlator Private structure*/
-typedef struct gf_ctr_private {
- gf_boolean_t enabled;
- char *ctr_db_path;
- gf_boolean_t ctr_hot_brick;
- gf_boolean_t ctr_record_wind;
- gf_boolean_t ctr_record_unwind;
- gf_boolean_t ctr_record_counter;
- gf_boolean_t ctr_record_metadata_heat;
- gf_boolean_t ctr_link_consistency;
- gfdb_db_type_t gfdb_db_type;
- gfdb_sync_type_t gfdb_sync_type;
- gfdb_conn_node_t *_db_conn;
- uint64_t ctr_lookupheal_link_timeout;
- uint64_t ctr_lookupheal_inode_timeout;
- gf_boolean_t compact_active;
- gf_boolean_t compact_mode_switched;
- pthread_mutex_t compact_lock;
-} gf_ctr_private_t;
-
-/*
- * gf_ctr_local_t is the ctr xlator local data structure that is stored in
- * the call_frame of each FOP.
- *
- * gfdb_db_record: The gf_ctr_local contains a gfdb_db_record object, which is
- * used by the insert_record() api from the libgfdb. The gfdb_db_record object
- * will contain all the inode and hardlink(only for dentry fops: create,
- * mknod,link, unlink, rename).The ctr_local is keep alive till the unwind
- * call and will be release during the unwind. The same gfdb_db_record will
- * used for the unwind insert_record() api, to record unwind in the database.
- *
- * ia_inode_type in gf_ctr_local will tell the type of the inode. This is
- * important for during the unwind path. As we will not have the inode during
- * the unwind path. We would have include this in the gfdb_db_record itself
- * but currently we record only file inode information.
- *
- * is_internal_fop in gf_ctr_local will tell us if this is a internal fop and
- * take special/no action. We don't record change/access times or increement
- * heat counter for internal fops from rebalancer.
- * */
-typedef struct gf_ctr_local {
- gfdb_db_record_t gfdb_db_record;
- ia_type_t ia_inode_type;
- gf_boolean_t is_internal_fop;
- gf_special_pid_t client_pid;
-} gf_ctr_local_t;
-/*
- * Easy access of gfdb_db_record of ctr_local
- * */
-#define CTR_DB_REC(ctr_local) (ctr_local->gfdb_db_record)
-
-/*Clear db record*/
-#define CLEAR_CTR_DB_RECORD(ctr_local) \
- do { \
- ctr_local->gfdb_db_record.gfdb_fop_path = GFDB_FOP_INVALID; \
- memset(&(ctr_local->gfdb_db_record.gfdb_wind_change_time), 0, \
- sizeof(gfdb_time_t)); \
- memset(&(ctr_local->gfdb_db_record.gfdb_unwind_change_time), 0, \
- sizeof(gfdb_time_t)); \
- gf_uuid_clear(ctr_local->gfdb_db_record.gfid); \
- gf_uuid_clear(ctr_local->gfdb_db_record.pargfid); \
- memset(ctr_local->gfdb_db_record.file_name, 0, GF_NAME_MAX + 1); \
- memset(ctr_local->gfdb_db_record.old_file_name, 0, GF_NAME_MAX + 1); \
- ctr_local->gfdb_db_record.gfdb_fop_type = GFDB_FOP_INVALID_OP; \
- ctr_local->ia_inode_type = IA_INVAL; \
- } while (0)
-
-static gf_ctr_local_t *
-init_ctr_local_t(xlator_t *this)
-{
- gf_ctr_local_t *ctr_local = NULL;
-
- GF_ASSERT(this);
-
- ctr_local = mem_get0(this->local_pool);
- if (!ctr_local) {
- gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0,
- CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND,
- "Error while creating ctr local");
- goto out;
- }
-
- CLEAR_CTR_DB_RECORD(ctr_local);
-out:
- return ctr_local;
-}
-
-static void
-free_ctr_local(gf_ctr_local_t *ctr_local)
-{
- if (ctr_local)
- mem_put(ctr_local);
-}
-
-/******************************************************************************
- *
- *
- * Context Carrier Structures
- *
- *
- * ****************************************************************************/
-
-/*
- * Context Carrier structures are used to carry relevant information about
- * inodes and links from the fops calls to the ctr_insert_wind.
- * These structure just have pointers to the original data and donot
- * do a deep copy of any data. This info is deep copied to
- * ctr_local->gfdb_db_record and passed to insert_record() api of libgfdb. This
- * info remains persistent for the unwind in ctr_local->gfdb_db_record
- * and once used will be destroyed.
- *
- * gf_ctr_link_context_t : Context structure for hard links
- * gf_ctr_inode_context_t : Context structure for inodes
- *
- * */
-
-/*Context Carrier Structure for hard links*/
-typedef struct gf_ctr_link_context {
- uuid_t *pargfid;
- const char *basename;
-} gf_ctr_link_context_t;
-
-/*Context Carrier Structure for inodes*/
-typedef struct gf_ctr_inode_context {
- ia_type_t ia_type;
- uuid_t *gfid;
- uuid_t *old_gfid;
- gf_ctr_link_context_t *new_link_cx;
- gf_ctr_link_context_t *old_link_cx;
- gfdb_fop_type_t fop_type;
- gfdb_fop_path_t fop_path;
- gf_boolean_t is_internal_fop;
- /* Indicating metadata fops */
- gf_boolean_t is_metadata_fop;
-} gf_ctr_inode_context_t;
-
-/*******************Util Macros for Context Carrier Structures*****************/
-
-/*Checks if ctr_link_cx is sane!*/
-#define IS_CTR_LINK_CX_SANE(ctr_link_cx) \
- do { \
- if (ctr_link_cx) { \
- if (ctr_link_cx->pargfid) \
- GF_ASSERT(*(ctr_link_cx->pargfid)); \
- GF_ASSERT(ctr_link_cx->basename); \
- }; \
- } while (0)
-
-/*Clear and fill the ctr_link_context with values*/
-#define FILL_CTR_LINK_CX(ctr_link_cx, _pargfid, _basename, label) \
- do { \
- GF_VALIDATE_OR_GOTO("ctr", ctr_link_cx, label); \
- GF_VALIDATE_OR_GOTO("ctr", _pargfid, label); \
- GF_VALIDATE_OR_GOTO("ctr", _basename, label); \
- memset(ctr_link_cx, 0, sizeof(*ctr_link_cx)); \
- ctr_link_cx->pargfid = &_pargfid; \
- ctr_link_cx->basename = _basename; \
- } while (0)
-
-#define NEW_LINK_CX(ctr_inode_cx) ctr_inode_cx->new_link_cx
-
-#define OLD_LINK_CX(ctr_inode_cx) ctr_inode_cx->old_link_cx
-
-/*Checks if ctr_inode_cx is sane!*/
-#define IS_CTR_INODE_CX_SANE(ctr_inode_cx) \
- do { \
- GF_ASSERT(ctr_inode_cx); \
- GF_ASSERT(ctr_inode_cx->gfid); \
- GF_ASSERT(*(ctr_inode_cx->gfid)); \
- GF_ASSERT(ctr_inode_cx->fop_type != GFDB_FOP_INVALID_OP); \
- GF_ASSERT(ctr_inode_cx->fop_path != GFDB_FOP_INVALID); \
- IS_CTR_LINK_CX_SANE(NEW_LINK_CX(ctr_inode_cx)); \
- IS_CTR_LINK_CX_SANE(OLD_LINK_CX(ctr_inode_cx)); \
- } while (0)
-
-/*Clear and fill the ctr_inode_context with values*/
-#define FILL_CTR_INODE_CONTEXT(ctr_inode_cx, _ia_type, _gfid, _new_link_cx, \
- _old_link_cx, _fop_type, _fop_path) \
- do { \
- GF_ASSERT(ctr_inode_cx); \
- GF_ASSERT(_gfid); \
- GF_ASSERT(_fop_type != GFDB_FOP_INVALID_OP); \
- GF_ASSERT(_fop_path != GFDB_FOP_INVALID); \
- memset(ctr_inode_cx, 0, sizeof(*ctr_inode_cx)); \
- ctr_inode_cx->ia_type = _ia_type; \
- ctr_inode_cx->gfid = &_gfid; \
- IS_CTR_LINK_CX_SANE(NEW_LINK_CX(ctr_inode_cx)); \
- if (_new_link_cx) \
- NEW_LINK_CX(ctr_inode_cx) = _new_link_cx; \
- IS_CTR_LINK_CX_SANE(OLD_LINK_CX(ctr_inode_cx)); \
- if (_old_link_cx) \
- OLD_LINK_CX(ctr_inode_cx) = _old_link_cx; \
- ctr_inode_cx->fop_type = _fop_type; \
- ctr_inode_cx->fop_path = _fop_path; \
- } while (0)
-
-/******************************************************************************
- *
- * Util functions or macros used by
- * insert wind and insert unwind
- *
- * ****************************************************************************/
-/* Free ctr frame local */
-static inline void
-ctr_free_frame_local(call_frame_t *frame)
-{
- if (frame) {
- free_ctr_local((gf_ctr_local_t *)frame->local);
- frame->local = NULL;
- }
-}
-
-/* Setting GF_REQUEST_LINK_COUNT_XDATA in dict
- * that has to be sent to POSIX Xlator to send
- * link count in unwind path.
- * return 0 for success with not creation of dict
- * return 1 for success with creation of dict
- * return -1 for failure.
- * */
-static inline int
-set_posix_link_request(xlator_t *this, dict_t **xdata)
-{
- int ret = -1;
- gf_boolean_t is_created = _gf_false;
-
- GF_VALIDATE_OR_GOTO("ctr", this, out);
- GF_VALIDATE_OR_GOTO(this->name, xdata, out);
-
- /*create xdata if NULL*/
- if (!*xdata) {
- *xdata = dict_new();
- is_created = _gf_true;
- ret = 1;
- } else {
- ret = 0;
- }
-
- if (!*xdata) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_XDATA_NULL,
- "xdata is NULL :Cannot send "
- "GF_REQUEST_LINK_COUNT_XDATA to posix");
- ret = -1;
- goto out;
- }
-
- ret = dict_set_int32(*xdata, GF_REQUEST_LINK_COUNT_XDATA, 1);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_SET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED,
- "Failed setting GF_REQUEST_LINK_COUNT_XDATA");
- ret = -1;
- goto out;
- }
- ret = 0;
-out:
- if (ret == -1) {
- if (*xdata && is_created) {
- dict_unref(*xdata);
- }
- }
- return ret;
-}
-
-/*
- * If a bitrot fop
- * */
-#define BITROT_FOP(frame) \
- (frame->root->pid == GF_CLIENT_PID_BITD || \
- frame->root->pid == GF_CLIENT_PID_SCRUB)
-
-/*
- * If a rebalancer fop
- * */
-#define REBALANCE_FOP(frame) (frame->root->pid == GF_CLIENT_PID_DEFRAG)
-
-/*
- * If its a tiering rebalancer fop
- * */
-#define TIER_REBALANCE_FOP(frame) \
- (frame->root->pid == GF_CLIENT_PID_TIER_DEFRAG)
-
-/*
- * If its a AFR SELF HEAL
- * */
-#define AFR_SELF_HEAL_FOP(frame) (frame->root->pid == GF_CLIENT_PID_SELF_HEALD)
-
-/*
- * if a rebalancer fop goto
- * */
-#define CTR_IF_REBALANCE_FOP_THEN_GOTO(frame, label) \
- do { \
- if (REBALANCE_FOP(frame)) \
- goto label; \
- } while (0)
-
-/*
- * Internal fop
- *
- * */
-static inline gf_boolean_t
-is_internal_fop(call_frame_t *frame, dict_t *xdata)
-{
- gf_boolean_t ret = _gf_false;
-
- GF_ASSERT(frame);
- GF_ASSERT(frame->root);
-
- if (AFR_SELF_HEAL_FOP(frame)) {
- ret = _gf_true;
- }
- if (BITROT_FOP(frame)) {
- ret = _gf_true;
- }
- if (REBALANCE_FOP(frame) || TIER_REBALANCE_FOP(frame)) {
- ret = _gf_true;
- if (xdata && dict_get(xdata, CTR_ATTACH_TIER_LOOKUP)) {
- ret = _gf_false;
- }
- }
- if (xdata && dict_get(xdata, GLUSTERFS_INTERNAL_FOP_KEY)) {
- ret = _gf_true;
- }
-
- return ret;
-}
-
-#define CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, dict, label) \
- do { \
- if (is_internal_fop(frame, dict)) \
- goto label; \
- } while (0)
-
-/* if fop has failed exit */
-#define CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, label) \
- do { \
- if (op_ret == -1) { \
- gf_msg_trace(this->name, 0, "Failed fop with %s", \
- strerror(op_errno)); \
- goto label; \
- }; \
- } while (0)
-
-/*
- * IS CTR Xlator is disabled then goto to label
- * */
-#define CTR_IS_DISABLED_THEN_GOTO(this, label) \
- do { \
- gf_ctr_private_t *_priv = NULL; \
- GF_ASSERT(this); \
- GF_ASSERT(this->private); \
- _priv = this->private; \
- if (!_priv->_db_conn) \
- goto label; \
- } while (0)
-
-/*
- * IS CTR record metadata heat is disabled then goto to label
- * */
-#define CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, label) \
- do { \
- gf_ctr_private_t *_priv = NULL; \
- GF_ASSERT(this); \
- GF_ASSERT(this->private); \
- _priv = this->private; \
- if (!_priv->ctr_record_metadata_heat) \
- goto label; \
- } while (0)
-
-int
-fill_db_record_for_unwind(xlator_t *this, gf_ctr_local_t *ctr_local,
- gfdb_fop_type_t fop_type, gfdb_fop_path_t fop_path);
-
-int
-fill_db_record_for_wind(xlator_t *this, gf_ctr_local_t *ctr_local,
- gf_ctr_inode_context_t *ctr_inode_cx);
-
-/*******************************************************************************
- * CTR INSERT WIND
- * *****************************************************************************
- * Function used to insert/update record into the database during a wind fop
- * This function creates ctr_local structure into the frame of the fop
- * call.
- * ****************************************************************************/
-
-static inline int
-ctr_insert_wind(call_frame_t *frame, xlator_t *this,
- gf_ctr_inode_context_t *ctr_inode_cx)
-{
- int ret = -1;
- gf_ctr_private_t *_priv = NULL;
- gf_ctr_local_t *ctr_local = NULL;
-
- GF_ASSERT(frame);
- GF_ASSERT(frame->root);
- GF_ASSERT(this);
- IS_CTR_INODE_CX_SANE(ctr_inode_cx);
-
- _priv = this->private;
- GF_ASSERT(_priv);
-
- GF_ASSERT(_priv->_db_conn);
-
- /*If record_wind option of CTR is on record wind for
- * regular files only*/
- if (_priv->ctr_record_wind && ctr_inode_cx->ia_type != IA_IFDIR) {
- frame->local = init_ctr_local_t(this);
- if (!frame->local) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND,
- "WIND: Error while creating ctr local");
- goto out;
- };
- ctr_local = frame->local;
- ctr_local->client_pid = frame->root->pid;
- ctr_local->is_internal_fop = ctr_inode_cx->is_internal_fop;
-
- /* Decide whether to record counters or not */
- CTR_DB_REC(ctr_local).do_record_counters = _gf_false;
- /* If record counter is enabled */
- if (_priv->ctr_record_counter) {
- /* If not a internal fop */
- if (!(ctr_local->is_internal_fop)) {
- /* If its a metadata fop AND
- * record metadata heat
- * OR
- * its NOT a metadata fop */
- if ((ctr_inode_cx->is_metadata_fop &&
- _priv->ctr_record_metadata_heat) ||
- (!ctr_inode_cx->is_metadata_fop)) {
- CTR_DB_REC(ctr_local).do_record_counters = _gf_true;
- }
- }
- }
-
- /* Decide whether to record times or not
- * For non internal FOPS record times as usual*/
- CTR_DB_REC(ctr_local).do_record_times = _gf_false;
- if (!ctr_local->is_internal_fop) {
- /* If its a metadata fop AND
- * record metadata heat
- * OR
- * its NOT a metadata fop */
- if ((ctr_inode_cx->is_metadata_fop &&
- _priv->ctr_record_metadata_heat) ||
- (!ctr_inode_cx->is_metadata_fop)) {
- CTR_DB_REC(ctr_local).do_record_times =
- (_priv->ctr_record_wind || _priv->ctr_record_unwind);
- }
- }
- /* when its a internal FOPS*/
- else {
- /* Record times only for create
- * i.e when the inode is created */
- CTR_DB_REC(ctr_local).do_record_times = (isdentrycreatefop(
- ctr_inode_cx->fop_type))
- ? _gf_true
- : _gf_false;
- }
-
- /*Fill the db record for insertion*/
- ret = fill_db_record_for_wind(this, ctr_local, ctr_inode_cx);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_FILL_CTR_LOCAL_ERROR_WIND,
- "WIND: Error filling ctr local");
- goto out;
- }
-
- /*Insert the db record*/
- ret = insert_record(_priv->_db_conn, &ctr_local->gfdb_db_record);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_INSERT_RECORD_WIND_FAILED,
- "WIND: Inserting of record failed!");
- goto out;
- }
- }
- ret = 0;
-out:
-
- if (ret) {
- free_ctr_local(ctr_local);
- frame->local = NULL;
- }
-
- return ret;
-}
-
-/*******************************************************************************
- * CTR INSERT UNWIND
- * *****************************************************************************
- * Function used to insert/update record into the database during a unwind fop
- * This function destroys ctr_local structure into the frame of the fop
- * call at the end.
- * ****************************************************************************/
-static inline int
-ctr_insert_unwind(call_frame_t *frame, xlator_t *this, gfdb_fop_type_t fop_type,
- gfdb_fop_path_t fop_path)
-{
- int ret = -1;
- gf_ctr_private_t *_priv = NULL;
- gf_ctr_local_t *ctr_local = NULL;
-
- GF_ASSERT(frame);
- GF_ASSERT(this);
-
- _priv = this->private;
- GF_ASSERT(_priv);
-
- GF_ASSERT(_priv->_db_conn);
-
- ctr_local = frame->local;
-
- if (ctr_local && (_priv->ctr_record_unwind || isdentryfop(fop_type)) &&
- (ctr_local->ia_inode_type != IA_IFDIR)) {
- CTR_DB_REC(ctr_local).do_record_uwind_time = _priv->ctr_record_unwind;
-
- ret = fill_db_record_for_unwind(this, ctr_local, fop_type, fop_path);
- if (ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_FILL_CTR_LOCAL_ERROR_UNWIND,
- "UNWIND: Error filling ctr local");
- goto out;
- }
-
- ret = insert_record(_priv->_db_conn, &ctr_local->gfdb_db_record);
- if (ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_FILL_CTR_LOCAL_ERROR_UNWIND,
- "UNWIND: Error filling ctr local");
- goto out;
- }
- }
- ret = 0;
-out:
- return ret;
-}
-
-/******************************************************************************
- * Delete file/flink record/s from db
- * ****************************************************************************/
-static inline int
-ctr_delete_hard_link_from_db(xlator_t *this, uuid_t gfid, uuid_t pargfid,
- char *basename, gfdb_fop_type_t fop_type,
- gfdb_fop_path_t fop_path)
-{
- int ret = -1;
- gfdb_db_record_t gfdb_db_record;
- gf_ctr_private_t *_priv = NULL;
-
- _priv = this->private;
- GF_VALIDATE_OR_GOTO(this->name, _priv, out);
- GF_VALIDATE_OR_GOTO(this->name, (!gf_uuid_is_null(gfid)), out);
- GF_VALIDATE_OR_GOTO(this->name, (!gf_uuid_is_null(pargfid)), out);
- GF_VALIDATE_OR_GOTO(this->name, (fop_type == GFDB_FOP_DENTRY_WRITE), out);
- GF_VALIDATE_OR_GOTO(
- this->name, (fop_path == GFDB_FOP_UNDEL || GFDB_FOP_UNDEL_ALL), out);
-
- /* Set gfdb_db_record to 0 */
- memset(&gfdb_db_record, 0, sizeof(gfdb_db_record));
-
- /* Copy basename */
- if (snprintf(gfdb_db_record.file_name, GF_NAME_MAX, "%s", basename) >=
- GF_NAME_MAX)
- goto out;
-
- /* Copy gfid into db record */
- gf_uuid_copy(gfdb_db_record.gfid, gfid);
-
- /* Copy pargid into db record */
- gf_uuid_copy(gfdb_db_record.pargfid, pargfid);
-
- gfdb_db_record.gfdb_fop_path = fop_path;
- gfdb_db_record.gfdb_fop_type = fop_type;
-
- /*send delete request to db*/
- ret = insert_record(_priv->_db_conn, &gfdb_db_record);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_RECORD_WIND_FAILED,
- "Failed to delete record. %s", basename);
- goto out;
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-/******************************* Hard link function ***************************/
-
-static inline gf_boolean_t
-__is_inode_expired(ctr_xlator_ctx_t *ctr_xlator_ctx, gf_ctr_private_t *_priv,
- gfdb_time_t *current_time)
-{
- gf_boolean_t ret = _gf_false;
- uint64_t time_diff = 0;
-
- GF_ASSERT(ctr_xlator_ctx);
- GF_ASSERT(_priv);
- GF_ASSERT(current_time);
-
- time_diff = current_time->tv_sec - ctr_xlator_ctx->inode_heal_period;
-
- ret = (time_diff >= _priv->ctr_lookupheal_inode_timeout) ? _gf_true
- : _gf_false;
- return ret;
-}
-
-static inline gf_boolean_t
-__is_hardlink_expired(ctr_hard_link_t *ctr_hard_link, gf_ctr_private_t *_priv,
- gfdb_time_t *current_time)
-{
- gf_boolean_t ret = _gf_false;
- uint64_t time_diff = 0;
-
- GF_ASSERT(ctr_hard_link);
- GF_ASSERT(_priv);
- GF_ASSERT(current_time);
-
- time_diff = current_time->tv_sec - ctr_hard_link->hardlink_heal_period;
-
- ret = ret || (time_diff >= _priv->ctr_lookupheal_link_timeout) ? _gf_true
- : _gf_false;
-
- return ret;
-}
-
-/* Return values of heal*/
-typedef enum ctr_heal_ret_val {
- CTR_CTX_ERROR = -1,
- /* No healing required */
- CTR_TRY_NO_HEAL = 0,
- /* Try healing hard link */
- CTR_TRY_HARDLINK_HEAL = 1,
- /* Try healing inode */
- CTR_TRY_INODE_HEAL = 2,
-} ctr_heal_ret_val_t;
-
-/**
- * @brief Function to add hard link to the inode context variable.
- * The inode context maintainences a in-memory list. This is used
- * smart healing of database.
- * @param frame of the FOP
- * @param this is the Xlator instant
- * @param inode
- * @return Return ctr_heal_ret_val_t
- */
-
-static inline ctr_heal_ret_val_t
-add_hard_link_ctx(call_frame_t *frame, xlator_t *this, inode_t *inode)
-{
- ctr_heal_ret_val_t ret_val = CTR_TRY_NO_HEAL;
- int ret = -1;
- gf_ctr_local_t *ctr_local = NULL;
- ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
- ctr_hard_link_t *ctr_hard_link = NULL;
- gf_ctr_private_t *_priv = NULL;
- gfdb_time_t current_time = {0};
-
- GF_ASSERT(frame);
- GF_ASSERT(this);
- GF_ASSERT(inode);
- GF_ASSERT(this->private);
-
- _priv = this->private;
-
- ctr_local = frame->local;
- if (!ctr_local) {
- goto out;
- }
-
- ctr_xlator_ctx = init_ctr_xlator_ctx(this, inode);
- if (!ctr_xlator_ctx) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_ACCESS_CTR_INODE_CONTEXT_FAILED,
- "Failed accessing ctr inode context");
- goto out;
- }
-
- LOCK(&ctr_xlator_ctx->lock);
-
- /* Check if the hard link already exists
- * in the ctr inode context*/
- ctr_hard_link = ctr_search_hard_link_ctx(this, ctr_xlator_ctx,
- CTR_DB_REC(ctr_local).pargfid,
- CTR_DB_REC(ctr_local).file_name);
- /* if there then ignore */
- if (ctr_hard_link) {
- ret = gettimeofday(&current_time, NULL);
- if (ret == -1) {
- gf_log(this->name, GF_LOG_ERROR, "Failed to get current time");
- ret_val = CTR_CTX_ERROR;
- goto unlock;
- }
-
- if (__is_hardlink_expired(ctr_hard_link, _priv, &current_time)) {
- ctr_hard_link->hardlink_heal_period = current_time.tv_sec;
- ret_val = ret_val | CTR_TRY_HARDLINK_HEAL;
- }
-
- if (__is_inode_expired(ctr_xlator_ctx, _priv, &current_time)) {
- ctr_xlator_ctx->inode_heal_period = current_time.tv_sec;
- ret_val = ret_val | CTR_TRY_INODE_HEAL;
- }
-
- goto unlock;
- }
-
- /* Add the hard link to the list*/
- ret = ctr_add_hard_link(this, ctr_xlator_ctx, CTR_DB_REC(ctr_local).pargfid,
- CTR_DB_REC(ctr_local).file_name);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_ADD_HARDLINK_TO_CTR_INODE_CONTEXT_FAILED,
- "Failed to add hardlink to the ctr inode context");
- ret_val = CTR_CTX_ERROR;
- goto unlock;
- }
-
- ret_val = CTR_TRY_NO_HEAL;
-unlock:
- UNLOCK(&ctr_xlator_ctx->lock);
-out:
- return ret_val;
-}
-
-static inline int
-delete_hard_link_ctx(call_frame_t *frame, xlator_t *this, inode_t *inode)
-{
- int ret = -1;
- ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
- gf_ctr_local_t *ctr_local = NULL;
-
- GF_ASSERT(frame);
- GF_ASSERT(this);
- GF_ASSERT(inode);
-
- ctr_local = frame->local;
- if (!ctr_local) {
- goto out;
- }
-
- ctr_xlator_ctx = get_ctr_xlator_ctx(this, inode);
- if (!ctr_xlator_ctx) {
- /* Since there is no ctr inode context so nothing more to do */
- ret = 0;
- goto out;
- }
-
- ret = ctr_delete_hard_link(this, ctr_xlator_ctx,
- CTR_DB_REC(ctr_local).pargfid,
- CTR_DB_REC(ctr_local).file_name);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_DELETE_HARDLINK_FAILED,
- "Failed to delete hard link");
- goto out;
- }
-
- ret = 0;
-
-out:
- return ret;
-}
-
-static inline int
-update_hard_link_ctx(call_frame_t *frame, xlator_t *this, inode_t *inode)
-{
- int ret = -1;
- ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
- gf_ctr_local_t *ctr_local = NULL;
-
- GF_ASSERT(frame);
- GF_ASSERT(this);
- GF_ASSERT(inode);
-
- ctr_local = frame->local;
- if (!ctr_local) {
- goto out;
- }
-
- ctr_xlator_ctx = init_ctr_xlator_ctx(this, inode);
- if (!ctr_xlator_ctx) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_ACCESS_CTR_INODE_CONTEXT_FAILED,
- "Failed accessing ctr inode context");
- goto out;
- }
-
- ret = ctr_update_hard_link(
- this, ctr_xlator_ctx, CTR_DB_REC(ctr_local).pargfid,
- CTR_DB_REC(ctr_local).file_name, CTR_DB_REC(ctr_local).old_pargfid,
- CTR_DB_REC(ctr_local).old_file_name);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_DELETE_HARDLINK_FAILED,
- "Failed to delete hard link");
- goto out;
- }
-
- ret = 0;
-
-out:
- return ret;
-}
-
-/******************************************************************************
- *
- * CTR xlator init related functions
- *
- *
- * ****************************************************************************/
-int
-extract_db_params(xlator_t *this, dict_t *params_dict, gfdb_db_type_t db_type);
-
-int
-extract_ctr_options(xlator_t *this, gf_ctr_private_t *_priv);
-
-#endif
diff --git a/xlators/features/changetimerecorder/src/ctr-messages.h b/xlators/features/changetimerecorder/src/ctr-messages.h
deleted file mode 100644
index 105d2265430..00000000000
--- a/xlators/features/changetimerecorder/src/ctr-messages.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
- */
-
-#ifndef _CTR_MESSAGES_H_
-#define _CTR_MESSAGES_H_
-
-#include "glfs-message-id.h"
-
-/* To add new message IDs, append new identifiers at the end of the list.
- *
- * Never remove a message ID. If it's not used anymore, you can rename it or
- * leave it as it is, but not delete it. This is to prevent reutilization of
- * IDs by other messages.
- *
- * The component name must match one of the entries defined in
- * glfs-message-id.h.
- */
-
-GLFS_MSGID(
- CTR, CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND,
- CTR_MSG_FILL_CTR_LOCAL_ERROR_UNWIND, CTR_MSG_FILL_CTR_LOCAL_ERROR_WIND,
- CTR_MSG_INSERT_LINK_WIND_FAILED, CTR_MSG_INSERT_WRITEV_WIND_FAILED,
- CTR_MSG_INSERT_WRITEV_UNWIND_FAILED, CTR_MSG_INSERT_SETATTR_WIND_FAILED,
- CTR_MSG_INSERT_SETATTR_UNWIND_FAILED,
- CTR_MSG_INSERT_FREMOVEXATTR_UNWIND_FAILED,
- CTR_MSG_INSERT_FREMOVEXATTR_WIND_FAILED,
- CTR_MSG_INSERT_REMOVEXATTR_WIND_FAILED,
- CTR_MSG_INSERT_REMOVEXATTR_UNWIND_FAILED,
- CTR_MSG_INSERT_TRUNCATE_WIND_FAILED, CTR_MSG_INSERT_TRUNCATE_UNWIND_FAILED,
- CTR_MSG_INSERT_FTRUNCATE_UNWIND_FAILED,
- CTR_MSG_INSERT_FTRUNCATE_WIND_FAILED, CTR_MSG_INSERT_RENAME_WIND_FAILED,
- CTR_MSG_INSERT_RENAME_UNWIND_FAILED,
- CTR_MSG_ACCESS_CTR_INODE_CONTEXT_FAILED, CTR_MSG_ADD_HARDLINK_FAILED,
- CTR_MSG_DELETE_HARDLINK_FAILED, CTR_MSG_UPDATE_HARDLINK_FAILED,
- CTR_MSG_GET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED,
- CTR_MSG_SET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED,
- CTR_MSG_INSERT_UNLINK_UNWIND_FAILED, CTR_MSG_INSERT_UNLINK_WIND_FAILED,
- CTR_MSG_XDATA_NULL, CTR_MSG_INSERT_FSYNC_WIND_FAILED,
- CTR_MSG_INSERT_FSYNC_UNWIND_FAILED, CTR_MSG_INSERT_MKNOD_UNWIND_FAILED,
- CTR_MSG_INSERT_MKNOD_WIND_FAILED, CTR_MSG_INSERT_CREATE_WIND_FAILED,
- CTR_MSG_INSERT_CREATE_UNWIND_FAILED, CTR_MSG_INSERT_RECORD_WIND_FAILED,
- CTR_MSG_INSERT_READV_WIND_FAILED, CTR_MSG_GET_GFID_FROM_DICT_FAILED,
- CTR_MSG_SET, CTR_MSG_FATAL_ERROR, CTR_MSG_DANGLING_VOLUME,
- CTR_MSG_CALLOC_FAILED, CTR_MSG_EXTRACT_CTR_XLATOR_OPTIONS_FAILED,
- CTR_MSG_INIT_DB_PARAMS_FAILED, CTR_MSG_CREATE_LOCAL_MEMORY_POOL_FAILED,
- CTR_MSG_MEM_ACC_INIT_FAILED, CTR_MSG_CLOSE_DB_CONN_FAILED,
- CTR_MSG_FILL_UNWIND_TIME_REC_ERROR, CTR_MSG_WRONG_FOP_PATH,
- CTR_MSG_CONSTRUCT_DB_PATH_FAILED, CTR_MSG_SET_VALUE_TO_SQL_PARAM_FAILED,
- CTR_MSG_XLATOR_DISABLED, CTR_MSG_HARDLINK_MISSING_IN_LIST,
- CTR_MSG_ADD_HARDLINK_TO_LIST_FAILED, CTR_MSG_INIT_LOCK_FAILED,
- CTR_MSG_COPY_FAILED, CTR_MSG_EXTRACT_DB_PARAM_OPTIONS_FAILED,
- CTR_MSG_ADD_HARDLINK_TO_CTR_INODE_CONTEXT_FAILED, CTR_MSG_NULL_LOCAL);
-
-#endif /* !_CTR_MESSAGES_H_ */
diff --git a/xlators/features/changetimerecorder/src/ctr-xlator-ctx.c b/xlators/features/changetimerecorder/src/ctr-xlator-ctx.c
deleted file mode 100644
index b6b66d56731..00000000000
--- a/xlators/features/changetimerecorder/src/ctr-xlator-ctx.c
+++ /dev/null
@@ -1,362 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include "ctr-xlator-ctx.h"
-#include "ctr-messages.h"
-#include <time.h>
-#include <sys/time.h>
-
-#define IS_THE_ONLY_HARDLINK(ctr_hard_link) \
- (ctr_hard_link->list.next == ctr_hard_link->list.prev)
-
-static void
-fini_ctr_hard_link(ctr_hard_link_t **ctr_hard_link)
-{
- GF_ASSERT(ctr_hard_link);
-
- if (*ctr_hard_link)
- return;
- GF_FREE((*ctr_hard_link)->base_name);
- GF_FREE(*ctr_hard_link);
- *ctr_hard_link = NULL;
-}
-
-/* Please lock the ctr_xlator_ctx before using this function */
-ctr_hard_link_t *
-ctr_search_hard_link_ctx(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
- uuid_t pgfid, const char *base_name)
-{
- ctr_hard_link_t *_hard_link = NULL;
- ctr_hard_link_t *searched_hardlink = NULL;
-
- GF_ASSERT(this);
- GF_ASSERT(ctr_xlator_ctx);
-
- if (pgfid == NULL || base_name == NULL)
- goto out;
-
- /*linear search*/
- list_for_each_entry(_hard_link, &ctr_xlator_ctx->hardlink_list, list)
- {
- if (gf_uuid_compare(_hard_link->pgfid, pgfid) == 0 &&
- _hard_link->base_name &&
- strcmp(_hard_link->base_name, base_name) == 0) {
- searched_hardlink = _hard_link;
- break;
- }
- }
-
-out:
- return searched_hardlink;
-}
-
-/* Please lock the ctr_xlator_ctx before using this function */
-int
-ctr_add_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
- uuid_t pgfid, const char *base_name)
-{
- int ret = -1;
- ctr_hard_link_t *ctr_hard_link = NULL;
- struct timeval current_time = {0};
-
- GF_ASSERT(this);
- GF_ASSERT(ctr_xlator_ctx);
-
- if (pgfid == NULL || base_name == NULL)
- goto out;
-
- ctr_hard_link = GF_CALLOC(1, sizeof(*ctr_hard_link), gf_ctr_mt_hard_link_t);
- if (!ctr_hard_link) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, CTR_MSG_CALLOC_FAILED,
- "Failed allocating "
- "ctr_hard_link");
- goto out;
- }
-
- /*Initialize the ctr_hard_link object and
- * Assign the values : parent GFID and basename*/
- INIT_LIST_HEAD(&ctr_hard_link->list);
- gf_uuid_copy(ctr_hard_link->pgfid, pgfid);
- ret = gf_asprintf(&ctr_hard_link->base_name, "%s", base_name);
- if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_COPY_FAILED,
- "Failed copying basename"
- "to ctr_hard_link");
- goto error;
- }
-
- ret = gettimeofday(&current_time, NULL);
- if (ret == -1) {
- gf_log(this->name, GF_LOG_ERROR, "Failed to get current time");
- goto error;
- }
-
- /*Add the hard link to the list*/
- list_add_tail(&ctr_hard_link->list, &ctr_xlator_ctx->hardlink_list);
-
- ctr_hard_link->hardlink_heal_period = current_time.tv_sec;
-
- /*aal izz well!*/
- ret = 0;
- goto out;
-error:
- GF_FREE(ctr_hard_link);
-out:
- return ret;
-}
-
-static void
-__delete_hard_link_from_list(ctr_hard_link_t **ctr_hard_link)
-{
- GF_ASSERT(ctr_hard_link);
- GF_ASSERT(*ctr_hard_link);
-
- /*Remove hard link from list*/
- list_del(&(*ctr_hard_link)->list);
- fini_ctr_hard_link(ctr_hard_link);
-}
-
-int
-ctr_delete_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
- uuid_t pgfid, const char *base_name)
-{
- int ret = -1;
- ctr_hard_link_t *ctr_hard_link = NULL;
-
- GF_ASSERT(this);
- GF_ASSERT(ctr_xlator_ctx);
-
- LOCK(&ctr_xlator_ctx->lock);
-
- /*Check if the hard link is present */
- ctr_hard_link = ctr_search_hard_link_ctx(this, ctr_xlator_ctx, pgfid,
- base_name);
- if (!ctr_hard_link) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_HARDLINK_MISSING_IN_LIST,
- "Hard link doesn't exist in the list");
- goto out;
- }
-
- __delete_hard_link_from_list(&ctr_hard_link);
- ctr_hard_link = NULL;
-
- ret = 0;
-out:
- UNLOCK(&ctr_xlator_ctx->lock);
-
- return ret;
-}
-
-int
-ctr_update_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
- uuid_t pgfid, const char *base_name, uuid_t old_pgfid,
- const char *old_base_name)
-{
- int ret = -1;
- ctr_hard_link_t *ctr_hard_link = NULL;
- struct timeval current_time = {0};
-
- GF_ASSERT(this);
- GF_ASSERT(ctr_xlator_ctx);
-
- LOCK(&ctr_xlator_ctx->lock);
-
- /*Check if the hard link is present */
- ctr_hard_link = ctr_search_hard_link_ctx(this, ctr_xlator_ctx, old_pgfid,
- old_base_name);
- if (!ctr_hard_link) {
- gf_msg_trace(this->name, 0,
- "Hard link doesn't exist"
- " in the list");
- /* Since the hard link is not present in the list
- * we add it to the list */
- ret = ctr_add_hard_link(this, ctr_xlator_ctx, pgfid, base_name);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- CTR_MSG_ADD_HARDLINK_TO_LIST_FAILED,
- "Failed adding hard link to the list");
- goto out;
- }
- ret = 0;
- goto out;
- }
-
- /* update the hard link */
- gf_uuid_copy(ctr_hard_link->pgfid, pgfid);
- GF_FREE(ctr_hard_link->base_name);
- ret = gf_asprintf(&ctr_hard_link->base_name, "%s", base_name);
- if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_COPY_FAILED,
- "Failed copying basename"
- "to ctr_hard_link");
- /* delete the corrupted entry */
- __delete_hard_link_from_list(&ctr_hard_link);
- ctr_hard_link = NULL;
- goto out;
- }
-
- ret = gettimeofday(&current_time, NULL);
- if (ret == -1) {
- gf_log(this->name, GF_LOG_ERROR, "Failed to get current time");
- ctr_hard_link->hardlink_heal_period = 0;
- } else {
- ctr_hard_link->hardlink_heal_period = current_time.tv_sec;
- }
-
- ret = 0;
-
-out:
- UNLOCK(&ctr_xlator_ctx->lock);
-
- return ret;
-}
-
-/* Delete all hardlinks */
-static int
-ctr_delete_all_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx)
-{
- int ret = -1;
- ctr_hard_link_t *ctr_hard_link = NULL;
- ctr_hard_link_t *tmp = NULL;
-
- GF_ASSERT(ctr_xlator_ctx);
-
- LOCK(&ctr_xlator_ctx->lock);
-
- list_for_each_entry_safe(ctr_hard_link, tmp, &ctr_xlator_ctx->hardlink_list,
- list)
- {
- /*Remove hard link from list*/
- __delete_hard_link_from_list(&ctr_hard_link);
- ctr_hard_link = NULL;
- }
-
- UNLOCK(&ctr_xlator_ctx->lock);
-
- ret = 0;
-
- return ret;
-}
-
-/* Please lock the inode before using this function */
-static ctr_xlator_ctx_t *
-__get_ctr_xlator_ctx(xlator_t *this, inode_t *inode)
-{
- int ret = 0;
- uint64_t _addr = 0;
- ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
-
- GF_ASSERT(this);
- GF_ASSERT(inode);
-
- ret = __inode_ctx_get(inode, this, &_addr);
- if (ret < 0)
- _addr = 0;
- if (_addr != 0) {
- ctr_xlator_ctx = (ctr_xlator_ctx_t *)(long)_addr;
- }
-
- return ctr_xlator_ctx;
-}
-
-ctr_xlator_ctx_t *
-init_ctr_xlator_ctx(xlator_t *this, inode_t *inode)
-{
- int ret = -1;
- uint64_t _addr = 0;
- ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
- struct timeval current_time = {0};
-
- GF_ASSERT(this);
- GF_ASSERT(inode);
-
- LOCK(&inode->lock);
- {
- ctr_xlator_ctx = __get_ctr_xlator_ctx(this, inode);
- if (ctr_xlator_ctx) {
- ret = 0;
- goto out;
- }
- ctr_xlator_ctx = GF_CALLOC(1, sizeof(*ctr_xlator_ctx),
- gf_ctr_mt_xlator_ctx);
- if (!ctr_xlator_ctx)
- goto out;
-
- ret = LOCK_INIT(&ctr_xlator_ctx->lock);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, ret, CTR_MSG_INIT_LOCK_FAILED,
- "Failed init lock %s", strerror(ret));
- goto out;
- }
- _addr = (uint64_t)(uintptr_t)ctr_xlator_ctx;
-
- ret = __inode_ctx_set(inode, this, &_addr);
- if (ret) {
- goto out;
- }
-
- INIT_LIST_HEAD(&ctr_xlator_ctx->hardlink_list);
-
- ret = gettimeofday(&current_time, NULL);
- if (ret == -1) {
- gf_log(this->name, GF_LOG_ERROR, "Failed to get current time");
- goto out;
- }
-
- ctr_xlator_ctx->inode_heal_period = current_time.tv_sec;
- }
- ret = 0;
-out:
- if (ret) {
- GF_FREE(ctr_xlator_ctx);
- ctr_xlator_ctx = NULL;
- }
-
- UNLOCK(&inode->lock);
-
- return ctr_xlator_ctx;
-}
-
-void
-fini_ctr_xlator_ctx(xlator_t *this, inode_t *inode)
-{
- int ret = 0;
- uint64_t _addr = 0;
- ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
-
- inode_ctx_del(inode, this, &_addr);
- if (!_addr)
- return;
-
- ctr_xlator_ctx = (ctr_xlator_ctx_t *)(long)_addr;
-
- ret = ctr_delete_all_hard_link(this, ctr_xlator_ctx);
- if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0, CTR_MSG_DELETE_HARDLINK_FAILED,
- "Failed deleting all "
- "hard links from inode context");
- }
-
- LOCK_DESTROY(&ctr_xlator_ctx->lock);
-
- GF_FREE(ctr_xlator_ctx);
-}
-
-ctr_xlator_ctx_t *
-get_ctr_xlator_ctx(xlator_t *this, inode_t *inode)
-{
- ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
-
- LOCK(&inode->lock);
- ctr_xlator_ctx = __get_ctr_xlator_ctx(this, inode);
- UNLOCK(&inode->lock);
-
- return ctr_xlator_ctx;
-}
diff --git a/xlators/features/changetimerecorder/src/ctr-xlator-ctx.h b/xlators/features/changetimerecorder/src/ctr-xlator-ctx.h
deleted file mode 100644
index 584d3b79ba4..00000000000
--- a/xlators/features/changetimerecorder/src/ctr-xlator-ctx.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __CTR_XLATOR_CTX_H
-#define __CTR_XLATOR_CTX_H
-
-#include "xlator.h"
-#include "ctr_mem_types.h"
-#include "iatt.h"
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "locking.h"
-#include "common-utils.h"
-#include <time.h>
-#include <sys/time.h>
-
-typedef struct ctr_hard_link {
- uuid_t pgfid;
- char *base_name;
- /* Hardlink expiry : Defines the expiry period after which a
- * database heal is attempted. */
- uint64_t hardlink_heal_period;
- struct list_head list;
-} ctr_hard_link_t;
-
-typedef struct ctr_xlator_ctx {
- /* This represents the looked up hardlinks
- * NOTE: This doesn't represent all physical hardlinks of the inode*/
- struct list_head hardlink_list;
- uint64_t inode_heal_period;
- gf_lock_t lock;
-} ctr_xlator_ctx_t;
-
-ctr_hard_link_t *
-ctr_search_hard_link_ctx(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
- uuid_t pgfid, const char *base_name);
-
-int
-ctr_add_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
- uuid_t pgfid, const char *base_name);
-
-int
-ctr_delete_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
- uuid_t pgfid, const char *base_name);
-
-int
-ctr_update_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
- uuid_t pgfid, const char *base_name, uuid_t old_pgfid,
- const char *old_base_name);
-
-ctr_xlator_ctx_t *
-get_ctr_xlator_ctx(xlator_t *this, inode_t *inode);
-
-ctr_xlator_ctx_t *
-init_ctr_xlator_ctx(xlator_t *this, inode_t *inode);
-
-void
-fini_ctr_xlator_ctx(xlator_t *this, inode_t *inode);
-
-#endif
diff --git a/xlators/features/changetimerecorder/src/ctr_mem_types.h b/xlators/features/changetimerecorder/src/ctr_mem_types.h
deleted file mode 100644
index 7b8f531ddec..00000000000
--- a/xlators/features/changetimerecorder/src/ctr_mem_types.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- Copyright (c) 2008-2015 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __CTR_MEM_TYPES_H__
-#define __CTR_MEM_TYPES_H__
-
-#include "gfdb_mem-types.h"
-
-enum gf_ctr_mem_types_ {
- gf_ctr_mt_private_t = gfdb_mt_end + 1,
- gf_ctr_mt_xlator_ctx,
- gf_ctr_mt_hard_link_t,
- gf_ctr_mt_end
-};
-#endif
diff --git a/xlators/features/cloudsync/src/Makefile.am b/xlators/features/cloudsync/src/Makefile.am
index 0c3966c968b..e2a277e372b 100644
--- a/xlators/features/cloudsync/src/Makefile.am
+++ b/xlators/features/cloudsync/src/Makefile.am
@@ -21,9 +21,9 @@ cloudsync_la_SOURCES = $(cloudsync_sources) $(cloudsynccommon_sources)
nodist_cloudsync_la_SOURCES = cloudsync-autogen-fops.c cloudsync-autogen-fops.h
BUILT_SOURCES = cloudsync-autogen-fops.h
-cloudsync_la_LDFLAGS = $(LIB_DL) -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+cloudsync_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-cloudsync_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+cloudsync_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(LIB_DL)
AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
-DCS_PLUGINDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/cloudsync-plugins\"
diff --git a/xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.c b/xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.c
index 6bb68cd170c..ee63f983980 100644
--- a/xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.c
+++ b/xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.c
@@ -20,11 +20,11 @@
#include <dlfcn.h>
-#include "glusterfs.h"
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "cloudsync.h"
#include "cloudsync-common.h"
-#include "call-stub.h"
+#include <glusterfs/call-stub.h>
#pragma generate
diff --git a/xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.h b/xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.h
index 2db2a9c88c7..d922c77d8aa 100644
--- a/xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.h
+++ b/xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.h
@@ -15,7 +15,7 @@
#ifndef _CLOUDSYNC_AUTOGEN_FOPS_H
#define _CLOUDSYNC_AUTOGEN_FOPS_H
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#include "cloudsync.h"
#include "cloudsync-common.h"
diff --git a/xlators/features/cloudsync/src/cloudsync-common.c b/xlators/features/cloudsync/src/cloudsync-common.c
index aee1f06a82a..445a31b90e7 100644
--- a/xlators/features/cloudsync/src/cloudsync-common.c
+++ b/xlators/features/cloudsync/src/cloudsync-common.c
@@ -11,6 +11,20 @@
#include "cloudsync-common.h"
void
+cs_xattrinfo_wipe(cs_local_t *local)
+{
+ if (local->xattrinfo.lxattr) {
+ if (local->xattrinfo.lxattr->file_path)
+ GF_FREE(local->xattrinfo.lxattr->file_path);
+
+ if (local->xattrinfo.lxattr->volname)
+ GF_FREE(local->xattrinfo.lxattr->volname);
+
+ GF_FREE(local->xattrinfo.lxattr);
+ }
+}
+
+void
cs_local_wipe(xlator_t *this, cs_local_t *local)
{
if (!local)
@@ -40,5 +54,7 @@ cs_local_wipe(xlator_t *this, cs_local_t *local)
if (local->remotepath)
GF_FREE(local->remotepath);
+ cs_xattrinfo_wipe(local);
+
mem_put(local);
}
diff --git a/xlators/features/cloudsync/src/cloudsync-common.h b/xlators/features/cloudsync/src/cloudsync-common.h
index 0be6a446456..11d233460a4 100644
--- a/xlators/features/cloudsync/src/cloudsync-common.h
+++ b/xlators/features/cloudsync/src/cloudsync-common.h
@@ -10,13 +10,27 @@
#ifndef _CLOUDSYNC_COMMON_H
#define _CLOUDSYNC_COMMON_H
-#include "glusterfs.h"
-#include "call-stub.h"
-#include "xlator.h"
-#include "syncop.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/compat-errno.h>
#include "cloudsync-mem-types.h"
#include "cloudsync-messages.h"
+typedef struct cs_loc_xattr {
+ char *file_path;
+ uuid_t uuid;
+ uuid_t gfid;
+ char *volname;
+} cs_loc_xattr_t;
+
+typedef struct cs_size_xattr {
+ uint64_t size;
+ uint64_t blksize;
+ uint64_t blocks;
+} cs_size_xattr_t;
+
typedef struct cs_local {
loc_t loc;
fd_t *fd;
@@ -34,10 +48,25 @@ typedef struct cs_local {
int call_cnt;
inode_t *inode;
char *remotepath;
+
+ struct {
+ /* offset, flags and size are the information needed
+ * by read fop for remote read operation. These will be
+ * populated in cloudsync read fop, before being passed
+ * on to the plugin performing remote read.
+ */
+ off_t offset;
+ uint32_t flags;
+ size_t size;
+ cs_loc_xattr_t *lxattr;
+ } xattrinfo;
+
} cs_local_t;
typedef int (*fop_download_t)(call_frame_t *frame, void *config);
+typedef int (*fop_remote_read_t)(call_frame_t *, void *);
+
typedef void *(*store_init)(xlator_t *this);
typedef int (*store_reconfigure)(xlator_t *this, dict_t *options);
@@ -48,6 +77,7 @@ struct cs_remote_stores {
char *name; /* store name */
void *config; /* store related information */
fop_download_t dlfop; /* store specific download function */
+ fop_remote_read_t rdfop; /* store specific read function */
store_init init; /* store init to initialize store config */
store_reconfigure reconfigure; /* reconfigure store config */
store_fini fini;
@@ -59,11 +89,15 @@ typedef struct cs_private {
struct cs_remote_stores *stores;
gf_boolean_t abortdl;
pthread_spinlock_t lock;
+ gf_boolean_t remote_read;
} cs_private_t;
void
cs_local_wipe(xlator_t *this, cs_local_t *local);
+void
+cs_xattrinfo_wipe(cs_local_t *local);
+
#define CS_STACK_UNWIND(fop, frame, params...) \
do { \
cs_local_t *__local = NULL; \
@@ -90,6 +124,7 @@ cs_local_wipe(xlator_t *this, cs_local_t *local);
typedef struct store_methods {
int (*fop_download)(call_frame_t *frame, void *config);
+ int (*fop_remote_read)(call_frame_t *, void *);
/* return type should be the store config */
void *(*fop_init)(xlator_t *this);
int (*fop_reconfigure)(xlator_t *this, dict_t *options);
diff --git a/xlators/features/cloudsync/src/cloudsync-fops-c.py b/xlators/features/cloudsync/src/cloudsync-fops-c.py
index 3122bd32c01..c27df97ae58 100755
--- a/xlators/features/cloudsync/src/cloudsync-fops-c.py
+++ b/xlators/features/cloudsync/src/cloudsync-fops-c.py
@@ -35,11 +35,19 @@ cs_@NAME@ (call_frame_t *frame, xlator_t *this,
__cs_inode_ctx_get (this, fd->inode, &ctx);
if (ctx)
- state = __cs_get_file_state (this, fd->inode, ctx);
+ state = __cs_get_file_state (fd->inode, ctx);
else
state = GF_CS_LOCAL;
- local->xattr_req = xdata ? dict_ref (xdata) : (xdata = dict_new ());
+ xdata = xdata ? dict_ref (xdata) : dict_new ();
+
+ if (!xdata) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, 0, "insufficient memory");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->xattr_req = xdata;
ret = dict_set_uint32 (local->xattr_req, GF_CS_OBJECT_STATUS, 1);
if (ret) {
@@ -137,15 +145,15 @@ cs_@NAME@_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
} else {
__cs_inode_ctx_update (this, fd->inode, val);
gf_msg (this->name, GF_LOG_INFO, 0, 0,
- " state = %ld", val);
+ " state = %" PRIu64, val);
if (local->call_cnt == 1 &&
(val == GF_CS_REMOTE ||
val == GF_CS_DOWNLOADING)) {
gf_msg (this->name, GF_LOG_INFO, 0,
0, " will repair and download "
- "the file, current state : %ld",
- val);
+ "the file, current state : %"
+ PRIu64, val);
goto repair;
} else {
gf_msg (this->name, GF_LOG_ERROR, 0, 0,
@@ -187,19 +195,29 @@ int32_t
cs_@NAME@ (call_frame_t *frame, xlator_t *this,
@LONG_ARGS@)
{
+ int op_errno = EINVAL;
cs_local_t *local = NULL;
int ret = 0;
local = cs_local_init (this, frame, loc, NULL, GF_FOP_@UPNAME@);
if (!local) {
gf_msg (this->name, GF_LOG_ERROR, 0, 0, "local is NULL");
+ op_errno = ENOMEM;
goto err;
}
if (loc->inode->ia_type == IA_IFDIR)
goto wind;
- local->xattr_req = xdata ? dict_ref (xdata) : dict_new ();
+ xdata = xdata ? dict_ref (xdata) : dict_new ();
+
+ if (!xdata) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, 0, "insufficient memory");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->xattr_req = xdata;
ret = dict_set_uint32 (local->xattr_req, GF_CS_OBJECT_STATUS, 1);
if (ret) {
@@ -215,7 +233,7 @@ wind:
return 0;
err:
- CS_STACK_UNWIND (@NAME@, frame, -1, errno, @CBK_ERROR_ARGS@);
+ CS_STACK_UNWIND (@NAME@, frame, -1, op_errno, @CBK_ERROR_ARGS@);
return 0;
}
@@ -274,7 +292,7 @@ fd_ops = ['readv', 'writev', 'flush', 'fsync', 'fsyncdir', 'ftruncate',
# These are the current actual lists used to generate the code
# The following list contains fops which are fd based that modifies data
-fd_data_modify_op_fop_template = ['readv', 'writev', 'flush', 'fsync',
+fd_data_modify_op_fop_template = ['writev', 'flush', 'fsync',
'ftruncate', 'rchecksum', 'fallocate',
'discard', 'zerofill', 'seek']
@@ -284,8 +302,8 @@ loc_stat_op_fop_template = ['lookup', 'stat', 'discover', 'access', 'setattr',
'getattr']
# These fops need a separate implementation
-special_fops = ['readdirp', 'statfs', 'setxattr', 'unlink', 'getxattr',
- 'truncate', 'fstat']
+special_fops = ['statfs', 'setxattr', 'unlink', 'getxattr',
+ 'truncate', 'fstat', 'readv', 'readdirp']
def gen_defaults():
for name in ops:
diff --git a/xlators/features/cloudsync/src/cloudsync-mem-types.h b/xlators/features/cloudsync/src/cloudsync-mem-types.h
index 46d4f3aa2a1..220346405d0 100644
--- a/xlators/features/cloudsync/src/cloudsync-mem-types.h
+++ b/xlators/features/cloudsync/src/cloudsync-mem-types.h
@@ -11,11 +11,12 @@
#ifndef __CLOUDSYNC_MEM_TYPES_H__
#define __CLOUDSYNC_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum cs_mem_types_ {
gf_cs_mt_cs_private_t = gf_common_mt_end + 1,
gf_cs_mt_cs_remote_stores_t,
gf_cs_mt_cs_inode_ctx_t,
+ gf_cs_mt_cs_lxattr_t,
gf_cs_mt_end
};
#endif /* __CLOUDSYNC_MEM_TYPES_H__ */
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am b/xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am
index 4deefb651eb..fb6b0580c6d 100644
--- a/xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am
@@ -2,6 +2,10 @@ if BUILD_AMAZONS3_PLUGIN
AMAZONS3_DIR = cloudsyncs3
endif
-SUBDIRS = ${AMAZONS3_DIR}
+if BUILD_CVLT_PLUGIN
+ CVLT_DIR = cvlt
+endif
+
+SUBDIRS = ${AMAZONS3_DIR} ${CVLT_DIR}
CLEANFILES =
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3-mem-types.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3-mem-types.h
index 0aaab1fe955..7ccfcc9f4b6 100644
--- a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3-mem-types.h
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3-mem-types.h
@@ -11,7 +11,7 @@
#ifndef __LIBAWS_MEM_TYPES_H__
#define __LIBAWS_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum libaws_mem_types_ {
gf_libaws_mt_aws_private_t = gf_common_mt_end + 1,
gf_libaws_mt_end
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c
index e2bbb680f6c..23c3599825a 100644
--- a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c
@@ -15,8 +15,8 @@
#include <openssl/buffer.h>
#include <openssl/crypto.h>
#include <curl/curl.h>
-#include "xlator.h"
-#include "glusterfs.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/glusterfs.h>
#include "libcloudsyncs3.h"
#include "cloudsync-common.h"
@@ -237,7 +237,7 @@ aws_form_request(char *resource, char **date, char *reqtype, char *bucketid,
int date_len = -1;
int res_len = -1;
- ctime = time(NULL);
+ ctime = gf_time();
gtime = gmtime(&ctime);
date_len = strftime(httpdate, sizeof(httpdate),
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.h
index b1a95f8cbf9..85ae669486b 100644
--- a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.h
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.h
@@ -10,10 +10,10 @@
#ifndef _LIBAWS_H
#define _LIBAWS_H
-#include "glusterfs.h"
-#include "call-stub.h"
-#include "xlator.h"
-#include "syncop.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/syncop.h>
#include <curl/curl.h>
#include "cloudsync-common.h"
#include "libcloudsyncs3-mem-types.h"
diff --git a/xlators/experimental/dht2/dht2-server/Makefile.am b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile.am
index a985f42a877..a985f42a877 100644
--- a/xlators/experimental/dht2/dht2-server/Makefile.am
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile.am
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile.am b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile.am
new file mode 100644
index 00000000000..b512464f157
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile.am
@@ -0,0 +1,12 @@
+csp_LTLIBRARIES = cloudsynccvlt.la
+cspdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/cloudsync-plugins
+
+cloudsynccvlt_la_SOURCES = libcvlt.c $(top_srcdir)/xlators/features/cloudsync/src/cloudsync-common.c
+cloudsynccvlt_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+cloudsynccvlt_la_LDFLAGS = -module -avoid-version -export-symbols $(top_srcdir)/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+noinst_HEADERS = archivestore.h libcvlt.h libcvlt-mem-types.h cvlt-messages.h
+AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS) -I$(top_srcdir)/xlators/features/cloudsync/src
+CLEANFILES =
+
+EXTRA_DIST = libcloudsynccvlt.sym
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/archivestore.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/archivestore.h
new file mode 100644
index 00000000000..7230ef77337
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/archivestore.h
@@ -0,0 +1,203 @@
+/*
+ Copyright (c) 2018 Commvault Systems, Inc. <http://www.commvault.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __ARCHIVESTORE_H__
+#define __ARCHIVESTORE_H__
+
+#include <stdlib.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <dlfcn.h>
+#include <uuid/uuid.h>
+
+#define CS_XATTR_ARCHIVE_UUID "trusted.cloudsync.uuid"
+#define CS_XATTR_PRODUCT_ID "trusted.cloudsync.product-id"
+#define CS_XATTR_STORE_ID "trusted.cloudsync.store-id"
+
+struct _archstore_methods;
+typedef struct _archstore_methods archstore_methods_t;
+
+struct _archstore_desc {
+ void *priv; /* Private field for store mgmt. */
+ /* To be used only by archive store*/
+};
+typedef struct _archstore_desc archstore_desc_t;
+
+struct _archstore_info {
+ char *id; /* Identifier for the archivestore */
+ uint32_t idlen; /* Length of identifier string */
+ char *prod; /* Name of the data mgmt. product */
+ uint32_t prodlen; /* Length of the product string */
+};
+typedef struct _archstore_info archstore_info_t;
+
+struct _archstore_fileinfo {
+ uuid_t uuid; /* uuid of the file */
+ char *path; /* file path */
+ uint32_t pathlength; /* length of file path */
+};
+typedef struct _archstore_fileinfo archstore_fileinfo_t;
+
+struct _app_callback_info {
+ archstore_info_t *src_archstore;
+ archstore_fileinfo_t *src_archfile;
+ archstore_info_t *dest_archstore;
+ archstore_fileinfo_t *dest_archfile;
+};
+typedef struct _app_callback_info app_callback_info_t;
+
+typedef void (*app_callback_t)(archstore_desc_t *, app_callback_info_t *,
+ void *, int64_t, int32_t);
+
+enum _archstore_scan_type { FULL = 1, INCREMENTAL = 2 };
+typedef enum _archstore_scan_type archstore_scan_type_t;
+
+typedef int32_t archstore_errno_t;
+
+/*
+ * Initialize archive store.
+ * arg1 pointer to structure containing archive store information
+ * arg2 error number if any generated during the initialization
+ * arg3 name of the log file
+ */
+typedef int32_t (*init_archstore_t)(archstore_desc_t *, archstore_errno_t *,
+ const char *);
+
+/*
+ * Clean up archive store.
+ * arg1 pointer to structure containing archive store information
+ * arg2 error number if any generated during the cleanup
+ */
+typedef int32_t (*term_archstore_t)(archstore_desc_t *, archstore_errno_t *);
+
+/*
+ * Read the contents of the file from archive store
+ * arg1 pointer to structure containing archive store description
+ * arg2 pointer to structure containing archive store information
+ * arg3 pointer to structure containing information about file to be read
+ * arg4 offset in the file from which data should be read
+ * arg5 buffer where the data should be read
+ * arg6 number of bytes of data to be read
+ * arg7 error number if any generated during the read from file
+ * arg8 callback handler to be invoked after the data is read
+ * arg9 cookie to be passed when callback is invoked
+ */
+typedef int32_t (*read_archstore_t)(archstore_desc_t *, archstore_info_t *,
+ archstore_fileinfo_t *, off_t, char *,
+ size_t, archstore_errno_t *, app_callback_t,
+ void *);
+
+/*
+ * Restore the contents of the file from archive store
+ * This is basically in-place restore
+ * arg1 pointer to structure containing archive store description
+ * arg2 pointer to structure containing archive store information
+ * arg3 pointer to structure containing information about file to be restored
+ * arg4 error number if any generated during the file restore
+ * arg5 callback to be invoked after the file is restored
+ * arg6 cookie to be passed when callback is invoked
+ */
+typedef int32_t (*recall_archstore_t)(archstore_desc_t *, archstore_info_t *,
+ archstore_fileinfo_t *,
+ archstore_errno_t *, app_callback_t,
+ void *);
+
+/*
+ * Restore the contents of the file from archive store to a different store
+ * This is basically out-of-place restore
+ * arg1 pointer to structure containing archive store description
+ * arg2 pointer to structure containing source archive store information
+ * arg3 pointer to structure containing information about file to be restored
+ * arg4 pointer to structure containing destination archive store information
+ * arg5 pointer to structure containing information about the location to
+ which the file will be restored
+ * arg6 error number if any generated during the file restore
+ * arg7 callback to be invoked after the file is restored
+ * arg8 cookie to be passed when callback is invoked
+ */
+typedef int32_t (*restore_archstore_t)(archstore_desc_t *, archstore_info_t *,
+ archstore_fileinfo_t *,
+ archstore_info_t *,
+ archstore_fileinfo_t *,
+ archstore_errno_t *, app_callback_t,
+ void *);
+
+/*
+ * Archive the contents of the file to archive store
+ * arg1 pointer to structure containing archive store description
+ * arg2 pointer to structure containing source archive store information
+ * arg3 pointer to structure containing information about files to be archived
+ * arg4 pointer to structure containing destination archive store information
+ * arg5 pointer to structure containing information about files that failed
+ * to be archived
+ * arg6 error number if any generated during the file archival
+ * arg7 callback to be invoked after the file is archived
+ * arg8 cookie to be passed when callback is invoked
+ */
+typedef int32_t (*archive_archstore_t)(archstore_desc_t *, archstore_info_t *,
+ archstore_fileinfo_t *,
+ archstore_info_t *,
+ archstore_fileinfo_t *,
+ archstore_errno_t *, app_callback_t,
+ void *);
+
+/*
+ * Backup list of files provided in the input file
+ * arg1 pointer to structure containing archive store description
+ * arg2 pointer to structure containing source archive store information
+ * arg3 pointer to structure containing information about files to be backed up
+ * arg4 pointer to structure containing destination archive store information
+ * arg5 pointer to structure containing information about files that failed
+ * to be backed up
+ * arg6 error number if any generated during the file archival
+ * arg7 callback to be invoked after the file is archived
+ * arg8 cookie to be passed when callback is invoked
+ */
+typedef int32_t (*backup_archstore_t)(archstore_desc_t *, archstore_info_t *,
+ archstore_fileinfo_t *,
+ archstore_info_t *,
+ archstore_fileinfo_t *,
+ archstore_errno_t *, app_callback_t,
+ void *);
+
+/*
+ * Scan the contents of a store and determine the files which need to be
+ * backed up.
+ * arg1 pointer to structure containing archive store description
+ * arg2 pointer to structure containing archive store information
+ * arg3 type of scan whether full or incremental
+ * arg4 path to file that contains list of files to be backed up
+ * arg5 error number if any generated during scan operation
+ */
+typedef int32_t (*scan_archstore_t)(archstore_desc_t *, archstore_info_t *,
+ archstore_scan_type_t, char *,
+ archstore_errno_t *);
+
+struct _archstore_methods {
+ init_archstore_t init;
+ term_archstore_t fini;
+ backup_archstore_t backup;
+ archive_archstore_t archive;
+ scan_archstore_t scan;
+ restore_archstore_t restore;
+ recall_archstore_t recall;
+ read_archstore_t read;
+};
+
+typedef int (*get_archstore_methods_t)(archstore_methods_t *);
+
+/*
+ * Single function that will be invoked by applications for extracting
+ * the function pointers to all data management functions.
+ */
+int32_t
+get_archstore_methods(archstore_methods_t *);
+
+#endif /* End of __ARCHIVESTORE_H__ */
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/cvlt-messages.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/cvlt-messages.h
new file mode 100644
index 00000000000..57c9aa77da0
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/cvlt-messages.h
@@ -0,0 +1,30 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#ifndef _CVLT_MESSAGES_H_
+#define _CVLT_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(CVLT, CVLT_EXTRACTION_FAILED, CVLT_FREE,
+ CVLT_RESOURCE_ALLOCATION_FAILED, CVLT_RESTORE_FAILED,
+ CVLT_READ_FAILED, CVLT_NO_MEMORY, CVLT_DLOPEN_FAILED);
+
+#endif /* !_CVLT_MESSAGES_H_ */
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym
new file mode 100644
index 00000000000..0bc273670d5
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym
@@ -0,0 +1 @@
+store_ops
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt-mem-types.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt-mem-types.h
new file mode 100644
index 00000000000..c24fab8bfe7
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt-mem-types.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2018 Commvault Systems, Inc. <http://www.commvault.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifndef __LIBCVLT_MEM_TYPES_H__
+#define __LIBCVLT_MEM_TYPES_H__
+
+#include <glusterfs/mem-types.h>
+enum libcvlt_mem_types_ {
+ gf_libcvlt_mt_cvlt_private_t = gf_common_mt_end + 1,
+ gf_libcvlt_mt_end
+};
+#endif /* __LIBCVLT_MEM_TYPES_H__ */
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.c b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.c
new file mode 100644
index 00000000000..5b7272bb448
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.c
@@ -0,0 +1,842 @@
+#include <stdlib.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/glusterfs.h>
+#include "libcvlt.h"
+#include "cloudsync-common.h"
+#include "cvlt-messages.h"
+
+#define LIBARCHIVE_SO "libopenarchive.so"
+#define ALIGN_SIZE 4096
+#define CVLT_TRAILER "cvltv1"
+
+store_methods_t store_ops = {
+ .fop_download = cvlt_download,
+ .fop_init = cvlt_init,
+ .fop_reconfigure = cvlt_reconfigure,
+ .fop_fini = cvlt_fini,
+ .fop_remote_read = cvlt_read,
+};
+
+static const int32_t num_req = 32;
+static const int32_t num_iatt = 32;
+static char *plugin = "cvlt_cloudSync";
+
+int32_t
+mem_acct_init(xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init(this, gf_libcvlt_mt_end + 1);
+
+ if (ret != 0) {
+ return ret;
+ }
+
+ return ret;
+}
+
+static void
+cvlt_free_resources(archive_t *arch)
+{
+ /*
+ * We will release all the resources that were allocated by the xlator.
+ * Check whether there are any buffers which have not been released
+ * back to a mempool.
+ */
+
+ if (arch->handle) {
+ dlclose(arch->handle);
+ }
+
+ if (arch->iobuf_pool) {
+ iobuf_pool_destroy(arch->iobuf_pool);
+ }
+
+ if (arch->req_pool) {
+ mem_pool_destroy(arch->req_pool);
+ arch->req_pool = NULL;
+ }
+
+ return;
+}
+
+static int32_t
+cvlt_extract_store_fops(xlator_t *this, archive_t *arch)
+{
+ int32_t op_ret = -1;
+ get_archstore_methods_t get_archstore_methods;
+
+ /*
+ * libopenarchive.so defines methods for performing data management
+ * operations. We will extract the methods from library and these
+ * methods will be invoked for moving data between glusterfs volume
+ * and the data management product.
+ */
+
+ VALIDATE_OR_GOTO(arch, err);
+
+ arch->handle = dlopen(LIBARCHIVE_SO, RTLD_NOW);
+ if (!arch->handle) {
+ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_DLOPEN_FAILED,
+ " failed to open %s ", LIBARCHIVE_SO);
+ return op_ret;
+ }
+
+ dlerror(); /* Clear any existing error */
+
+ get_archstore_methods = dlsym(arch->handle, "get_archstore_methods");
+ if (!get_archstore_methods) {
+ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
+ " Error extracting get_archstore_methods()");
+ dlclose(arch->handle);
+ arch->handle = NULL;
+ return op_ret;
+ }
+
+ op_ret = get_archstore_methods(&(arch->fops));
+ if (op_ret) {
+ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
+ " Failed to extract methods in get_archstore_methods");
+ dlclose(arch->handle);
+ arch->handle = NULL;
+ return op_ret;
+ }
+
+err:
+ return op_ret;
+}
+
+static int32_t
+cvlt_alloc_resources(xlator_t *this, archive_t *arch, int num_req, int num_iatt)
+{
+ /*
+ * Initialize information about all the memory pools that will be
+ * used by this xlator.
+ */
+ arch->nreqs = 0;
+
+ arch->req_pool = NULL;
+
+ arch->handle = NULL;
+ arch->xl = this;
+
+ arch->req_pool = mem_pool_new(cvlt_request_t, num_req);
+ if (!arch->req_pool) {
+ goto err;
+ }
+
+ arch->iobuf_pool = iobuf_pool_new();
+ if (!arch->iobuf_pool) {
+ goto err;
+ }
+
+ if (cvlt_extract_store_fops(this, arch)) {
+ goto err;
+ }
+
+ return 0;
+
+err:
+
+ return -1;
+}
+
+static void
+cvlt_req_init(cvlt_request_t *req)
+{
+ sem_init(&(req->sem), 0, 0);
+
+ return;
+}
+
+static void
+cvlt_req_destroy(cvlt_request_t *req)
+{
+ if (req->iobuf) {
+ iobuf_unref(req->iobuf);
+ }
+
+ if (req->iobref) {
+ iobref_unref(req->iobref);
+ }
+
+ sem_destroy(&(req->sem));
+
+ return;
+}
+
+static cvlt_request_t *
+cvlt_alloc_req(archive_t *arch)
+{
+ cvlt_request_t *reqptr = NULL;
+
+ if (!arch) {
+ goto err;
+ }
+
+ if (arch->req_pool) {
+ reqptr = mem_get0(arch->req_pool);
+ if (reqptr) {
+ cvlt_req_init(reqptr);
+ }
+ }
+
+ if (reqptr) {
+ LOCK(&(arch->lock));
+ arch->nreqs++;
+ UNLOCK(&(arch->lock));
+ }
+
+err:
+ return reqptr;
+}
+
+static int32_t
+cvlt_free_req(archive_t *arch, cvlt_request_t *reqptr)
+{
+ if (!reqptr) {
+ goto err;
+ }
+
+ if (!arch) {
+ goto err;
+ }
+
+ if (arch->req_pool) {
+ /*
+ * Free the request resources if they exist.
+ */
+
+ cvlt_req_destroy(reqptr);
+ mem_put(reqptr);
+
+ LOCK(&(arch->lock));
+ arch->nreqs--;
+ UNLOCK(&(arch->lock));
+ }
+
+ return 0;
+
+err:
+ return -1;
+}
+
+static int32_t
+cvlt_init_xlator(xlator_t *this, archive_t *arch, int num_req, int num_iatt)
+{
+ int32_t ret = -1;
+ int32_t errnum = -1;
+ int32_t locked = 0;
+
+ /*
+ * Perform all the initializations needed for brining up the xlator.
+ */
+ if (!arch) {
+ goto err;
+ }
+
+ LOCK_INIT(&(arch->lock));
+ LOCK(&(arch->lock));
+
+ locked = 1;
+
+ ret = cvlt_alloc_resources(this, arch, num_req, num_iatt);
+
+ if (ret) {
+ goto err;
+ }
+
+ /*
+ * Now that the fops have been extracted initialize the store
+ */
+ ret = arch->fops.init(&(arch->descinfo), &errnum, plugin);
+ if (ret) {
+ goto err;
+ }
+
+ UNLOCK(&(arch->lock));
+ locked = 0;
+ ret = 0;
+
+ return ret;
+
+err:
+ if (arch) {
+ cvlt_free_resources(arch);
+
+ if (locked) {
+ UNLOCK(&(arch->lock));
+ }
+ }
+
+ return ret;
+}
+
+static int32_t
+cvlt_term_xlator(archive_t *arch)
+{
+ int32_t errnum = -1;
+
+ if (!arch) {
+ goto err;
+ }
+
+ LOCK(&(arch->lock));
+
+ /*
+ * Release the resources that have been allocated inside store
+ */
+ arch->fops.fini(&(arch->descinfo), &errnum);
+
+ cvlt_free_resources(arch);
+
+ UNLOCK(&(arch->lock));
+
+ GF_FREE(arch);
+
+ return 0;
+
+err:
+ return -1;
+}
+
+static int32_t
+cvlt_init_store_info(archive_t *priv, archstore_info_t *store_info)
+{
+ if (!store_info) {
+ return -1;
+ }
+
+ store_info->prod = priv->product_id;
+ store_info->prodlen = strlen(priv->product_id);
+
+ store_info->id = priv->store_id;
+ store_info->idlen = strlen(priv->store_id);
+
+ return 0;
+}
+
+static int32_t
+cvlt_init_file_info(cs_loc_xattr_t *xattr, archstore_fileinfo_t *file_info)
+{
+ if (!xattr || !file_info) {
+ return -1;
+ }
+
+ gf_uuid_copy(file_info->uuid, xattr->uuid);
+ file_info->path = xattr->file_path;
+ file_info->pathlength = strlen(xattr->file_path);
+
+ return 0;
+}
+
+static int32_t
+cvlt_init_gluster_store_info(cs_loc_xattr_t *xattr,
+ archstore_info_t *store_info)
+{
+ static char *product = "glusterfs";
+
+ if (!xattr || !store_info) {
+ return -1;
+ }
+
+ store_info->prod = product;
+ store_info->prodlen = strlen(product);
+
+ store_info->id = xattr->volname;
+ store_info->idlen = strlen(xattr->volname);
+
+ return 0;
+}
+
+static int32_t
+cvlt_init_gluster_file_info(cs_loc_xattr_t *xattr,
+ archstore_fileinfo_t *file_info)
+{
+ if (!xattr || !file_info) {
+ return -1;
+ }
+
+ gf_uuid_copy(file_info->uuid, xattr->gfid);
+ file_info->path = xattr->file_path;
+ file_info->pathlength = strlen(xattr->file_path);
+
+ return 0;
+}
+
+static void
+cvlt_copy_stat_info(struct iatt *buf, cs_size_xattr_t *xattrs)
+{
+ /*
+ * If the file was archived then the reported size will not be a
+ * correct one. We need to fix this.
+ */
+ if (buf && xattrs) {
+ buf->ia_size = xattrs->size;
+ buf->ia_blksize = xattrs->blksize;
+ buf->ia_blocks = xattrs->blocks;
+ }
+
+ return;
+}
+
+static void
+cvlt_readv_complete(archstore_desc_t *desc, app_callback_info_t *cbkinfo,
+ void *cookie, int64_t op_ret, int32_t op_errno)
+{
+ struct iovec iov;
+ xlator_t *this = NULL;
+ struct iatt postbuf = {
+ 0,
+ };
+ call_frame_t *frame = NULL;
+ cvlt_request_t *req = (cvlt_request_t *)cookie;
+ cs_local_t *local = NULL;
+ cs_private_t *cspriv = NULL;
+ archive_t *priv = NULL;
+
+ frame = req->frame;
+ this = frame->this;
+ local = frame->local;
+
+ cspriv = this->private;
+ priv = (archive_t *)cspriv->stores->config;
+
+ if (strcmp(priv->trailer, CVLT_TRAILER)) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ gf_msg_debug(plugin, 0,
+ " Read callback invoked offset:%" PRIu64 "bytes: %" PRIu64
+ " op : %d ret : %" PRId64 " errno : %d",
+ req->offset, req->bytes, req->op_type, op_ret, op_errno);
+
+ if (op_ret < 0) {
+ goto out;
+ }
+
+ req->iobref = iobref_new();
+ if (!req->iobref) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ iobref_add(req->iobref, req->iobuf);
+ iov.iov_base = iobuf_ptr(req->iobuf);
+ iov.iov_len = op_ret;
+
+ cvlt_copy_stat_info(&postbuf, &(req->szxattr));
+
+ /*
+ * Hack to notify higher layers of EOF.
+ */
+ if (!postbuf.ia_size || (req->offset + iov.iov_len >= postbuf.ia_size)) {
+ gf_msg_debug(plugin, 0, " signalling end-of-file for uuid=%s",
+ uuid_utoa(req->file_info.uuid));
+ op_errno = ENOENT;
+ }
+
+out:
+
+ STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, &iov, 1, &postbuf,
+ req->iobref, local->xattr_rsp);
+
+ cvlt_free_req(priv, req);
+
+ return;
+}
+
+static void
+cvlt_download_complete(archstore_desc_t *store, app_callback_info_t *cbk_info,
+ void *cookie, int64_t ret, int errcode)
+{
+ cvlt_request_t *req = (cvlt_request_t *)cookie;
+
+ gf_msg_debug(plugin, 0,
+ " Download callback invoked ret : %" PRId64 " errno : %d",
+ ret, errcode);
+
+ req->op_ret = ret;
+ req->op_errno = errcode;
+ sem_post(&(req->sem));
+
+ return;
+}
+
+void *
+cvlt_init(xlator_t *this)
+{
+ int ret = 0;
+ archive_t *priv = NULL;
+
+ if (!this->children || this->children->next) {
+ gf_msg(plugin, GF_LOG_ERROR, ENOMEM, 0,
+ "should have exactly one child");
+ ret = -1;
+ goto out;
+ }
+
+ if (!this->parents) {
+ gf_msg(plugin, GF_LOG_ERROR, ENOMEM, 0,
+ "dangling volume. check volfile");
+ ret = -1;
+ goto out;
+ }
+
+ priv = GF_CALLOC(1, sizeof(archive_t), gf_libcvlt_mt_cvlt_private_t);
+ if (!priv) {
+ ret = -1;
+ goto out;
+ }
+
+ priv->trailer = CVLT_TRAILER;
+ if (cvlt_init_xlator(this, priv, num_req, num_iatt)) {
+ gf_msg(plugin, GF_LOG_ERROR, ENOMEM, 0, "xlator init failed");
+ ret = -1;
+ goto out;
+ }
+
+ GF_OPTION_INIT("cloudsync-store-id", priv->store_id, str, out);
+ GF_OPTION_INIT("cloudsync-product-id", priv->product_id, str, out);
+
+ gf_msg(plugin, GF_LOG_INFO, 0, 0,
+ "store id is : %s "
+ "product id is : %s.",
+ priv->store_id, priv->product_id);
+out:
+ if (ret == -1) {
+ cvlt_term_xlator(priv);
+ return (NULL);
+ }
+ return priv;
+}
+
+int
+cvlt_reconfigure(xlator_t *this, dict_t *options)
+{
+ cs_private_t *cspriv = NULL;
+ archive_t *priv = NULL;
+
+ cspriv = this->private;
+ priv = (archive_t *)cspriv->stores->config;
+
+ if (strcmp(priv->trailer, CVLT_TRAILER))
+ goto out;
+
+ GF_OPTION_RECONF("cloudsync-store-id", priv->store_id, options, str, out);
+
+ GF_OPTION_RECONF("cloudsync-product-id", priv->product_id, options, str,
+ out);
+ gf_msg_debug(plugin, 0,
+ "store id is : %s "
+ "product id is : %s.",
+ priv->store_id, priv->product_id);
+ return 0;
+out:
+ return -1;
+}
+
+void
+cvlt_fini(void *config)
+{
+ archive_t *priv = NULL;
+
+ priv = (archive_t *)config;
+
+ if (strcmp(priv->trailer, CVLT_TRAILER))
+ return;
+
+ cvlt_term_xlator(priv);
+ gf_msg(plugin, GF_LOG_INFO, 0, CVLT_FREE, " released xlator resources");
+ return;
+}
+
+int
+cvlt_download(call_frame_t *frame, void *config)
+{
+ archive_t *parch = NULL;
+ cs_local_t *local = frame->local;
+ cs_loc_xattr_t *locxattr = local->xattrinfo.lxattr;
+ cvlt_request_t *req = NULL;
+ archstore_info_t dest_storeinfo;
+ archstore_fileinfo_t dest_fileinfo;
+ int32_t op_ret, op_errno;
+
+ parch = (archive_t *)config;
+
+ if (strcmp(parch->trailer, CVLT_TRAILER)) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ gf_msg_debug(plugin, 0, " download invoked for uuid = %s gfid=%s ",
+ locxattr->uuid, uuid_utoa(locxattr->gfid));
+
+ if (!(parch->fops.restore)) {
+ op_errno = ELIBBAD;
+ goto err;
+ }
+
+ /*
+ * Download needs to be processed. Allocate a request.
+ */
+ req = cvlt_alloc_req(parch);
+
+ if (!req) {
+ gf_msg(plugin, GF_LOG_ERROR, ENOMEM, CVLT_RESOURCE_ALLOCATION_FAILED,
+ " failed to allocated request for gfid=%s",
+ uuid_utoa(locxattr->gfid));
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ /*
+ * Initialize the request object.
+ */
+ req->op_type = CVLT_RESTORE_OP;
+ req->frame = frame;
+
+ /*
+ * The file is currently residing inside a data management store.
+ * To restore the file contents we need to provide the information
+ * about data management store.
+ */
+ op_ret = cvlt_init_store_info(parch, &(req->store_info));
+ if (op_ret < 0) {
+ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
+ " failed to extract store info for gfid=%s",
+ uuid_utoa(locxattr->gfid));
+ goto err;
+ }
+
+ op_ret = cvlt_init_file_info(locxattr, &(req->file_info));
+ if (op_ret < 0) {
+ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
+ " failed to extract file info for gfid=%s",
+ uuid_utoa(locxattr->gfid));
+ goto err;
+ }
+
+ /*
+ * We need to perform in-place restore of the file from data management
+ * store to gusterfs volume.
+ */
+ op_ret = cvlt_init_gluster_store_info(locxattr, &dest_storeinfo);
+ if (op_ret < 0) {
+ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
+ " failed to extract destination store info for gfid=%s",
+ uuid_utoa(locxattr->gfid));
+ goto err;
+ }
+
+ op_ret = cvlt_init_gluster_file_info(locxattr, &dest_fileinfo);
+ if (op_ret < 0) {
+ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
+ " failed to extract file info for gfid=%s",
+ uuid_utoa(locxattr->gfid));
+ goto err;
+ }
+
+ /*
+ * Submit the restore request.
+ */
+ op_ret = parch->fops.restore(&(parch->descinfo), &(req->store_info),
+ &(req->file_info), &dest_storeinfo,
+ &dest_fileinfo, &op_errno,
+ cvlt_download_complete, req);
+ if (op_ret < 0) {
+ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_RESTORE_FAILED,
+ " failed to restore file gfid=%s from data management store",
+ uuid_utoa(locxattr->gfid));
+ goto err;
+ }
+
+ /*
+ * Wait for the restore to complete.
+ */
+ sem_wait(&(req->sem));
+
+ if (req->op_ret < 0) {
+ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_RESTORE_FAILED,
+ " restored failed for gfid=%s", uuid_utoa(locxattr->gfid));
+ goto err;
+ }
+
+ if (req) {
+ cvlt_free_req(parch, req);
+ }
+
+ return 0;
+
+err:
+
+ if (req) {
+ cvlt_free_req(parch, req);
+ }
+
+ return -1;
+}
+
+int
+cvlt_read(call_frame_t *frame, void *config)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ archive_t *parch = NULL;
+ cvlt_request_t *req = NULL;
+ struct iovec iov = {
+ 0,
+ };
+ struct iobref *iobref;
+ size_t size = 0;
+ off_t off = 0;
+
+ cs_local_t *local = frame->local;
+ cs_loc_xattr_t *locxattr = local->xattrinfo.lxattr;
+
+ size = local->xattrinfo.size;
+ off = local->xattrinfo.offset;
+
+ parch = (archive_t *)config;
+
+ if (strcmp(parch->trailer, CVLT_TRAILER)) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ gf_msg_debug(plugin, 0,
+ " read invoked for gfid = %s offset = %" PRIu64
+ " file_size = %" PRIu64,
+ uuid_utoa(locxattr->gfid), off, local->stbuf.ia_size);
+
+ if (off >= local->stbuf.ia_size) {
+ /*
+ * Hack to notify higher layers of EOF.
+ */
+
+ op_errno = ENOENT;
+ op_ret = 0;
+
+ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_READ_FAILED,
+ " reporting end-of-file for gfid=%s", uuid_utoa(locxattr->gfid));
+
+ goto err;
+ }
+
+ if (!size) {
+ op_errno = EINVAL;
+
+ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_READ_FAILED,
+ " zero size read attempted on gfid=%s",
+ uuid_utoa(locxattr->gfid));
+ goto err;
+ }
+
+ if (!(parch->fops.read)) {
+ op_errno = ELIBBAD;
+ goto err;
+ }
+
+ /*
+ * The read request need to be processed. Allocate a request.
+ */
+ req = cvlt_alloc_req(parch);
+
+ if (!req) {
+ gf_msg(plugin, GF_LOG_ERROR, ENOMEM, CVLT_NO_MEMORY,
+ " failed to allocated request for gfid=%s",
+ uuid_utoa(locxattr->gfid));
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ req->iobuf = iobuf_get_page_aligned(parch->iobuf_pool, size, ALIGN_SIZE);
+ if (!req->iobuf) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ /*
+ * Initialize the request object.
+ */
+ req->op_type = CVLT_READ_OP;
+ req->offset = off;
+ req->bytes = size;
+ req->frame = frame;
+ req->szxattr.size = local->stbuf.ia_size;
+ req->szxattr.blocks = local->stbuf.ia_blocks;
+ req->szxattr.blksize = local->stbuf.ia_blksize;
+
+ /*
+ * The file is currently residing inside a data management store.
+ * To read the file contents we need to provide the information
+ * about data management store.
+ */
+ op_ret = cvlt_init_store_info(parch, &(req->store_info));
+ if (op_ret < 0) {
+ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
+ " failed to extract store info for gfid=%s"
+ " offset=%" PRIu64 " size=%" GF_PRI_SIZET
+ ", "
+ " buf=%p",
+ uuid_utoa(locxattr->gfid), off, size, req->iobuf->ptr);
+ goto err;
+ }
+
+ op_ret = cvlt_init_file_info(locxattr, &(req->file_info));
+ if (op_ret < 0) {
+ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
+ " failed to extract file info for gfid=%s"
+ " offset=%" PRIu64 " size=%" GF_PRI_SIZET
+ ", "
+ " buf=%p",
+ uuid_utoa(locxattr->gfid), off, size, req->iobuf->ptr);
+ goto err;
+ }
+
+ /*
+ * Submit the read request.
+ */
+ op_ret = parch->fops.read(&(parch->descinfo), &(req->store_info),
+ &(req->file_info), off, req->iobuf->ptr, size,
+ &op_errno, cvlt_readv_complete, req);
+
+ if (op_ret < 0) {
+ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
+ " read failed on gfid=%s"
+ " offset=%" PRIu64 " size=%" GF_PRI_SIZET
+ ", "
+ " buf=%p",
+ uuid_utoa(locxattr->gfid), off, size, req->iobuf->ptr);
+ goto err;
+ }
+
+ return 0;
+
+err:
+
+ iobref = iobref_new();
+ gf_msg_debug(plugin, 0, " read unwinding stack op_ret = %d, op_errno = %d",
+ op_ret, op_errno);
+
+ STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, &iov, 1,
+ &(local->stbuf), iobref, local->xattr_rsp);
+
+ if (iobref) {
+ iobref_unref(iobref);
+ }
+
+ if (req) {
+ cvlt_free_req(parch, req);
+ }
+
+ return 0;
+}
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.h
new file mode 100644
index 00000000000..c45ac948f6c
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.h
@@ -0,0 +1,84 @@
+/*
+ Copyright (c) 2018 Commvault Systems, Inc. <http://www.commvault.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _LIBCVLT_H
+#define _LIBCVLT_H
+
+#include <semaphore.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/compat-errno.h>
+#include "cloudsync-common.h"
+#include "libcvlt-mem-types.h"
+#include "archivestore.h"
+
+enum _cvlt_op {
+ CVLT_READ_OP = 1,
+ CVLT_WRITE_OP = 2,
+ CVLT_RESTORE_OP = 3,
+ CVLT_ARCHIVE_OP = 4,
+ CVLT_LOOKUP_OP = 5,
+ CVLT_XATTR_OP = 6,
+ CVLT_STAT_OP = 7,
+ CVLT_FSTAT_op = 8,
+ CVLT_UNDEF_OP = 127
+};
+typedef enum _cvlt_op cvlt_op_t;
+
+struct _archive;
+struct _cvlt_request {
+ uint64_t offset;
+ uint64_t bytes;
+ struct iobuf *iobuf;
+ struct iobref *iobref;
+ call_frame_t *frame;
+ cvlt_op_t op_type;
+ int32_t op_ret;
+ int32_t op_errno;
+ xlator_t *this;
+ sem_t sem;
+ archstore_info_t store_info;
+ archstore_fileinfo_t file_info;
+ cs_size_xattr_t szxattr;
+};
+typedef struct _cvlt_request cvlt_request_t;
+
+struct _archive {
+ gf_lock_t lock; /* lock for controlling access */
+ xlator_t *xl; /* xlator */
+ void *handle; /* handle returned from dlopen */
+ int32_t nreqs; /* num requests active */
+ struct mem_pool *req_pool; /* pool for requests */
+ struct iobuf_pool *iobuf_pool; /* iobuff pool */
+ archstore_desc_t descinfo; /* Archive store descriptor info */
+ archstore_methods_t fops; /* function pointers */
+ char *product_id;
+ char *store_id;
+ char *trailer;
+};
+typedef struct _archive archive_t;
+
+void *
+cvlt_init(xlator_t *);
+
+int
+cvlt_reconfigure(xlator_t *, dict_t *);
+
+void
+cvlt_fini(void *);
+
+int
+cvlt_download(call_frame_t *, void *);
+
+int
+cvlt_read(call_frame_t *, void *);
+
+#endif
diff --git a/xlators/features/cloudsync/src/cloudsync.c b/xlators/features/cloudsync/src/cloudsync.c
index 918ed786b6b..7f0b9e563b8 100644
--- a/xlators/features/cloudsync/src/cloudsync.c
+++ b/xlators/features/cloudsync/src/cloudsync.c
@@ -8,17 +8,18 @@
* cases as published by the Free Software Foundation.
*/
-#include "glusterfs.h"
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "cloudsync.h"
#include "cloudsync-common.h"
-#include "call-stub.h"
+#include <glusterfs/call-stub.h>
#include "cloudsync-autogen-fops.h"
+#include <string.h>
#include <dlfcn.h>
-void
+static void
cs_cleanup_private(cs_private_t *priv)
{
if (priv) {
@@ -34,11 +35,15 @@ cs_cleanup_private(cs_private_t *priv)
return;
}
-struct cs_plugin plugins[] = {
+static struct cs_plugin plugins[] = {
{.name = "cloudsyncs3",
.library = "cloudsyncs3.so",
.description = "cloudsync s3 store."},
-
+#if defined(__linux__)
+ {.name = "cvlt",
+ .library = "cloudsynccvlt.so",
+ .description = "Commvault content store."},
+#endif
{.name = NULL},
};
@@ -72,12 +77,14 @@ cs_init(xlator_t *this)
this->private = priv;
+ GF_OPTION_INIT("cloudsync-remote-read", priv->remote_read, bool, out);
+
/* temp workaround. Should be configurable through glusterd*/
per_vol = _gf_true;
if (per_vol) {
- if (dict_get_str(this->options, "cloudsync-storetype", &temp_str) ==
- 0) {
+ if (dict_get_str_sizen(this->options, "cloudsync-storetype",
+ &temp_str) == 0) {
for (index = 0; plugins[index].name; index++) {
if (!strcmp(temp_str, plugins[index].name)) {
libname = plugins[index].library;
@@ -135,6 +142,18 @@ cs_init(xlator_t *this)
(void)dlerror();
+ if (priv->remote_read) {
+ priv->stores->rdfop = store_methods->fop_remote_read;
+ if (!priv->stores->rdfop) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "failed to get"
+ " read fop %s",
+ dlerror());
+ ret = -1;
+ goto out;
+ }
+ }
+
priv->stores->dlfop = store_methods->fop_download;
if (!priv->stores->dlfop) {
gf_msg(this->name, GF_LOG_ERROR, 0, 0,
@@ -181,8 +200,10 @@ cs_init(xlator_t *this)
out:
if (ret == -1) {
- if (this->local_pool)
+ if (this->local_pool) {
mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
+ }
cs_cleanup_private(priv);
@@ -196,6 +217,22 @@ out:
return ret;
}
+int
+cs_forget(xlator_t *this, inode_t *inode)
+{
+ uint64_t ctx_int = 0;
+ cs_inode_ctx_t *ctx = NULL;
+
+ inode_ctx_del(inode, this, &ctx_int);
+ if (!ctx_int)
+ return 0;
+
+ ctx = (cs_inode_ctx_t *)(uintptr_t)ctx_int;
+
+ GF_FREE(ctx);
+ return 0;
+}
+
void
cs_fini(xlator_t *this)
{
@@ -217,6 +254,9 @@ cs_reconfigure(xlator_t *this, dict_t *options)
goto out;
}
+ GF_OPTION_RECONF("cloudsync-remote-read", priv->remote_read, options, bool,
+ out);
+
/* needed only for per volume configuration*/
ret = priv->stores->reconfigure(this, options);
@@ -242,32 +282,6 @@ out:
}
int32_t
-cs_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
- dict_t *xdata)
-{
- gf_dirent_t *tmp = NULL;
- char *sxattr = NULL;
- uint64_t ia_size = 0;
- int ret = 0;
-
- list_for_each_entry(tmp, &entries->list, list)
- {
- ret = dict_get_str(tmp->dict, GF_CS_OBJECT_SIZE, &sxattr);
- if (ret) {
- gf_msg_trace(this->name, 0, "size xattr found");
- continue;
- }
-
- ia_size = atoll(sxattr);
- tmp->d_stat.ia_size = ia_size;
- }
-
- STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata);
- return 0;
-}
-
-int32_t
cs_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
off_t off, dict_t *xdata)
{
@@ -277,16 +291,23 @@ cs_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
if (!xdata) {
xdata = dict_new();
if (!xdata) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM,
+ "failed to create "
+ "dict");
goto err;
}
}
- ret = dict_set_int32(xdata, GF_CS_OBJECT_SIZE, 1);
+ ret = dict_set_uint32(xdata, GF_CS_OBJECT_STATUS, 1);
if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "dict_set failed key:"
+ " %s",
+ GF_CS_OBJECT_STATUS);
goto err;
}
- STACK_WIND(frame, cs_readdirp_cbk, FIRST_CHILD(this),
+ STACK_WIND(frame, default_readdirp_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata);
return 0;
err:
@@ -305,7 +326,6 @@ cs_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
- /* Do we need lock here? */
local->call_cnt++;
if (op_ret == -1) {
@@ -320,13 +340,13 @@ cs_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
goto unwind;
} else {
__cs_inode_ctx_update(this, local->loc.inode, val);
- gf_msg(this->name, GF_LOG_INFO, 0, 0, " state = %ld", val);
+ gf_msg(this->name, GF_LOG_INFO, 0, 0, " state = %" PRIu64, val);
if (local->call_cnt == 1 &&
(val == GF_CS_REMOTE || val == GF_CS_DOWNLOADING)) {
gf_msg(this->name, GF_LOG_WARNING, 0, 0,
"will repair and download "
- "the file, current state : %ld",
+ "the file, current state : %" PRIu64,
val);
goto repair;
} else {
@@ -368,7 +388,6 @@ int32_t
cs_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
dict_t *xdata)
{
- int op_errno = -1;
cs_local_t *local = NULL;
int ret = 0;
cs_inode_ctx_t *ctx = NULL;
@@ -381,14 +400,13 @@ cs_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
local = cs_local_init(this, frame, loc, NULL, GF_FOP_TRUNCATE);
if (!local) {
gf_msg(this->name, GF_LOG_ERROR, 0, 0, "local init failed");
- op_errno = ENOMEM;
goto err;
}
__cs_inode_ctx_get(this, loc->inode, &ctx);
if (ctx)
- state = __cs_get_file_state(this, loc->inode, ctx);
+ state = __cs_get_file_state(loc->inode, ctx);
else
state = GF_CS_LOCAL;
@@ -407,7 +425,6 @@ cs_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
xdata);
if (!local->stub) {
gf_msg(this->name, GF_LOG_ERROR, 0, 0, "insufficient memory");
- op_errno = ENOMEM;
goto err;
}
@@ -419,14 +436,13 @@ cs_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
local->call_cnt++;
ret = locate_and_execute(frame);
if (ret) {
- op_errno = ENOMEM;
goto err;
}
}
return 0;
err:
- CS_STACK_UNWIND(truncate, frame, -1, op_errno, NULL, NULL, NULL);
+ CS_STACK_UNWIND(truncate, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
@@ -498,7 +514,7 @@ cs_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
local->xattr_req = xdata ? dict_ref(xdata) : (xdata = dict_new());
- tmp = dict_get(dict, GF_CS_OBJECT_UPLOAD_COMPLETE);
+ tmp = dict_get_sizen(dict, GF_CS_OBJECT_UPLOAD_COMPLETE);
if (tmp) {
/* Value of key should be the atime */
local->stub = fop_setxattr_stub(frame, cs_resume_setxattr, loc, dict,
@@ -665,7 +681,7 @@ cs_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
if (op_ret == 0) {
ret = dict_get_uint64(xdata, GF_CS_OBJECT_STATUS, &val);
if (!ret) {
- gf_msg_debug(this->name, 0, "state %ld", val);
+ gf_msg_debug(this->name, 0, "state %" PRIu64, val);
ret = __cs_inode_ctx_update(this, fd->inode, val);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, 0, "ctx update failed");
@@ -831,7 +847,7 @@ out:
return 0;
}
-void *
+int
cs_download_task(void *arg)
{
call_frame_t *frame = NULL;
@@ -842,7 +858,6 @@ cs_download_task(void *arg)
fd_t *fd = NULL;
cs_local_t *local = NULL;
dict_t *dict = NULL;
- int *retval = NULL;
frame = (call_frame_t *)arg;
@@ -850,13 +865,6 @@ cs_download_task(void *arg)
priv = this->private;
- retval = GF_CALLOC(1, sizeof(int), gf_common_mt_int);
- if (!retval) {
- gf_msg(this->name, GF_LOG_ERROR, 0, 0, "insufficient memory");
- ret = -1;
- goto out;
- }
-
if (!priv->stores) {
gf_msg(this->name, GF_LOG_ERROR, 0, 0,
"No remote store "
@@ -972,20 +980,13 @@ out:
local->dlfd = NULL;
}
- if (retval) {
- *retval = ret;
- pthread_exit(retval);
- } else {
- pthread_exit(&ret);
- }
+ return ret;
}
int
cs_download(call_frame_t *frame)
{
- int *retval = NULL;
int ret = 0;
- pthread_t dthread;
cs_local_t *local = NULL;
xlator_t *this = NULL;
@@ -1000,16 +1001,404 @@ cs_download(call_frame_t *frame)
goto out;
}
- ret = gf_thread_create(&dthread, NULL, &cs_download_task, (void *)frame,
- "downloadthread");
+ ret = cs_download_task((void *)frame);
+out:
+ return ret;
+}
+
+int
+cs_set_xattr_req(call_frame_t *frame)
+{
+ cs_local_t *local = NULL;
+ GF_UNUSED int ret = 0;
+
+ local = frame->local;
+
+ /* When remote reads are performed (i.e. reads on remote store),
+ * there needs to be a way to associate a file on gluster volume
+ * with its correspnding file on the remote store. In order to do
+ * that, a unique key can be maintained as an xattr
+ * (GF_CS_XATTR_ARCHIVE_UUID)on the stub file on gluster bricks.
+ * This xattr should be provided to the plugin to
+ * perform the read fop on the correct file. This assumes that the file
+ * hierarchy and name need not be the same on remote store as that of
+ * the gluster volume.
+ */
+ ret = dict_set_sizen_str_sizen(local->xattr_req, GF_CS_XATTR_ARCHIVE_UUID,
+ "1");
+
+ return 0;
+}
+
+int
+cs_update_xattrs(call_frame_t *frame, dict_t *xdata)
+{
+ cs_local_t *local = NULL;
+ xlator_t *this = NULL;
+ int size = -1;
+ GF_UNUSED int ret = 0;
+
+ local = frame->local;
+ this = frame->this;
+
+ local->xattrinfo.lxattr = GF_CALLOC(1, sizeof(cs_loc_xattr_t),
+ gf_cs_mt_cs_lxattr_t);
+ if (!local->xattrinfo.lxattr) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto err;
+ }
+
+ gf_uuid_copy(local->xattrinfo.lxattr->gfid, local->loc.gfid);
+
+ if (local->remotepath) {
+ local->xattrinfo.lxattr->file_path = gf_strdup(local->remotepath);
+ if (!local->xattrinfo.lxattr->file_path) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto err;
+ }
+ }
- pthread_join(dthread, (void **)&retval);
+ ret = dict_get_gfuuid(xdata, GF_CS_XATTR_ARCHIVE_UUID,
+ &(local->xattrinfo.lxattr->uuid));
- ret = *retval;
+ if (ret) {
+ gf_uuid_clear(local->xattrinfo.lxattr->uuid);
+ }
+ size = strlen(this->name) - strlen("-cloudsync") + 1;
+ local->xattrinfo.lxattr->volname = GF_CALLOC(1, size, gf_common_mt_char);
+ if (!local->xattrinfo.lxattr->volname) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto err;
+ }
+ strncpy(local->xattrinfo.lxattr->volname, this->name, size - 1);
+ local->xattrinfo.lxattr->volname[size - 1] = '\0';
+
+ return 0;
+err:
+ cs_xattrinfo_wipe(local);
+ return -1;
+}
+
+int
+cs_serve_readv(call_frame_t *frame, off_t offset, size_t size, uint32_t flags)
+{
+ xlator_t *this = NULL;
+ cs_private_t *priv = NULL;
+ int ret = -1;
+ fd_t *fd = NULL;
+ cs_local_t *local = NULL;
+
+ local = frame->local;
+ this = frame->this;
+ priv = this->private;
+
+ if (!local->remotepath) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "remote path not"
+ " available. Check posix logs to resolve");
+ goto out;
+ }
+
+ if (!priv->stores) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "No remote store "
+ "plugins found");
+ ret = -1;
+ goto out;
+ }
+
+ if (local->fd) {
+ fd = fd_anonymous(local->fd->inode);
+ } else {
+ fd = fd_anonymous(local->loc.inode);
+ }
+
+ local->xattrinfo.size = size;
+ local->xattrinfo.offset = offset;
+ local->xattrinfo.flags = flags;
+
+ if (!fd) {
+ gf_msg("CS", GF_LOG_ERROR, 0, 0, "fd creation failed");
+ ret = -1;
+ goto out;
+ }
+
+ local->dlfd = fd;
+ local->dloffset = offset;
+
+ /*this calling method is for per volume setting */
+ ret = priv->stores->rdfop(frame, priv->stores->config);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "read failed"
+ ", remotepath: %s",
+ local->remotepath);
+ ret = -1;
+ goto out;
+ } else {
+ gf_msg(this->name, GF_LOG_INFO, 0, 0,
+ "read success, path"
+ " : %s",
+ local->remotepath);
+ }
out:
- if (retval)
- GF_FREE(retval);
+ if (fd) {
+ fd_unref(fd);
+ local->dlfd = NULL;
+ }
+ return ret;
+}
+
+int32_t
+cs_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iovec *vector, int32_t count,
+ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
+{
+ cs_local_t *local = NULL;
+ int ret = 0;
+ uint64_t val = 0;
+ fd_t *fd = NULL;
+
+ local = frame->local;
+ fd = local->fd;
+
+ local->call_cnt++;
+
+ if (op_ret == -1) {
+ ret = dict_get_uint64(xdata, GF_CS_OBJECT_STATUS, &val);
+ if (ret == 0) {
+ if (val == GF_CS_ERROR) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "could not get file state, unwinding");
+ op_ret = -1;
+ op_errno = EIO;
+ goto unwind;
+ } else {
+ __cs_inode_ctx_update(this, fd->inode, val);
+ gf_msg(this->name, GF_LOG_INFO, 0, 0, " state = %" PRIu64, val);
+
+ if (local->call_cnt == 1 &&
+ (val == GF_CS_REMOTE || val == GF_CS_DOWNLOADING)) {
+ gf_msg(this->name, GF_LOG_INFO, 0, 0,
+ " will read from remote : %" PRIu64, val);
+ goto repair;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "second readv, Unwinding");
+ goto unwind;
+ }
+ }
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "file state "
+ "could not be figured, unwinding");
+ goto unwind;
+ }
+ } else {
+ /* successful readv => file is local */
+ __cs_inode_ctx_update(this, fd->inode, GF_CS_LOCAL);
+ gf_msg(this->name, GF_LOG_INFO, 0, 0,
+ "state : GF_CS_LOCAL"
+ ", readv successful");
+
+ goto unwind;
+ }
+
+repair:
+ ret = locate_and_execute(frame);
+ if (ret) {
+ goto unwind;
+ }
+
+ return 0;
+
+unwind:
+ CS_STACK_UNWIND(readv, frame, op_ret, op_errno, vector, count, stbuf,
+ iobref, xdata);
+
+ return 0;
+}
+
+int32_t
+cs_resume_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ int ret = 0;
+
+ ret = cs_resume_postprocess(this, frame, fd->inode);
+ if (ret) {
+ goto unwind;
+ }
+
+ cs_inodelk_unlock(frame);
+
+ STACK_WIND(frame, cs_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
+
+ return 0;
+
+unwind:
+ cs_inodelk_unlock(frame);
+
+ cs_common_cbk(frame);
+
+ return 0;
+}
+
+int32_t
+cs_resume_remote_readv(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata)
+{
+ int ret = 0;
+ cs_local_t *local = NULL;
+ gf_cs_obj_state state = -1;
+ cs_inode_ctx_t *ctx = NULL;
+
+ cs_inodelk_unlock(frame);
+
+ local = frame->local;
+ if (!local) {
+ ret = -1;
+ goto unwind;
+ }
+
+ __cs_inode_ctx_get(this, fd->inode, &ctx);
+
+ state = __cs_get_file_state(fd->inode, ctx);
+ if (state == GF_CS_ERROR) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "status is GF_CS_ERROR."
+ " Aborting readv");
+ local->op_ret = -1;
+ local->op_errno = EREMOTE;
+ ret = -1;
+ goto unwind;
+ }
+
+ /* Serve readv from remote store only if it is remote. */
+ gf_msg_debug(this->name, 0, "status of file %s is %d",
+ local->remotepath ? local->remotepath : "", state);
+
+ /* We will reach this condition if local inode ctx had REMOTE
+ * state when the control was in cs_readv but after stat
+ * we got an updated state saying that the file is LOCAL.
+ */
+ if (state == GF_CS_LOCAL) {
+ STACK_WIND(frame, cs_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
+ xdata);
+ } else if (state == GF_CS_REMOTE) {
+ ret = cs_resume_remote_readv_postprocess(this, frame, fd->inode, offset,
+ size, flags);
+ /* Failed to submit the remote readv fop to plugin */
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = EREMOTE;
+ goto unwind;
+ }
+ /* When the file is in any other intermediate state,
+ * we should not perform remote reads.
+ */
+ } else {
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ goto unwind;
+ }
+
+ return 0;
+
+unwind:
+ cs_common_cbk(frame);
+
+ return 0;
+}
+
+int32_t
+cs_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ int op_errno = ENOMEM;
+ cs_local_t *local = NULL;
+ int ret = 0;
+ cs_inode_ctx_t *ctx = NULL;
+ gf_cs_obj_state state = -1;
+ cs_private_t *priv = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+
+ priv = this->private;
+
+ local = cs_local_init(this, frame, NULL, fd, GF_FOP_READ);
+ if (!local) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "local init failed");
+ goto err;
+ }
+
+ __cs_inode_ctx_get(this, fd->inode, &ctx);
+
+ if (ctx)
+ state = __cs_get_file_state(fd->inode, ctx);
+ else
+ state = GF_CS_LOCAL;
+
+ local->xattr_req = xdata ? dict_ref(xdata) : (xdata = dict_new());
+
+ ret = dict_set_uint32(local->xattr_req, GF_CS_OBJECT_STATUS, 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "dict_set failed key:"
+ " %s",
+ GF_CS_OBJECT_STATUS);
+ goto err;
+ }
+
+ if (priv->remote_read) {
+ local->stub = fop_readv_stub(frame, cs_resume_remote_readv, fd, size,
+ offset, flags, xdata);
+ } else {
+ local->stub = fop_readv_stub(frame, cs_resume_readv, fd, size, offset,
+ flags, xdata);
+ }
+ if (!local->stub) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "insufficient memory");
+ goto err;
+ }
+
+ if (state == GF_CS_LOCAL) {
+ STACK_WIND(frame, cs_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
+ xdata);
+ } else {
+ local->call_cnt++;
+ ret = locate_and_execute(frame);
+ if (ret) {
+ goto err;
+ }
+ }
+
+ return 0;
+
+err:
+ CS_STACK_UNWIND(readv, frame, -1, op_errno, NULL, -1, NULL, NULL, NULL);
+
+ return 0;
+}
+
+int
+cs_resume_remote_readv_postprocess(xlator_t *this, call_frame_t *frame,
+ inode_t *inode, off_t offset, size_t size,
+ uint32_t flags)
+{
+ int ret = 0;
+
+ ret = cs_serve_readv(frame, offset, size, flags);
return ret;
}
@@ -1059,7 +1448,7 @@ cs_stat_check_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
goto err;
} else {
ret = __cs_inode_ctx_update(this, inode, val);
- gf_msg_debug(this->name, 0, "status : %lu", val);
+ gf_msg_debug(this->name, 0, "status : %" PRIu64, val);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, 0, "ctx update failed");
local->op_ret = -1;
@@ -1074,7 +1463,7 @@ cs_stat_check_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
goto err;
}
- ret = dict_get_str(xdata, GF_CS_OBJECT_REMOTE, &filepath);
+ ret = dict_get_str_sizen(xdata, GF_CS_OBJECT_REMOTE, &filepath);
if (filepath) {
gf_msg_debug(this->name, 0, "filepath returned %s", filepath);
local->remotepath = gf_strdup(filepath);
@@ -1087,6 +1476,10 @@ cs_stat_check_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
gf_msg_debug(this->name, 0, "NULL filepath");
}
+ ret = cs_update_xattrs(frame, xdata);
+ if (ret)
+ goto err;
+
local->op_ret = 0;
local->xattr_rsp = dict_ref(xdata);
memcpy(&local->stbuf, stbuf, sizeof(struct iatt));
@@ -1121,6 +1514,8 @@ cs_do_stat_check(call_frame_t *main_frame)
goto err;
}
+ cs_set_xattr_req(main_frame);
+
if (local->fd) {
STACK_WIND(main_frame, cs_stat_check_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fstat, local->fd, local->xattr_req);
@@ -1177,6 +1572,10 @@ cs_common_cbk(call_frame_t *frame)
NULL, NULL, NULL);
break;
+ case GF_FOP_TRUNCATE:
+ CS_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno,
+ NULL, NULL, NULL);
+ break;
default:
break;
}
@@ -1354,7 +1753,7 @@ cs_resume_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
__cs_inode_ctx_get(this, loc->inode, &ctx);
- state = __cs_get_file_state(this, loc->inode, ctx);
+ state = __cs_get_file_state(loc->inode, ctx);
if (state == GF_CS_ERROR) {
/* file is already remote */
@@ -1396,7 +1795,7 @@ unwind:
}
gf_cs_obj_state
-__cs_get_file_state(xlator_t *this, inode_t *inode, cs_inode_ctx_t *ctx)
+__cs_get_file_state(inode_t *inode, cs_inode_ctx_t *ctx)
{
gf_cs_obj_state state = -1;
@@ -1427,7 +1826,7 @@ __cs_inode_ctx_get(xlator_t *this, inode_t *inode, cs_inode_ctx_t **ctx)
if (ret)
*ctx = NULL;
else
- *ctx = (cs_inode_ctx_t *)ctxint;
+ *ctx = (cs_inode_ctx_t *)(uintptr_t)ctxint;
return;
}
@@ -1452,7 +1851,7 @@ __cs_inode_ctx_update(xlator_t *this, inode_t *inode, uint64_t val)
ctx->state = val;
- ctxint = (uint64_t)ctx;
+ ctxint = (uint64_t)(uintptr_t)ctx;
ret = __inode_ctx_set(inode, this, &ctxint);
if (ret) {
@@ -1460,7 +1859,7 @@ __cs_inode_ctx_update(xlator_t *this, inode_t *inode, uint64_t val)
goto out;
}
} else {
- ctx = (cs_inode_ctx_t *)ctxint;
+ ctx = (cs_inode_ctx_t *)(uintptr_t)ctxint;
ctx->state = val;
}
@@ -1483,7 +1882,7 @@ cs_inode_ctx_reset(xlator_t *this, inode_t *inode)
return 0;
}
- ctx = (cs_inode_ctx_t *)ctxint;
+ ctx = (cs_inode_ctx_t *)(uintptr_t)ctxint;
GF_FREE(ctx);
return 0;
@@ -1505,7 +1904,7 @@ cs_resume_postprocess(xlator_t *this, call_frame_t *frame, inode_t *inode)
__cs_inode_ctx_get(this, inode, &ctx);
- state = __cs_get_file_state(this, inode, ctx);
+ state = __cs_get_file_state(inode, ctx);
if (state == GF_CS_ERROR) {
gf_msg(this->name, GF_LOG_ERROR, 0, 0,
"status is GF_CS_ERROR."
@@ -1532,6 +1931,7 @@ cs_resume_postprocess(xlator_t *this, call_frame_t *frame, inode_t *inode)
out:
return ret;
}
+
int32_t
cs_fdctx_to_dict(xlator_t *this, fd_t *fd, dict_t *dict)
{
@@ -1627,7 +2027,9 @@ struct xlator_fops cs_fops = {
.zerofill = cs_zerofill,
};
-struct xlator_cbks cs_cbks = {};
+struct xlator_cbks cs_cbks = {
+ .forget = cs_forget,
+};
struct xlator_dumpops cs_dumpops = {
.fdctx_to_dict = cs_fdctx_to_dict,
@@ -1647,6 +2049,15 @@ struct volume_options cs_options[] = {
{.key = {"cloudsync-storetype"},
.type = GF_OPTION_TYPE_STR,
.description = "Defines which remote store is enabled"},
+ {.key = {"cloudsync-remote-read"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .description = "Defines a remote read fop when on"},
+ {.key = {"cloudsync-store-id"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Defines a volume wide store id"},
+ {.key = {"cloudsync-product-id"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Defines a volume wide product id"},
{.key = {NULL}},
};
diff --git a/xlators/features/cloudsync/src/cloudsync.h b/xlators/features/cloudsync/src/cloudsync.h
index 7c70c744d2b..d24141978d6 100644
--- a/xlators/features/cloudsync/src/cloudsync.h
+++ b/xlators/features/cloudsync/src/cloudsync.h
@@ -11,14 +11,15 @@
#ifndef __CLOUDSYNC_H__
#define __CLOUDSYNC_H__
-#include "glusterfs.h"
-#include "xlator.h"
-#include "defaults.h"
-#include "syncop.h"
-#include "call-stub.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/call-stub.h>
#include "cloudsync-common.h"
#include "cloudsync-autogen-fops.h"
+#define ALIGN_SIZE 4096
#define CS_LOCK_DOMAIN "cs.protect.file.stat"
typedef struct cs_dlstore {
off_t off;
@@ -29,6 +30,7 @@ typedef struct cs_dlstore {
} cs_dlstore;
typedef struct cs_inode_ctx {
+ cs_loc_xattr_t locxattr;
gf_cs_obj_state state;
} cs_inode_ctx_t;
@@ -85,7 +87,7 @@ void
__cs_inode_ctx_get(xlator_t *this, inode_t *inode, cs_inode_ctx_t **ctx);
gf_cs_obj_state
-__cs_get_file_state(xlator_t *this, inode_t *inode, cs_inode_ctx_t *ctx);
+__cs_get_file_state(inode_t *inode, cs_inode_ctx_t *ctx);
int
cs_inodelk_unlock(call_frame_t *main_frame);
@@ -100,4 +102,22 @@ cs_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t
cs_resume_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc,
off_t offset, dict_t *xattr_req);
+
+int32_t
+cs_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iovec *vector, int32_t count,
+ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata);
+int32_t
+cs_resume_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata);
+int32_t
+cs_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata);
+
+int
+cs_resume_remote_readv_postprocess(xlator_t *this, call_frame_t *frame,
+ inode_t *inode, off_t offset, size_t size,
+ uint32_t flags);
+int
+cs_serve_readv(call_frame_t *frame, off_t offset, size_t size, uint32_t flags);
#endif /* __CLOUDSYNC_H__ */
diff --git a/xlators/features/compress/src/cdc-helper.c b/xlators/features/compress/src/cdc-helper.c
index 71f446d51cd..f973ff56cf5 100644
--- a/xlators/features/compress/src/cdc-helper.c
+++ b/xlators/features/compress/src/cdc-helper.c
@@ -8,9 +8,9 @@
cases as published by the Free Software Foundation.
*/
-#include "glusterfs.h"
-#include "logging.h"
-#include "syscall.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/syscall.h>
#include "cdc.h"
#include "cdc-mem-types.h"
diff --git a/xlators/features/compress/src/cdc-mem-types.h b/xlators/features/compress/src/cdc-mem-types.h
index 56a5a05ee8c..928afdd2efe 100644
--- a/xlators/features/compress/src/cdc-mem-types.h
+++ b/xlators/features/compress/src/cdc-mem-types.h
@@ -11,7 +11,7 @@
#ifndef __CDC_MEM_TYPES_H
#define __CDC_MEM_TYPES_H
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_cdc_mem_types {
gf_cdc_mt_priv_t = gf_common_mt_end + 1,
diff --git a/xlators/features/compress/src/cdc.c b/xlators/features/compress/src/cdc.c
index adace6bcca8..b0b51e914ed 100644
--- a/xlators/features/compress/src/cdc.c
+++ b/xlators/features/compress/src/cdc.c
@@ -10,9 +10,9 @@
#include <sys/uio.h>
-#include "xlator.h"
-#include "defaults.h"
-#include "logging.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/logging.h>
#include "cdc.h"
#include "cdc-mem-types.h"
@@ -334,3 +334,15 @@ struct volume_options options[] = {
"to disk as a gzip file."},
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {GD_OP_VERSION_3_9_0},
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "cdc",
+ .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/features/compress/src/cdc.h b/xlators/features/compress/src/cdc.h
index 764f2028c75..cb87b06a989 100644
--- a/xlators/features/compress/src/cdc.h
+++ b/xlators/features/compress/src/cdc.h
@@ -15,7 +15,7 @@
#include "zlib.h"
#endif
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#ifndef MAX_IOVEC
#define MAX_IOVEC 16
diff --git a/xlators/features/gfid-access/src/gfid-access-mem-types.h b/xlators/features/gfid-access/src/gfid-access-mem-types.h
index ee7fd794da8..1c4d0b93de2 100644
--- a/xlators/features/gfid-access/src/gfid-access-mem-types.h
+++ b/xlators/features/gfid-access/src/gfid-access-mem-types.h
@@ -11,7 +11,7 @@
#ifndef _GFID_ACCESS_MEM_TYPES_H
#define _GFID_ACCESS_MEM_TYPES_H
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_changelog_mem_types {
gf_gfid_access_mt_priv_t = gf_common_mt_end + 1,
diff --git a/xlators/features/gfid-access/src/gfid-access.c b/xlators/features/gfid-access/src/gfid-access.c
index 0cf7870856b..3fea5672a21 100644
--- a/xlators/features/gfid-access/src/gfid-access.c
+++ b/xlators/features/gfid-access/src/gfid-access.c
@@ -8,9 +8,9 @@
cases as published by the Free Software Foundation.
*/
#include "gfid-access.h"
-#include "inode.h"
-#include "byte-order.h"
-#include "statedump.h"
+#include <glusterfs/inode.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/statedump.h>
int
ga_valid_inode_loc_copy(loc_t *dst, loc_t *src, xlator_t *this)
@@ -327,10 +327,8 @@ out:
static gf_boolean_t
__is_gfid_access_dir(uuid_t gfid)
{
- uuid_t aux_gfid;
-
- memset(aux_gfid, 0, 16);
- aux_gfid[15] = GF_AUX_GFID;
+ static uuid_t aux_gfid = {0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, GF_AUX_GFID};
if (gf_uuid_compare(gfid, aux_gfid) == 0)
return _gf_true;
@@ -448,14 +446,6 @@ ga_new_entry(call_frame_t *frame, xlator_t *this, loc_t *loc, data_t *data,
0,
};
- args = ga_newfile_parse_args(this, data);
- if (!args)
- goto out;
-
- ret = gf_uuid_parse(args->gfid, gfid);
- if (ret)
- goto out;
-
if (!xdata) {
xdata = dict_new();
} else {
@@ -467,6 +457,14 @@ ga_new_entry(call_frame_t *frame, xlator_t *this, loc_t *loc, data_t *data,
goto out;
}
+ args = ga_newfile_parse_args(this, data);
+ if (!args)
+ goto out;
+
+ ret = gf_uuid_parse(args->gfid, gfid);
+ if (ret)
+ goto out;
+
ret = ga_fill_tmp_loc(loc, this, gfid, args->bname, xdata, &tmp_loc);
if (ret)
goto out;
@@ -1408,3 +1406,15 @@ struct volume_options options[] = {
/* This translator doesn't take any options, or provide any options */
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1},
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "gfid-access",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/gfid-access/src/gfid-access.h b/xlators/features/gfid-access/src/gfid-access.h
index 68ebe539564..b1e255e56c0 100644
--- a/xlators/features/gfid-access/src/gfid-access.h
+++ b/xlators/features/gfid-access/src/gfid-access.h
@@ -10,11 +10,11 @@
#ifndef __GFID_ACCESS_H__
#define __GFID_ACCESS_H__
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "gfid-access-mem-types.h"
#define UUID_CANONICAL_FORM_LEN 36
diff --git a/xlators/features/glupy/Makefile.am b/xlators/features/glupy/Makefile.am
deleted file mode 100644
index 060429ecf0f..00000000000
--- a/xlators/features/glupy/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = src examples
-
-CLEANFILES =
diff --git a/xlators/features/glupy/doc/README.md b/xlators/features/glupy/doc/README.md
deleted file mode 100644
index 4b8b863ef39..00000000000
--- a/xlators/features/glupy/doc/README.md
+++ /dev/null
@@ -1,44 +0,0 @@
-This is just the very start for a GlusterFS[1] meta-translator that will
-allow translator code to be written in Python. It's based on the standard
-Python embedding (not extending) techniques, plus a dash of the ctypes module.
-The interface is a pretty minimal adaptation of the dispatches and callbacks
-from the C API[2] to Python, as follows:
-
-* Dispatch functions and callbacks must be defined on an "xlator" class
- derived from gluster.Translator so that they'll be auto-registered with
- the C translator during initialization.
-
-* For each dispatch or callback function you want to intercept, you define a
- Python function using the xxx\_fop\_t or xxx\_cbk\_t decorator.
-
-* The arguments for each operation are different, so you'll need to refer to
- the C API. GlusterFS-specific types are used (though only loc\_t is fully
- defined so far) and type correctness is enforced by ctypes.
-
-* If you do intercept a dispatch function, it is your responsibility to call
- xxx\_wind (like STACK\_WIND in the C API but operation-specific) to pass
- the request to the next translator. If you do not intercept a function, it
- will default the same way as for C (pass through to the same operation with
- the same arguments on the first child translator).
-
-* If you intercept a callback function, it is your responsibility to call
- xxx\_unwind (like STACK\_UNWIND\_STRICT in the C API) to pass the request back
- to the caller.
-
-So far only the lookup and create operations are handled this way, to support
-the "negative lookup" example. Now that the basic infrastructure is in place,
-adding more functions should be very quick, though with that much boilerplate I
-might pause to write a code generator. I also plan to add structure
-definitions and interfaces for some of the utility functions in libglusterfs
-(especially those having to do with inode and fd context) in the fairly near
-future. Note that you can also use ctypes to get at anything not explicitly
-exposed to Python already.
-
-_If you're coming here because of the Linux Journal article, please note that
-the code has evolved since that was written. The version that matches the
-article is here:_
-
-https://github.com/jdarcy/glupy/tree/4bbae91ba459ea46ef32f2966562492e4ca9187a
-
-[1] http://www.gluster.org
-[2] http://pl.atyp.us/hekafs.org/dist/xlator_api_2.html
diff --git a/xlators/features/glupy/doc/TESTING b/xlators/features/glupy/doc/TESTING
deleted file mode 100644
index e05f17f498f..00000000000
--- a/xlators/features/glupy/doc/TESTING
+++ /dev/null
@@ -1,9 +0,0 @@
-Loading a translator written in Python using the glupy meta translator
--------------------------------------------------------------------------------
-'test.vol' is a simple volfile with the debug-trace Python translator on top
-of a brick. The volfile can be mounted using the following command.
-
-$ glusterfs --debug -f test.vol /path/to/mntpt
-
-If then file operations are performed on the newly mounted file system, log
-output would be printed by the Python translator on the standard output.
diff --git a/xlators/features/glupy/doc/test.vol b/xlators/features/glupy/doc/test.vol
deleted file mode 100644
index 0751a488c1f..00000000000
--- a/xlators/features/glupy/doc/test.vol
+++ /dev/null
@@ -1,10 +0,0 @@
-volume vol-posix
- type storage/posix
- option directory /path/to/brick
-end-volume
-
-volume vol-glupy
- type features/glupy
- option module-name debug-trace
- subvolumes vol-posix
-end-volume
diff --git a/xlators/features/glupy/examples/Makefile.am b/xlators/features/glupy/examples/Makefile.am
deleted file mode 100644
index c26abeaafb6..00000000000
--- a/xlators/features/glupy/examples/Makefile.am
+++ /dev/null
@@ -1,5 +0,0 @@
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
-
-glupyexamplesdir = $(xlatordir)/glupy
-
-glupyexamples_PYTHON = negative.py helloworld.py debug-trace.py
diff --git a/xlators/features/glupy/examples/debug-trace.py b/xlators/features/glupy/examples/debug-trace.py
deleted file mode 100644
index 6e012f6c547..00000000000
--- a/xlators/features/glupy/examples/debug-trace.py
+++ /dev/null
@@ -1,777 +0,0 @@
-
-from __future__ import print_function
-import sys
-import stat
-from uuid import UUID
-from time import strftime, localtime
-from gluster.glupy import *
-
-# This translator was written primarily to test the fop entry point definitions
-# and structure definitions in 'glupy.py'.
-
-# It is similar to the C language debug-trace translator, which logs the
-# arguments passed to the fops and their corresponding cbk functions.
-
-dl.get_id.restype = c_long
-dl.get_id.argtypes = [ POINTER(call_frame_t) ]
-
-dl.get_rootunique.restype = c_uint64
-dl.get_rootunique.argtypes = [ POINTER(call_frame_t) ]
-
-def uuid2str (gfid):
- return str(UUID(''.join(map("{0:02x}".format, gfid))))
-
-
-def st_mode_from_ia (prot, filetype):
- st_mode = 0
- type_bit = 0
- prot_bit = 0
-
- if filetype == IA_IFREG:
- type_bit = stat.S_IFREG
- elif filetype == IA_IFDIR:
- type_bit = stat.S_IFDIR
- elif filetype == IA_IFLNK:
- type_bit = stat.S_IFLNK
- elif filetype == IA_IFBLK:
- type_bit = stat.S_IFBLK
- elif filetype == IA_IFCHR:
- type_bit = stat.S_IFCHR
- elif filetype == IA_IFIFO:
- type_bit = stat.S_IFIFO
- elif filetype == IA_IFSOCK:
- type_bit = stat.S_IFSOCK
- elif filetype == IA_INVAL:
- pass
-
-
- if prot.suid:
- prot_bit |= stat.S_ISUID
- if prot.sgid:
- prot_bit |= stat.S_ISGID
- if prot.sticky:
- prot_bit |= stat.S_ISVTX
-
- if prot.owner.read:
- prot_bit |= stat.S_IRUSR
- if prot.owner.write:
- prot_bit |= stat.S_IWUSR
- if prot.owner.execn:
- prot_bit |= stat.S_IXUSR
-
- if prot.group.read:
- prot_bit |= stat.S_IRGRP
- if prot.group.write:
- prot_bit |= stat.S_IWGRP
- if prot.group.execn:
- prot_bit |= stat.S_IXGRP
-
- if prot.other.read:
- prot_bit |= stat.S_IROTH
- if prot.other.write:
- prot_bit |= stat.S_IWOTH
- if prot.other.execn:
- prot_bit |= stat.S_IXOTH
-
- st_mode = (type_bit | prot_bit)
-
- return st_mode
-
-
-def trace_stat2str (buf):
- gfid = uuid2str(buf.contents.ia_gfid)
- mode = st_mode_from_ia(buf.contents.ia_prot, buf.contents.ia_type)
- atime_buf = strftime("[%b %d %H:%M:%S]",
- localtime(buf.contents.ia_atime))
- mtime_buf = strftime("[%b %d %H:%M:%S]",
- localtime(buf.contents.ia_mtime))
- ctime_buf = strftime("[%b %d %H:%M:%S]",
- localtime(buf.contents.ia_ctime))
- return ("(gfid={0:s}, ino={1:d}, mode={2:o}, nlink={3:d}, uid ={4:d}, "+
- "gid ={5:d}, size={6:d}, blocks={7:d}, atime={8:s}, mtime={9:s}, "+
- "ctime={10:s})").format(gfid, buf.contents.ia_no, mode,
- buf.contents.ia_nlink,
- buf.contents.ia_uid,
- buf.contents.ia_gid,
- buf.contents.ia_size,
- buf.contents.ia_blocks,
- atime_buf, mtime_buf,
- ctime_buf)
-
-class xlator(Translator):
-
- def __init__(self, c_this):
- Translator.__init__(self, c_this)
- self.gfids = {}
-
- def lookup_fop(self, frame, this, loc, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(loc.contents.gfid)
- print(("GLUPY TRACE LOOKUP FOP- {0:d}: gfid={1:s}; " +
- "path={2:s}").format(unique, gfid, loc.contents.path))
- self.gfids[key] = gfid
- dl.wind_lookup(frame, POINTER(xlator_t)(), loc, xdata)
- return 0
-
- def lookup_cbk(self, frame, cookie, this, op_ret, op_errno,
- inode, buf, xdata, postparent):
- unique =dl.get_rootunique(frame)
- key =dl.get_id(frame)
- if op_ret == 0:
- gfid = uuid2str(buf.contents.ia_gfid)
- statstr = trace_stat2str(buf)
- postparentstr = trace_stat2str(postparent)
- print(("GLUPY TRACE LOOKUP CBK- {0:d}: gfid={1:s}; "+
- "op_ret={2:d}; *buf={3:s}; " +
- "*postparent={4:s}").format(unique, gfid,
- op_ret, statstr,
- postparentstr))
- else:
- gfid = self.gfids[key]
- print(("GLUPY TRACE LOOKUP CBK - {0:d}: gfid={1:s};" +
- " op_ret={2:d}; op_errno={3:d}").format(unique,
- gfid,
- op_ret,
- op_errno))
- del self.gfids[key]
- dl.unwind_lookup(frame, cookie, this, op_ret, op_errno,
- inode, buf, xdata, postparent)
- return 0
-
- def create_fop(self, frame, this, loc, flags, mode, umask, fd,
- xdata):
- unique = dl.get_rootunique(frame)
- gfid = uuid2str(loc.contents.gfid)
- print(("GLUPY TRACE CREATE FOP- {0:d}: gfid={1:s}; path={2:s}; " +
- "fd={3:s}; flags=0{4:o}; mode=0{5:o}; " +
- "umask=0{6:o}").format(unique, gfid, loc.contents.path,
- fd, flags, mode, umask))
- dl.wind_create(frame, POINTER(xlator_t)(), loc, flags, mode,
- umask, fd, xdata)
- return 0
-
- def create_cbk(self, frame, cookie, this, op_ret, op_errno, fd,
- inode, buf, preparent, postparent, xdata):
- unique = dl.get_rootunique(frame)
- if op_ret >= 0:
- gfid = uuid2str(inode.contents.gfid)
- statstr = trace_stat2str(buf)
- preparentstr = trace_stat2str(preparent)
- postparentstr = trace_stat2str(postparent)
- print(("GLUPY TRACE CREATE CBK- {0:d}: gfid={1:s};" +
- " op_ret={2:d}; fd={3:s}; *stbuf={4:s}; " +
- "*preparent={5:s};" +
- " *postparent={6:s}").format(unique, gfid, op_ret,
- fd, statstr,
- preparentstr,
- postparentstr))
- else:
- print(("GLUPY TRACE CREATE CBK- {0:d}: op_ret={1:d}; " +
- "op_errno={2:d}").format(unique, op_ret, op_errno))
- dl.unwind_create(frame, cookie, this, op_ret, op_errno, fd,
- inode, buf, preparent, postparent, xdata)
- return 0
-
- def open_fop(self, frame, this, loc, flags, fd, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(loc.contents.inode.contents.gfid)
- print(("GLUPY TRACE OPEN FOP- {0:d}: gfid={1:s}; path={2:s}; "+
- "flags={3:d}; fd={4:s}").format(unique, gfid,
- loc.contents.path, flags,
- fd))
- self.gfids[key] = gfid
- dl.wind_open(frame, POINTER(xlator_t)(), loc, flags, fd, xdata)
- return 0
-
- def open_cbk(self, frame, cookie, this, op_ret, op_errno, fd, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = self.gfids[key]
- print(("GLUPY TRACE OPEN CBK- {0:d}: gfid={1:s}; op_ret={2:d}; "
- "op_errno={3:d}; *fd={4:s}").format(unique, gfid,
- op_ret, op_errno, fd))
- del self.gfids[key]
- dl.unwind_open(frame, cookie, this, op_ret, op_errno, fd,
- xdata)
- return 0
-
- def readv_fop(self, frame, this, fd, size, offset, flags, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(fd.contents.inode.contents.gfid)
- print(("GLUPY TRACE READV FOP- {0:d}: gfid={1:s}; "+
- "fd={2:s}; size ={3:d}; offset={4:d}; " +
- "flags=0{5:x}").format(unique, gfid, fd, size, offset,
- flags))
- self.gfids[key] = gfid
- dl.wind_readv (frame, POINTER(xlator_t)(), fd, size, offset,
- flags, xdata)
- return 0
-
- def readv_cbk(self, frame, cookie, this, op_ret, op_errno, vector,
- count, buf, iobref, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = self.gfids[key]
- if op_ret >= 0:
- statstr = trace_stat2str(buf)
- print(("GLUPY TRACE READV CBK- {0:d}: gfid={1:s}, "+
- "op_ret={2:d}; *buf={3:s};").format(unique, gfid,
- op_ret,
- statstr))
-
- else:
- print(("GLUPY TRACE READV CBK- {0:d}: gfid={1:s}, "+
- "op_ret={2:d}; op_errno={3:d}").format(unique,
- gfid,
- op_ret,
- op_errno))
- del self.gfids[key]
- dl.unwind_readv (frame, cookie, this, op_ret, op_errno,
- vector, count, buf, iobref, xdata)
- return 0
-
- def writev_fop(self, frame, this, fd, vector, count, offset, flags,
- iobref, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(fd.contents.inode.contents.gfid)
- print(("GLUPY TRACE WRITEV FOP- {0:d}: gfid={1:s}; " +
- "fd={2:s}; count={3:d}; offset={4:d}; " +
- "flags=0{5:x}").format(unique, gfid, fd, count, offset,
- flags))
- self.gfids[key] = gfid
- dl.wind_writev(frame, POINTER(xlator_t)(), fd, vector, count,
- offset, flags, iobref, xdata)
- return 0
-
- def writev_cbk(self, frame, cookie, this, op_ret, op_errno, prebuf,
- postbuf, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- if op_ret >= 0:
- preopstr = trace_stat2str(prebuf)
- postopstr = trace_stat2str(postbuf)
- print(("GLUPY TRACE WRITEV CBK- {0:d}: op_ret={1:d}; " +
- "*prebuf={2:s}; " +
- "*postbuf={3:s}").format(unique, op_ret, preopstr,
- postopstr))
- else:
- gfid = self.gfids[key]
- print(("GLUPY TRACE WRITEV CBK- {0:d}: gfid={1:s}; "+
- "op_ret={2:d}; op_errno={3:d}").format(unique,
- gfid,
- op_ret,
- op_errno))
- del self.gfids[key]
- dl.unwind_writev (frame, cookie, this, op_ret, op_errno,
- prebuf, postbuf, xdata)
- return 0
-
- def opendir_fop(self, frame, this, loc, fd, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(loc.contents.inode.contents.gfid)
- print(("GLUPY TRACE OPENDIR FOP- {0:d}: gfid={1:s}; path={2:s}; "+
- "fd={3:s}").format(unique, gfid, loc.contents.path, fd))
- self.gfids[key] = gfid
- dl.wind_opendir(frame, POINTER(xlator_t)(), loc, fd, xdata)
- return 0
-
- def opendir_cbk(self, frame, cookie, this, op_ret, op_errno, fd,
- xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = self.gfids[key]
- print(("GLUPY TRACE OPENDIR CBK- {0:d}: gfid={1:s}; op_ret={2:d};"+
- " op_errno={3:d}; fd={4:s}").format(unique, gfid, op_ret,
- op_errno, fd))
- del self.gfids[key]
- dl.unwind_opendir(frame, cookie, this, op_ret, op_errno,
- fd, xdata)
- return 0
-
- def readdir_fop(self, frame, this, fd, size, offset, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(fd.contents.inode.contents.gfid)
- print(("GLUPY TRACE READDIR FOP- {0:d}: gfid={1:s}; fd={2:s}; " +
- "size={3:d}; offset={4:d}").format(unique, gfid, fd, size,
- offset))
- self.gfids[key] = gfid
- dl.wind_readdir(frame, POINTER(xlator_t)(), fd, size, offset,
- xdata)
- return 0
-
- def readdir_cbk(self, frame, cookie, this, op_ret, op_errno, buf,
- xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = self.gfids[key]
- print(("GLUPY TRACE READDIR CBK- {0:d}: gfid={1:s}; op_ret={2:d};"+
- " op_errno={3:d}").format(unique, gfid, op_ret, op_errno))
- del self.gfids[key]
- dl.unwind_readdir(frame, cookie, this, op_ret, op_errno, buf,
- xdata)
- return 0
-
- def readdirp_fop(self, frame, this, fd, size, offset, dictionary):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(fd.contents.inode.contents.gfid)
- print(("GLUPY TRACE READDIRP FOP- {0:d}: gfid={1:s}; fd={2:s}; "+
- " size={3:d}; offset={4:d}").format(unique, gfid, fd, size,
- offset))
- self.gfids[key] = gfid
- dl.wind_readdirp(frame, POINTER(xlator_t)(), fd, size, offset,
- dictionary)
- return 0
-
- def readdirp_cbk(self, frame, cookie, this, op_ret, op_errno, buf,
- xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = self.gfids[key]
- print(("GLUPY TRACE READDIRP CBK- {0:d}: gfid={1:s}; "+
- "op_ret={2:d}; op_errno={3:d}").format(unique, gfid,
- op_ret, op_errno))
- del self.gfids[key]
- dl.unwind_readdirp(frame, cookie, this, op_ret, op_errno, buf,
- xdata)
- return 0
-
- def mkdir_fop(self, frame, this, loc, mode, umask, xdata):
- unique = dl.get_rootunique(frame)
- gfid = uuid2str(loc.contents.inode.contents.gfid)
- print(("GLUPY TRACE MKDIR FOP- {0:d}: gfid={1:s}; path={2:s}; " +
- "mode={3:d}; umask=0{4:o}").format(unique, gfid,
- loc.contents.path, mode,
- umask))
- dl.wind_mkdir(frame, POINTER(xlator_t)(), loc, mode, umask,
- xdata)
- return 0
-
- def mkdir_cbk(self, frame, cookie, this, op_ret, op_errno, inode, buf,
- preparent, postparent, xdata):
- unique = dl.get_rootunique(frame)
- if op_ret == 0:
- gfid = uuid2str(inode.contents.gfid)
- statstr = trace_stat2str(buf)
- preparentstr = trace_stat2str(preparent)
- postparentstr = trace_stat2str(postparent)
- print(("GLUPY TRACE MKDIR CBK- {0:d}: gfid={1:s}; "+
- "op_ret={2:d}; *stbuf={3:s}; *prebuf={4:s}; "+
- "*postbuf={5:s} ").format(unique, gfid, op_ret,
- statstr,
- preparentstr,
- postparentstr))
- else:
- print(("GLUPY TRACE MKDIR CBK- {0:d}: op_ret={1:d}; "+
- "op_errno={2:d}").format(unique, op_ret, op_errno))
- dl.unwind_mkdir(frame, cookie, this, op_ret, op_errno, inode,
- buf, preparent, postparent, xdata)
- return 0
-
- def rmdir_fop(self, frame, this, loc, flags, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(loc.contents.inode.contents.gfid)
- print(("GLUPY TRACE RMDIR FOP- {0:d}: gfid={1:s}; path={2:s}; "+
- "flags={3:d}").format(unique, gfid, loc.contents.path,
- flags))
- self.gfids[key] = gfid
- dl.wind_rmdir(frame, POINTER(xlator_t)(), loc, flags, xdata)
- return 0
-
- def rmdir_cbk(self, frame, cookie, this, op_ret, op_errno, preparent,
- postparent, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = self.gfids[key]
- if op_ret == 0:
- preparentstr = trace_stat2str(preparent)
- postparentstr = trace_stat2str(postparent)
- print(("GLUPY TRACE RMDIR CBK- {0:d}: gfid={1:s}; "+
- "op_ret={2:d}; *prebuf={3:s}; "+
- "*postbuf={4:s}").format(unique, gfid, op_ret,
- preparentstr,
- postparentstr))
- else:
- print(("GLUPY TRACE RMDIR CBK- {0:d}: gfid={1:s}; "+
- "op_ret={2:d}; op_errno={3:d}").format(unique,
- gfid,
- op_ret,
- op_errno))
- del self.gfids[key]
- dl.unwind_rmdir(frame, cookie, this, op_ret, op_errno,
- preparent, postparent, xdata)
- return 0
-
- def stat_fop(self, frame, this, loc, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(loc.contents.inode.contents.gfid)
- print(("GLUPY TRACE STAT FOP- {0:d}: gfid={1:s}; " +
- " path={2:s}").format(unique, gfid, loc.contents.path))
- self.gfids[key] = gfid
- dl.wind_stat(frame, POINTER(xlator_t)(), loc, xdata)
- return 0
-
- def stat_cbk(self, frame, cookie, this, op_ret, op_errno, buf,
- xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = self.gfids[key]
- if op_ret == 0:
- statstr = trace_stat2str(buf)
- print(("GLUPY TRACE STAT CBK- {0:d}: gfid={1:s}; "+
- "op_ret={2:d}; *buf={3:s};").format(unique,
- gfid,
- op_ret,
- statstr))
- else:
- print(("GLUPY TRACE STAT CBK- {0:d}: gfid={1:s}; "+
- "op_ret={2:d}; op_errno={3:d}").format(unique,
- gfid,
- op_ret,
- op_errno))
- del self.gfids[key]
- dl.unwind_stat(frame, cookie, this, op_ret, op_errno,
- buf, xdata)
- return 0
-
- def fstat_fop(self, frame, this, fd, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(fd.contents.inode.contents.gfid)
- print(("GLUPY TRACE FSTAT FOP- {0:d}: gfid={1:s}; " +
- "fd={2:s}").format(unique, gfid, fd))
- self.gfids[key] = gfid
- dl.wind_fstat(frame, POINTER(xlator_t)(), fd, xdata)
- return 0
-
- def fstat_cbk(self, frame, cookie, this, op_ret, op_errno, buf,
- xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = self.gfids[key]
- if op_ret == 0:
- statstr = trace_stat2str(buf)
- print(("GLUPY TRACE FSTAT CBK- {0:d}: gfid={1:s} "+
- " op_ret={2:d}; *buf={3:s}").format(unique,
- gfid,
- op_ret,
- statstr))
- else:
- print(("GLUPY TRACE FSTAT CBK- {0:d}: gfid={1:s} "+
- "op_ret={2:d}; op_errno={3:d}").format(unique.
- gfid,
- op_ret,
- op_errno))
- del self.gfids[key]
- dl.unwind_fstat(frame, cookie, this, op_ret, op_errno,
- buf, xdata)
- return 0
-
- def statfs_fop(self, frame, this, loc, xdata):
- unique = dl.get_rootunique(frame)
- if loc.contents.inode:
- gfid = uuid2str(loc.contents.inode.contents.gfid)
- else:
- gfid = "0"
- print(("GLUPY TRACE STATFS FOP- {0:d}: gfid={1:s}; "+
- "path={2:s}").format(unique, gfid, loc.contents.path))
- dl.wind_statfs(frame, POINTER(xlator_t)(), loc, xdata)
- return 0
-
- def statfs_cbk(self, frame, cookie, this, op_ret, op_errno, buf,
- xdata):
- unique = dl.get_rootunique(frame)
- if op_ret == 0:
- #TBD: print buf (pointer to an iovec type object)
- print(("GLUPY TRACE STATFS CBK {0:d}: "+
- "op_ret={1:d}").format(unique, op_ret))
- else:
- print(("GLUPY TRACE STATFS CBK- {0:d}"+
- "op_ret={1:d}; op_errno={2:d}").format(unique,
- op_ret,
- op_errno))
- dl.unwind_statfs(frame, cookie, this, op_ret, op_errno,
- buf, xdata)
- return 0
-
- def getxattr_fop(self, frame, this, loc, name, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(loc.contents.inode.contents.gfid)
- print(("GLUPY TRACE GETXATTR FOP- {0:d}: gfid={1:s}; path={2:s};"+
- " name={3:s}").format(unique, gfid, loc.contents.path,
- name))
- self.gfids[key]=gfid
- dl.wind_getxattr(frame, POINTER(xlator_t)(), loc, name, xdata)
- return 0
-
- def getxattr_cbk(self, frame, cookie, this, op_ret, op_errno,
- dictionary, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = self.gfids[key]
- print(("GLUPY TRACE GETXATTR CBK- {0:d}: gfid={1:s}; "+
- "op_ret={2:d}; op_errno={3:d}; "+
- " dictionary={4:s}").format(unique, gfid, op_ret, op_errno,
- dictionary))
- del self.gfids[key]
- dl.unwind_getxattr(frame, cookie, this, op_ret, op_errno,
- dictionary, xdata)
- return 0
-
- def fgetxattr_fop(self, frame, this, fd, name, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(fd.contents.inode.contents.gfid)
- print(("GLUPY TRACE FGETXATTR FOP- {0:d}: gfid={1:s}; fd={2:s}; "+
- "name={3:s}").format(unique, gfid, fd, name))
- self.gfids[key] = gfid
- dl.wind_fgetxattr(frame, POINTER(xlator_t)(), fd, name, xdata)
- return 0
-
- def fgetxattr_cbk(self, frame, cookie, this, op_ret, op_errno,
- dictionary, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = self.gfids[key]
- print(("GLUPY TRACE FGETXATTR CBK- {0:d}: gfid={1:s}; "+
- "op_ret={2:d}; op_errno={3:d};"+
- " dictionary={4:s}").format(unique, gfid, op_ret,
- op_errno, dictionary))
- del self.gfids[key]
- dl.unwind_fgetxattr(frame, cookie, this, op_ret, op_errno,
- dictionary, xdata)
- return 0
-
- def setxattr_fop(self, frame, this, loc, dictionary, flags, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(loc.contents.inode.contents.gfid)
- print(("GLUPY TRACE SETXATTR FOP- {0:d}: gfid={1:s}; path={2:s};"+
- " flags={3:d}").format(unique, gfid, loc.contents.path,
- flags))
- self.gfids[key] = gfid
- dl.wind_setxattr(frame, POINTER(xlator_t)(), loc, dictionary,
- flags, xdata)
- return 0
-
- def setxattr_cbk(self, frame, cookie, this, op_ret, op_errno, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = self.gfids[key]
- print(("GLUPY TRACE SETXATTR CBK- {0:d}: gfid={1:s}; "+
- "op_ret={2:d}; op_errno={3:d}").format(unique, gfid,
- op_ret, op_errno))
- del self.gfids[key]
- dl.unwind_setxattr(frame, cookie, this, op_ret, op_errno,
- xdata)
- return 0
-
- def fsetxattr_fop(self, frame, this, fd, dictionary, flags, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(fd.contents.inode.contents.gfid)
- print(("GLUPY TRACE FSETXATTR FOP- {0:d}: gfid={1:s}; fd={2:p}; "+
- "flags={3:d}").format(unique, gfid, fd, flags))
- self.gfids[key] = gfid
- dl.wind_fsetxattr(frame, POINTER(xlator_t)(), fd, dictionary,
- flags, xdata)
- return 0
-
- def fsetxattr_cbk(self, frame, cookie, this, op_ret, op_errno, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = self.gfids[key]
- print(("GLUPY TRACE FSETXATTR CBK- {0:d}: gfid={1:s}; "+
- "op_ret={2:d}; op_errno={3:d}").format(unique, gfid,
- op_ret, op_errno))
- del self.gfids[key]
- dl.unwind_fsetxattr(frame, cookie, this, op_ret, op_errno,
- xdata)
- return 0
-
- def removexattr_fop(self, frame, this, loc, name, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(loc.contents.inode.contents.gfid)
- print(("GLUPY TRACE REMOVEXATTR FOP- {0:d}: gfid={1:s}; "+
- "path={2:s}; name={3:s}").format(unique, gfid,
- loc.contents.path,
- name))
- self.gfids[key] = gfid
- dl.wind_removexattr(frame, POINTER(xlator_t)(), loc, name,
- xdata)
- return 0
-
- def removexattr_cbk(self, frame, cookie, this, op_ret, op_errno,
- xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = self.gfids[key]
- print(("GLUPY TRACE REMOVEXATTR CBK- {0:d}: gfid={1:s} "+
- " op_ret={2:d}; op_errno={3:d}").format(unique, gfid,
- op_ret, op_errno))
- del self.gfids[key]
- dl.unwind_removexattr(frame, cookie, this, op_ret, op_errno,
- xdata)
- return 0
-
- def link_fop(self, frame, this, oldloc, newloc, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- if (newloc.contents.inode):
- newgfid = uuid2str(newloc.contents.inode.contents.gfid)
- else:
- newgfid = "0"
- oldgfid = uuid2str(oldloc.contents.inode.contents.gfid)
- print(("GLUPY TRACE LINK FOP-{0:d}: oldgfid={1:s}; oldpath={2:s};"+
- "newgfid={3:s};"+
- "newpath={4:s}").format(unique, oldgfid,
- oldloc.contents.path,
- newgfid,
- newloc.contents.path))
- self.gfids[key] = oldgfid
- dl.wind_link(frame, POINTER(xlator_t)(), oldloc, newloc,
- xdata)
- return 0
-
- def link_cbk(self, frame, cookie, this, op_ret, op_errno, inode, buf,
- preparent, postparent, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = self.gfids[key]
- if op_ret == 0:
- statstr = trace_stat2str(buf)
- preparentstr = trace_stat2str(preparent)
- postparentstr = trace_stat2str(postparent)
- print(("GLUPY TRACE LINK CBK- {0:d}: op_ret={1:d} "+
- "*stbuf={2:s}; *prebuf={3:s}; "+
- "*postbuf={4:s} ").format(unique, op_ret, statstr,
- preparentstr,
- postparentstr))
- else:
- print(("GLUPY TRACE LINK CBK- {0:d}: gfid={1:s}; "+
- "op_ret={2:d}; "+
- "op_errno={3:d}").format(unique, gfid,
- op_ret, op_errno))
- del self.gfids[key]
- dl.unwind_link(frame, cookie, this, op_ret, op_errno, inode,
- buf, preparent, postparent, xdata)
- return 0
-
- def unlink_fop(self, frame, this, loc, xflag, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(loc.contents.inode.contents.gfid)
- print(("GLUPY TRACE UNLINK FOP- {0:d}; gfid={1:s}; path={2:s}; "+
- "flag={3:d}").format(unique, gfid, loc.contents.path,
- xflag))
- self.gfids[key] = gfid
- dl.wind_unlink(frame, POINTER(xlator_t)(), loc, xflag,
- xdata)
- return 0
-
- def unlink_cbk(self, frame, cookie, this, op_ret, op_errno,
- preparent, postparent, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = self.gfids[key]
- if op_ret == 0:
- preparentstr = trace_stat2str(preparent)
- postparentstr = trace_stat2str(postparent)
- print(("GLUPY TRACE UNLINK CBK- {0:d}: gfid ={1:s}; "+
- "op_ret={2:d}; *prebuf={3:s}; "+
- "*postbuf={4:s} ").format(unique, gfid, op_ret,
- preparentstr,
- postparentstr))
- else:
- print(("GLUPY TRACE UNLINK CBK: {0:d}: gfid ={1:s}; "+
- "op_ret={2:d}; "+
- "op_errno={3:d}").format(unique, gfid, op_ret,
- op_errno))
- del self.gfids[key]
- dl.unwind_unlink(frame, cookie, this, op_ret, op_errno,
- preparent, postparent, xdata)
- return 0
-
- def readlink_fop(self, frame, this, loc, size, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(loc.contents.inode.contents.gfid)
- print(("GLUPY TRACE READLINK FOP- {0:d}: gfid={1:s}; path={2:s};"+
- " size={3:d}").format(unique, gfid, loc.contents.path,
- size))
- self.gfids[key] = gfid
- dl.wind_readlink(frame, POINTER(xlator_t)(), loc, size,
- xdata)
- return 0
-
- def readlink_cbk(self, frame, cookie, this, op_ret, op_errno,
- buf, stbuf, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = self.gfids[key]
- if op_ret == 0:
- statstr = trace_stat2str(stbuf)
- print(("GLUPY TRACE READLINK CBK- {0:d}: gfid={1:s} "+
- " op_ret={2:d}; op_errno={3:d}; *prebuf={4:s}; "+
- "*postbuf={5:s} ").format(unique, gfid,
- op_ret, op_errno,
- buf, statstr))
- else:
- print(("GLUPY TRACE READLINK CBK- {0:d}: gfid={1:s} "+
- " op_ret={2:d}; op_errno={3:d}").format(unique,
- gfid,
- op_ret,
- op_errno))
- del self.gfids[key]
- dl.unwind_readlink(frame, cookie, this, op_ret, op_errno, buf,
- stbuf, xdata)
- return 0
-
- def symlink_fop(self, frame, this, linkpath, loc, umask, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = uuid2str(loc.contents.inode.contents.gfid)
- print(("GLUPY TRACE SYMLINK FOP- {0:d}: gfid={1:s}; "+
- "linkpath={2:s}; path={3:s};"+
- "umask=0{4:o}").format(unique, gfid, linkpath,
- loc.contents.path, umask))
- self.gfids[key] = gfid
- dl.wind_symlink(frame, POINTER(xlator_t)(), linkpath, loc,
- umask, xdata)
- return 0
-
- def symlink_cbk(self, frame, cookie, this, op_ret, op_errno,
- inode, buf, preparent, postparent, xdata):
- unique = dl.get_rootunique(frame)
- key = dl.get_id(frame)
- gfid = self.gfids[key]
- if op_ret == 0:
- statstr = trace_stat2str(buf)
- preparentstr = trace_stat2str(preparent)
- postparentstr = trace_stat2str(postparent)
- print(("GLUPY TRACE SYMLINK CBK- {0:d}: gfid={1:s}; "+
- "op_ret={2:d}; *stbuf={3:s}; *preparent={4:s}; "+
- "*postparent={5:s}").format(unique, gfid,
- op_ret, statstr,
- preparentstr,
- postparentstr))
- else:
- print(("GLUPY TRACE SYMLINK CBK- {0:d}: gfid={1:s}; "+
- "op_ret={2:d}; op_errno={3:d}").format(unique,
- gfid,
- op_ret,
- op_errno))
- del self.gfids[key]
- dl.unwind_symlink(frame, cookie, this, op_ret, op_errno,
- inode, buf, preparent, postparent, xdata)
- return 0
diff --git a/xlators/features/glupy/examples/helloworld.py b/xlators/features/glupy/examples/helloworld.py
deleted file mode 100644
index 282f9207949..00000000000
--- a/xlators/features/glupy/examples/helloworld.py
+++ /dev/null
@@ -1,21 +0,0 @@
-
-from __future__ import print_function
-import sys
-from gluster.glupy import *
-
-class xlator (Translator):
-
- def __init__(self, c_this):
- Translator.__init__(self, c_this)
-
- def lookup_fop(self, frame, this, loc, xdata):
- print("Python xlator: Hello!")
- dl.wind_lookup(frame, POINTER(xlator_t)(), loc, xdata)
- return 0
-
- def lookup_cbk(self, frame, cookie, this, op_ret, op_errno, inode, buf,
- xdata, postparent):
- print("Python xlator: Hello again!")
- dl.unwind_lookup(frame, cookie, this, op_ret, op_errno, inode, buf,
- xdata, postparent)
- return 0
diff --git a/xlators/features/glupy/examples/negative.py b/xlators/features/glupy/examples/negative.py
deleted file mode 100644
index e04b16aa553..00000000000
--- a/xlators/features/glupy/examples/negative.py
+++ /dev/null
@@ -1,93 +0,0 @@
-
-from __future__ import print_function
-import sys
-from uuid import UUID
-from gluster.glupy import *
-
-# Negative-lookup-caching example. If a file wasn't there the last time we
-# looked, it's probably still not there. This translator keeps track of
-# those failed lookups for us, and returns ENOENT without needing to pass the
-# call any further for repeated requests.
-
-# If we were doing this for real, we'd need separate caches for each xlator
-# instance. The easiest way to do this would be to have xlator.__init__
-# "register" each instance in a module-global dict, with the key as the C
-# translator address and the value as the xlator object itself. For testing
-# and teaching, it's sufficient just to have one cache. The keys are parent
-# GFIDs, and the entries are lists of names within that parent that we know
-# don't exist.
-cache = {}
-
-# TBD: we need a better way of handling per-request data (frame->local in C).
-dl.get_id.restype = c_long
-dl.get_id.argtypes = [ POINTER(call_frame_t) ]
-
-def uuid2str (gfid):
- return str(UUID(''.join(map("{0:02x}".format, gfid))))
-
-class xlator (Translator):
-
- def __init__ (self, c_this):
- self.requests = {}
- Translator.__init__(self, c_this)
-
- def lookup_fop (self, frame, this, loc, xdata):
- pargfid = uuid2str(loc.contents.pargfid)
- print("lookup FOP: %s:%s" % (pargfid, loc.contents.name))
- # Check the cache.
- if pargfid in cache:
- if loc.contents.name in cache[pargfid]:
- print("short-circuiting for %s:%s" % (pargfid,
- loc.contents.name))
- dl.unwind_lookup(frame, 0, this, -1, 2, None, None, None, None)
- return 0
- key = dl.get_id(frame)
- self.requests[key] = (pargfid, loc.contents.name[:])
- # TBD: get real child xl from init, pass it here
- dl.wind_lookup(frame, POINTER(xlator_t)(), loc, xdata)
- return 0
-
- def lookup_cbk (self, frame, cookie, this, op_ret, op_errno, inode, buf,
- xdata, postparent):
- print("lookup CBK: %d (%d)" % (op_ret, op_errno))
- key = dl.get_id(frame)
- pargfid, name = self.requests[key]
- # Update the cache.
- if op_ret == 0:
- print("found %s, removing from cache" % name)
- if pargfid in cache:
- cache[pargfid].discard(name)
- elif op_errno == 2: # ENOENT
- print("failed to find %s, adding to cache" % name)
- if pargfid in cache:
- cache[pargfid].add(name)
- else:
- cache[pargfid] = {name}
- del self.requests[key]
- dl.unwind_lookup(frame, cookie, this, op_ret, op_errno,
- inode, buf, xdata, postparent)
- return 0
-
- def create_fop (self, frame, this, loc, flags, mode, umask, fd, xdata):
- pargfid = uuid2str(loc.contents.pargfid)
- print("create FOP: %s:%s" % (pargfid, loc.contents.name))
- key = dl.get_id(frame)
- self.requests[key] = (pargfid, loc.contents.name[:])
- # TBD: get real child xl from init, pass it here
- dl.wind_create(frame, POINTER(xlator_t)(), loc, flags, mode, umask, fd, xdata)
- return 0
-
- def create_cbk (self, frame, cookie, this, op_ret, op_errno, fd, inode,
- buf, preparent, postparent, xdata):
- print("create CBK: %d (%d)" % (op_ret, op_errno))
- key = dl.get_id(frame)
- pargfid, name = self.requests[key]
- # Update the cache.
- if op_ret == 0:
- print("created %s, removing from cache" % name)
- if pargfid in cache:
- cache[pargfid].discard(name)
- del self.requests[key]
- dl.unwind_create(frame, cookie, this, op_ret, op_errno, fd, inode, buf,
- preparent, postparent, xdata)
- return 0
diff --git a/xlators/features/glupy/src/Makefile.am b/xlators/features/glupy/src/Makefile.am
deleted file mode 100644
index 817b0d00f61..00000000000
--- a/xlators/features/glupy/src/Makefile.am
+++ /dev/null
@@ -1,36 +0,0 @@
-xlator_LTLIBRARIES = glupy.la
-
-# Ensure GLUSTER_PYTHON_PATH is passed to glupy.so
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
-glupydir = $(xlatordir)/glupy
-
-AM_CPPFLAGS = $(GF_CPPFLAGS) \
- -I$(top_srcdir)/libglusterfs/src \
- -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
-
-AM_CFLAGS = -Wall -fno-strict-aliasing \
- -DGLUSTER_PYTHON_PATH=\"$(glupydir)\" \
- -DPATH_GLUSTERFS_GLUPY_MODULE=\"${xlatordir}/glupy${shrext_cmds}\" \
- $(GF_CFLAGS) $(PYTHON_CFLAGS)
-
-# Flags to build glupy.so with
-glupy_la_LDFLAGS = -module -nostartfiles \
- -export-symbols $(top_srcdir)/xlators/features/glupy/src/glupy.sym \
- $(GF_XLATOR_LDFLAGS) $(PYTHON_LIBS)
-
-glupy_la_SOURCES = glupy.c
-glupy_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
- -lpthread $(LIB_DL)
-
-noinst_HEADERS = glupy.h
-
-# Install __init__.py into the Python site-packages area
-pyglupydir = @BUILD_PYTHON_SITE_PACKAGES@/gluster
-pyglupy_PYTHON = __init__.py
-
-# Install glupy/__init_-.py into the Python site-packages area
-SUBDIRS = glupy
-
-CLEANFILES =
-
-EXTRA_DIST = glupy.sym
diff --git a/xlators/features/glupy/src/glupy.c b/xlators/features/glupy/src/glupy.c
deleted file mode 100644
index d1a111eab4a..00000000000
--- a/xlators/features/glupy/src/glupy.c
+++ /dev/null
@@ -1,2446 +0,0 @@
-/*
- Copyright (c) 2006-2014 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include <ctype.h>
-#include <dlfcn.h>
-#include <sys/uio.h>
-#include <Python.h>
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "defaults.h"
-
-#include "glupy.h"
-
-/* UTILITY FUNCTIONS FOR FOP-SPECIFIC CODE */
-
-pthread_key_t gil_init_key;
-
-PyGILState_STATE
-glupy_enter(void)
-{
- if (!pthread_getspecific(gil_init_key)) {
- PyEval_ReleaseLock();
- (void)pthread_setspecific(gil_init_key, (void *)1);
- }
-
- return PyGILState_Ensure();
-}
-
-void
-glupy_leave(PyGILState_STATE gstate)
-{
- PyGILState_Release(gstate);
-}
-
-/* FOP: LOOKUP */
-
-int32_t
-glupy_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xdata, struct iatt *postparent)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_LOOKUP]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_lookup_cbk_t)(priv->cbks[GLUPY_LOOKUP]))(
- frame, cookie, this, op_ret, op_errno, inode, buf, xdata, postparent);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata,
- postparent);
- return 0;
-}
-
-int32_t
-glupy_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_LOOKUP]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_lookup_t)(priv->fops[GLUPY_LOOKUP]))(frame, this, loc, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, xdata);
- return 0;
-}
-
-void
-wind_lookup(call_frame_t *frame, xlator_t *xl, loc_t *loc, dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_lookup_cbk, xl, xl->fops->lookup, loc, xdata);
-}
-
-void
-unwind_lookup(call_frame_t *frame, long cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata,
- struct iatt *postparent)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata,
- postparent);
-}
-
-void
-set_lookup_fop(long py_this, fop_lookup_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_LOOKUP] = (long)fop;
-}
-
-void
-set_lookup_cbk(long py_this, fop_lookup_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_LOOKUP] = (long)cbk;
-}
-
-/* FOP: CREATE */
-
-int32_t
-glupy_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_CREATE]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_create_cbk_t)(priv->cbks[GLUPY_CREATE]))(
- frame, cookie, this, op_ret, op_errno, fd, inode, buf, preparent,
- postparent, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent, xdata);
- return 0;
-}
-
-int32_t
-glupy_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_CREATE]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_create_t)(priv->fops[GLUPY_CREATE]))(frame, this, loc, flags,
- mode, umask, fd, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_create_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
- xdata);
- return 0;
-}
-
-void
-wind_create(call_frame_t *frame, xlator_t *xl, loc_t *loc, int32_t flags,
- mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_create_cbk, xl, xl->fops->create, loc, flags, mode,
- umask, fd, xdata);
-}
-
-void
-unwind_create(call_frame_t *frame, long cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, fd_t *fd, inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent, xdata);
-}
-
-void
-set_create_fop(long py_this, fop_create_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_CREATE] = (long)fop;
-}
-
-void
-set_create_cbk(long py_this, fop_create_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_CREATE] = (long)cbk;
-}
-
-/* FOP: OPEN */
-
-int32_t
-glupy_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_OPEN]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_open_cbk_t)(priv->cbks[GLUPY_OPEN]))(
- frame, cookie, this, op_ret, op_errno, fd, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, xdata);
- return 0;
-}
-
-int32_t
-glupy_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_OPEN]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_open_t)(priv->fops[GLUPY_OPEN]))(frame, this, loc, flags, fd,
- xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
- return 0;
-}
-
-void
-wind_open(call_frame_t *frame, xlator_t *xl, loc_t *loc, int32_t flags,
- fd_t *fd, dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_open_cbk, xl, xl->fops->open, loc, flags, fd,
- xdata);
-}
-
-void
-unwind_open(call_frame_t *frame, long cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, fd_t *fd, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, xdata);
-}
-
-void
-set_open_fop(long py_this, fop_open_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
- priv->fops[GLUPY_OPEN] = (long)fop;
-}
-
-void
-set_open_cbk(long py_this, fop_open_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
- priv->cbks[GLUPY_OPEN] = (long)cbk;
-}
-
-/* FOP: READV */
-
-int32_t
-glupy_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *stbuf, struct iobref *iobref,
- dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_READV]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_readv_cbk_t)(priv->cbks[GLUPY_READV]))(
- frame, cookie, this, op_ret, op_errno, vector, count, stbuf, iobref,
- xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, vector, count, stbuf,
- iobref, xdata);
- return 0;
-}
-
-int32_t
-glupy_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, uint32_t flags, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_READV]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_readv_t)(priv->fops[GLUPY_READV]))(frame, this, fd, size,
- offset, flags, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_readv_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
- return 0;
-}
-
-void
-wind_readv(call_frame_t *frame, xlator_t *xl, fd_t *fd, size_t size,
- off_t offset, uint32_t flags, dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_readv_cbk, xl, xl->fops->readv, fd, size, offset,
- flags, xdata);
-}
-
-void
-unwind_readv(call_frame_t *frame, long cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iovec *vector, int32_t count,
- struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, vector, count, stbuf,
- iobref, xdata);
-}
-
-void
-set_readv_fop(long py_this, fop_readv_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
- priv->fops[GLUPY_READV] = (long)fop;
-}
-
-void
-set_readv_cbk(long py_this, fop_readv_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
- priv->cbks[GLUPY_READV] = (long)cbk;
-}
-
-/* FOP: WRITEV */
-
-int32_t
-glupy_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_WRITEV]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_writev_cbk_t)(priv->cbks[GLUPY_WRITEV]))(
- frame, cookie, this, op_ret, op_errno, prebuf, postbuf, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
- return 0;
-}
-
-int32_t
-glupy_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset, uint32_t flags,
- struct iobref *iobref, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_WRITEV]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_writev_t)(priv->fops[GLUPY_WRITEV]))(
- frame, this, fd, vector, count, offset, flags, iobref, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_writev_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
- flags, iobref, xdata);
- return 0;
-}
-
-void
-wind_writev(call_frame_t *frame, xlator_t *xl, fd_t *fd, struct iovec *vector,
- int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
- dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_writev_cbk, xl, xl->fops->writev, fd, vector, count,
- offset, flags, iobref, xdata);
-}
-
-void
-unwind_writev(call_frame_t *frame, long cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
- dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
-}
-
-void
-set_writev_fop(long py_this, fop_writev_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
- priv->fops[GLUPY_WRITEV] = (long)fop;
-}
-
-void
-set_writev_cbk(long py_this, fop_writev_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
- priv->cbks[GLUPY_WRITEV] = (long)cbk;
-}
-
-/* FOP: OPENDIR */
-
-int32_t
-glupy_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_OPENDIR]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_opendir_cbk_t)(priv->cbks[GLUPY_OPENDIR]))(
- frame, cookie, this, op_ret, op_errno, fd, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(opendir, frame, op_ret, op_errno, fd, xdata);
- return 0;
-}
-
-int32_t
-glupy_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
- dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_OPENDIR]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_opendir_t)(priv->fops[GLUPY_OPENDIR]))(frame, this, loc, fd,
- xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_opendir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
- return 0;
-}
-
-void
-wind_opendir(call_frame_t *frame, xlator_t *xl, loc_t *loc, fd_t *fd,
- dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_opendir_cbk, xl, xl->fops->opendir, loc, fd, xdata);
-}
-
-void
-unwind_opendir(call_frame_t *frame, long cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, fd_t *fd, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(opendir, frame, op_ret, op_errno, fd, xdata);
-}
-
-void
-set_opendir_fop(long py_this, fop_opendir_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_OPENDIR] = (long)fop;
-}
-
-void
-set_opendir_cbk(long py_this, fop_opendir_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_OPENDIR] = (long)cbk;
-}
-
-/* FOP: READDIR */
-
-int32_t
-glupy_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
- dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_READDIR]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_readdir_cbk_t)(priv->cbks[GLUPY_READDIR]))(
- frame, cookie, this, op_ret, op_errno, entries, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(readdir, frame, op_ret, op_errno, entries, xdata);
- return 0;
-}
-
-int32_t
-glupy_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_READDIR]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_readdir_t)(priv->fops[GLUPY_READDIR]))(frame, this, fd, size,
- offset, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_readdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdir, fd, size, offset, xdata);
- return 0;
-}
-
-void
-wind_readdir(call_frame_t *frame, xlator_t *xl, fd_t *fd, size_t size,
- off_t offset, dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_readdir_cbk, xl, xl->fops->readdir, fd, size,
- offset, xdata);
-}
-
-void
-unwind_readdir(call_frame_t *frame, long cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, gf_dirent_t *entries, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(readdir, frame, op_ret, op_errno, entries, xdata);
-}
-
-void
-set_readdir_fop(long py_this, fop_readdir_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_READDIR] = (long)fop;
-}
-
-void
-set_readdir_cbk(long py_this, fop_readdir_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_READDIR] = (long)cbk;
-}
-
-/* FOP: READDIRP */
-
-int32_t
-glupy_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
- dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_READDIRP]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_readdirp_cbk_t)(priv->cbks[GLUPY_READDIRP]))(
- frame, cookie, this, op_ret, op_errno, entries, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata);
- return 0;
-}
-
-int32_t
-glupy_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_READDIRP]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_readdirp_t)(priv->fops[GLUPY_READDIRP]))(frame, this, fd, size,
- offset, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_readdirp_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdirp, fd, size, offset, xdata);
- return 0;
-}
-
-void
-wind_readdirp(call_frame_t *frame, xlator_t *xl, fd_t *fd, size_t size,
- off_t offset, dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_readdirp_cbk, xl, xl->fops->readdirp, fd, size,
- offset, xdata);
-}
-
-void
-unwind_readdirp(call_frame_t *frame, long cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
- dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata);
-}
-
-void
-set_readdirp_fop(long py_this, fop_readdirp_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_READDIRP] = (long)fop;
-}
-
-void
-set_readdirp_cbk(long py_this, fop_readdirp_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_READDIRP] = (long)cbk;
-}
-
-/* FOP:STAT */
-
-int32_t
-glupy_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_STAT]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_stat_cbk_t)(priv->cbks[GLUPY_STAT]))(
- frame, cookie, this, op_ret, op_errno, buf, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, buf, xdata);
- return 0;
-}
-
-int32_t
-glupy_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_STAT]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_stat_t)(priv->fops[GLUPY_STAT]))(frame, this, loc, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_stat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat, loc, xdata);
- return 0;
-}
-
-void
-wind_stat(call_frame_t *frame, xlator_t *xl, loc_t *loc, dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_stat_cbk, xl, xl->fops->stat, loc, xdata);
-}
-
-void
-unwind_stat(call_frame_t *frame, long cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *buf, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, buf, xdata);
-}
-
-void
-set_stat_fop(long py_this, fop_stat_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_STAT] = (long)fop;
-}
-
-void
-set_stat_cbk(long py_this, fop_stat_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_STAT] = (long)cbk;
-}
-
-/* FOP: FSTAT */
-
-int32_t
-glupy_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_FSTAT]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_fstat_cbk_t)(priv->cbks[GLUPY_FSTAT]))(
- frame, cookie, this, op_ret, op_errno, buf, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(fstat, frame, op_ret, op_errno, buf, xdata);
- return 0;
-}
-
-int32_t
-glupy_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_FSTAT]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_fstat_t)(priv->fops[GLUPY_FSTAT]))(frame, this, fd, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_fstat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd, xdata);
- return 0;
-}
-
-void
-wind_fstat(call_frame_t *frame, xlator_t *xl, fd_t *fd, dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_fstat_cbk, xl, xl->fops->fstat, fd, xdata);
-}
-
-void
-unwind_fstat(call_frame_t *frame, long cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *buf, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(fstat, frame, op_ret, op_errno, buf, xdata);
-}
-
-void
-set_fstat_fop(long py_this, fop_fstat_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_FSTAT] = (long)fop;
-}
-
-void
-set_fstat_cbk(long py_this, fop_fstat_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_FSTAT] = (long)cbk;
-}
-
-/* FOP:STATFS */
-
-int32_t
-glupy_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf,
- dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_STATFS]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_statfs_cbk_t)(priv->cbks[GLUPY_STATFS]))(
- frame, cookie, this, op_ret, op_errno, buf, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(statfs, frame, op_ret, op_errno, buf, xdata);
- return 0;
-}
-
-int32_t
-glupy_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_STATFS]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_statfs_t)(priv->fops[GLUPY_STATFS]))(frame, this, loc, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_statfs_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->statfs, loc, xdata);
- return 0;
-}
-
-void
-wind_statfs(call_frame_t *frame, xlator_t *xl, loc_t *loc, dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_statfs_cbk, xl, xl->fops->statfs, loc, xdata);
-}
-
-void
-unwind_statfs(call_frame_t *frame, long cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct statvfs *buf, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(statfs, frame, op_ret, op_errno, buf, xdata);
-}
-
-void
-set_statfs_fop(long py_this, fop_statfs_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_STATFS] = (long)fop;
-}
-
-void
-set_statfs_cbk(long py_this, fop_statfs_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_STATFS] = (long)cbk;
-}
-
-/* FOP: SETXATTR */
-
-int32_t
-glupy_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_SETXATTR]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_setxattr_cbk_t)(priv->cbks[GLUPY_SETXATTR]))(
- frame, cookie, this, op_ret, op_errno, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int32_t
-glupy_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int32_t flags, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_SETXATTR]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_setxattr_t)(priv->fops[GLUPY_SETXATTR]))(frame, this, loc, dict,
- flags, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_setxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata);
- return 0;
-}
-
-void
-wind_setxattr(call_frame_t *frame, xlator_t *xl, loc_t *loc, dict_t *dict,
- int32_t flags, dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_setxattr_cbk, xl, xl->fops->setxattr, loc, dict,
- flags, xdata);
-}
-
-void
-unwind_setxattr(call_frame_t *frame, long cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, xdata);
-}
-
-void
-set_setxattr_fop(long py_this, fop_setxattr_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_SETXATTR] = (long)fop;
-}
-
-void
-set_setxattr_cbk(long py_this, fop_setxattr_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_SETXATTR] = (long)cbk;
-}
-
-/* FOP: GETXATTR */
-
-int32_t
-glupy_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict,
- dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_GETXATTR]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_getxattr_cbk_t)(priv->cbks[GLUPY_GETXATTR]))(
- frame, cookie, this, op_ret, op_errno, dict, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict, xdata);
- return 0;
-}
-
-int32_t
-glupy_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_GETXATTR]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_getxattr_t)(priv->fops[GLUPY_GETXATTR]))(frame, this, loc, name,
- xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_getxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
- return 0;
-}
-
-void
-wind_getxattr(call_frame_t *frame, xlator_t *xl, loc_t *loc, const char *name,
- dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_getxattr_cbk, xl, xl->fops->getxattr, loc, name,
- xdata);
-}
-
-void
-unwind_getxattr(call_frame_t *frame, long cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict, xdata);
-}
-
-void
-set_getxattr_fop(long py_this, fop_getxattr_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_GETXATTR] = (long)fop;
-}
-
-void
-set_getxattr_cbk(long py_this, fop_getxattr_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_GETXATTR] = (long)cbk;
-}
-
-/* FOP: FSETXATTR */
-
-int32_t
-glupy_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_FSETXATTR]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_fsetxattr_cbk_t)(priv->cbks[GLUPY_FSETXATTR]))(
- frame, cookie, this, op_ret, op_errno, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int32_t
-glupy_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int32_t flags, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_FSETXATTR]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_fsetxattr_t)(priv->fops[GLUPY_FSETXATTR]))(frame, this, fd,
- dict, flags, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_fsetxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
- return 0;
-}
-
-void
-wind_fsetxattr(call_frame_t *frame, xlator_t *xl, fd_t *fd, dict_t *dict,
- int32_t flags, dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_fsetxattr_cbk, xl, xl->fops->fsetxattr, fd, dict,
- flags, xdata);
-}
-
-void
-unwind_fsetxattr(call_frame_t *frame, long cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, xdata);
-}
-
-void
-set_fsetxattr_fop(long py_this, fop_fsetxattr_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_FSETXATTR] = (long)fop;
-}
-
-void
-set_fsetxattr_cbk(long py_this, fop_fsetxattr_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_FSETXATTR] = (long)cbk;
-}
-
-/* FOP: FGETXATTR */
-
-int32_t
-glupy_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict,
- dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_FGETXATTR]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_fgetxattr_cbk_t)(priv->cbks[GLUPY_FGETXATTR]))(
- frame, cookie, this, op_ret, op_errno, dict, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(fgetxattr, frame, op_ret, op_errno, dict, xdata);
- return 0;
-}
-
-int32_t
-glupy_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
- dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_FGETXATTR]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_fgetxattr_t)(priv->fops[GLUPY_FGETXATTR]))(frame, this, fd,
- name, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_fgetxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
- return 0;
-}
-
-void
-wind_fgetxattr(call_frame_t *frame, xlator_t *xl, fd_t *fd, const char *name,
- dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_fgetxattr_cbk, xl, xl->fops->fgetxattr, fd, name,
- xdata);
-}
-
-void
-unwind_fgetxattr(call_frame_t *frame, long cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(fgetxattr, frame, op_ret, op_errno, dict, xdata);
-}
-
-void
-set_fgetxattr_fop(long py_this, fop_fgetxattr_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_FGETXATTR] = (long)fop;
-}
-
-void
-set_fgetxattr_cbk(long py_this, fop_fgetxattr_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_FGETXATTR] = (long)cbk;
-}
-
-/* FOP:REMOVEXATTR */
-
-int32_t
-glupy_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_REMOVEXATTR]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_removexattr_cbk_t)(priv->cbks[GLUPY_REMOVEXATTR]))(
- frame, cookie, this, op_ret, op_errno, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(removexattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int32_t
-glupy_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_REMOVEXATTR]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_removexattr_t)(priv->fops[GLUPY_REMOVEXATTR]))(frame, this, loc,
- name, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_removexattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
- return 0;
-}
-
-void
-wind_removexattr(call_frame_t *frame, xlator_t *xl, loc_t *loc,
- const char *name, dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_removexattr_cbk, xl, xl->fops->removexattr, loc,
- name, xdata);
-}
-
-void
-unwind_removexattr(call_frame_t *frame, long cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(removexattr, frame, op_ret, op_errno, xdata);
-}
-
-void
-set_removexattr_fop(long py_this, fop_removexattr_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_REMOVEXATTR] = (long)fop;
-}
-
-void
-set_removexattr_cbk(long py_this, fop_removexattr_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_REMOVEXATTR] = (long)cbk;
-}
-
-/* FOP:FREMOVEXATTR */
-
-int32_t
-glupy_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_FREMOVEXATTR]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_fremovexattr_cbk_t)(priv->cbks[GLUPY_FREMOVEXATTR]))(
- frame, cookie, this, op_ret, op_errno, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(fremovexattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int32_t
-glupy_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_FREMOVEXATTR]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_fremovexattr_t)(priv->fops[GLUPY_FREMOVEXATTR]))(
- frame, this, fd, name, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_fremovexattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
- return 0;
-}
-
-void
-wind_fremovexattr(call_frame_t *frame, xlator_t *xl, fd_t *fd, const char *name,
- dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_fremovexattr_cbk, xl, xl->fops->fremovexattr, fd,
- name, xdata);
-}
-
-void
-unwind_fremovexattr(call_frame_t *frame, long cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(fremovexattr, frame, op_ret, op_errno, xdata);
-}
-
-void
-set_fremovexattr_fop(long py_this, fop_fremovexattr_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_FREMOVEXATTR] = (long)fop;
-}
-
-void
-set_fremovexattr_cbk(long py_this, fop_fremovexattr_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_FREMOVEXATTR] = (long)cbk;
-}
-
-/* FOP: LINK*/
-int32_t
-glupy_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_LINK]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_link_cbk_t)(priv->cbks[GLUPY_LINK]))(
- frame, cookie, this, op_ret, op_errno, inode, buf, preparent,
- postparent, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(link, frame, op_ret, op_errno, inode, buf, preparent,
- postparent, xdata);
- return 0;
-}
-
-int32_t
-glupy_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_LINK]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_link_t)(priv->fops[GLUPY_LINK]))(frame, this, oldloc, newloc,
- xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_link_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
- return 0;
-}
-
-void
-wind_link(call_frame_t *frame, xlator_t *xl, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_link_cbk, xl, xl->fops->link, oldloc, newloc,
- xdata);
-}
-
-void
-unwind_link(call_frame_t *frame, long cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(link, frame, op_ret, op_errno, inode, buf, preparent,
- postparent, xdata);
-}
-
-void
-set_link_fop(long py_this, fop_link_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_LINK] = (long)fop;
-}
-
-void
-set_link_cbk(long py_this, fop_link_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_LINK] = (long)cbk;
-}
-
-/* FOP: SYMLINK*/
-int32_t
-glupy_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_SYMLINK]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_symlink_cbk_t)(priv->cbks[GLUPY_SYMLINK]))(
- frame, cookie, this, op_ret, op_errno, inode, buf, preparent,
- postparent, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(symlink, frame, op_ret, op_errno, inode, buf, preparent,
- postparent, xdata);
- return 0;
-}
-
-int32_t
-glupy_symlink(call_frame_t *frame, xlator_t *this, const char *linkname,
- loc_t *loc, mode_t umask, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_SYMLINK]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_symlink_t)(priv->fops[GLUPY_SYMLINK]))(frame, this, linkname,
- loc, umask, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_symlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink, linkname, loc, umask, xdata);
- return 0;
-}
-
-void
-wind_symlink(call_frame_t *frame, xlator_t *xl, const char *linkname,
- loc_t *loc, mode_t umask, dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_symlink_cbk, xl, xl->fops->symlink, linkname, loc,
- umask, xdata);
-}
-
-void
-unwind_symlink(call_frame_t *frame, long cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(symlink, frame, op_ret, op_errno, inode, buf, preparent,
- postparent, xdata);
-}
-
-void
-set_symlink_fop(long py_this, fop_symlink_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_SYMLINK] = (long)fop;
-}
-
-void
-set_symlink_cbk(long py_this, fop_symlink_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_SYMLINK] = (long)cbk;
-}
-
-/* FOP: READLINK */
-int32_t
-glupy_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, const char *path,
- struct iatt *buf, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_READLINK]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_readlink_cbk_t)(priv->cbks[GLUPY_READLINK]))(
- frame, cookie, this, op_ret, op_errno, path, buf, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(readlink, frame, op_ret, op_errno, path, buf, xdata);
- return 0;
-}
-
-int32_t
-glupy_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
- dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_READLINK]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_readlink_t)(priv->fops[GLUPY_READLINK]))(frame, this, loc, size,
- xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_readlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readlink, loc, size, xdata);
- return 0;
-}
-
-void
-wind_readlink(call_frame_t *frame, xlator_t *xl, loc_t *loc, size_t size,
- dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_readlink_cbk, xl, xl->fops->readlink, loc, size,
- xdata);
-}
-
-void
-unwind_readlink(call_frame_t *frame, long cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, const char *path,
- struct iatt *buf, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(readlink, frame, op_ret, op_errno, path, buf, xdata);
-}
-
-void
-set_readlink_fop(long py_this, fop_readlink_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_READLINK] = (long)fop;
-}
-
-void
-set_readlink_cbk(long py_this, fop_readlink_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_READLINK] = (long)cbk;
-}
-
-/* FOP: UNLINK */
-
-int32_t
-glupy_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_UNLINK]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_unlink_cbk_t)(priv->cbks[GLUPY_UNLINK]))(
- frame, cookie, this, op_ret, op_errno, preparent, postparent, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent,
- xdata);
- return 0;
-}
-
-int32_t
-glupy_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
- dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_UNLINK]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_unlink_t)(priv->fops[GLUPY_UNLINK]))(frame, this, loc, xflags,
- xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, loc, xflags, xdata);
- return 0;
-}
-
-void
-wind_unlink(call_frame_t *frame, xlator_t *xl, loc_t *loc, int xflags,
- dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_unlink_cbk, xl, xl->fops->unlink, loc, xflags,
- xdata);
-}
-
-void
-unwind_unlink(call_frame_t *frame, long cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent,
- xdata);
-}
-
-void
-set_unlink_fop(long py_this, fop_unlink_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_UNLINK] = (long)fop;
-}
-
-void
-set_unlink_cbk(long py_this, fop_unlink_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_UNLINK] = (long)cbk;
-}
-
-/* FOP: MKDIR */
-
-int32_t
-glupy_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_MKDIR]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_mkdir_cbk_t)(priv->cbks[GLUPY_MKDIR]))(
- frame, cookie, this, op_ret, op_errno, inode, buf, preparent,
- postparent, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(mkdir, frame, op_ret, op_errno, inode, buf, preparent,
- postparent, xdata);
- return 0;
-}
-
-int32_t
-glupy_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- mode_t umask, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_MKDIR]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_mkdir_t)(priv->fops[GLUPY_MKDIR]))(frame, this, loc, mode,
- umask, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_mkdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
- return 0;
-}
-
-void
-wind_mkdir(call_frame_t *frame, xlator_t *xl, loc_t *loc, mode_t mode,
- mode_t umask, dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_mkdir_cbk, xl, xl->fops->mkdir, loc, mode, umask,
- xdata);
-}
-
-void
-unwind_mkdir(call_frame_t *frame, long cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(mkdir, frame, op_ret, op_errno, inode, buf, preparent,
- postparent, xdata);
-}
-
-void
-set_mkdir_fop(long py_this, fop_mkdir_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_MKDIR] = (long)fop;
-}
-
-void
-set_mkdir_cbk(long py_this, fop_mkdir_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_MKDIR] = (long)cbk;
-}
-
-/* FOP: RMDIR */
-
-int32_t
-glupy_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
-
- if (!priv->cbks[GLUPY_RMDIR]) {
- goto unwind;
- }
-
- gstate = glupy_enter();
- ret = ((fop_rmdir_cbk_t)(priv->cbks[GLUPY_RMDIR]))(
- frame, cookie, this, op_ret, op_errno, preparent, postparent, xdata);
- glupy_leave(gstate);
-
- return ret;
-
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT(rmdir, frame, op_ret, op_errno, preparent, postparent,
- xdata);
- return 0;
-}
-
-int32_t
-glupy_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
- dict_t *xdata)
-{
- glupy_private_t *priv = this->private;
- PyGILState_STATE gstate;
- int32_t ret;
- static long next_id = 0;
-
- if (!priv->fops[GLUPY_RMDIR]) {
- goto wind;
- }
-
- gstate = glupy_enter();
- frame->local = (void *)++next_id;
- ret = ((fop_rmdir_t)(priv->fops[GLUPY_RMDIR]))(frame, this, loc, xflags,
- xdata);
- glupy_leave(gstate);
-
- return ret;
-
-wind:
- STACK_WIND(frame, glupy_rmdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir, loc, xflags, xdata);
- return 0;
-}
-
-void
-wind_rmdir(call_frame_t *frame, xlator_t *xl, loc_t *loc, int xflags,
- dict_t *xdata)
-{
- xlator_t *this = THIS;
-
- if (!xl || (xl == this)) {
- xl = FIRST_CHILD(this);
- }
-
- STACK_WIND(frame, glupy_rmdir_cbk, xl, xl->fops->rmdir, loc, xflags, xdata);
-}
-
-void
-unwind_rmdir(call_frame_t *frame, long cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- frame->local = NULL;
- STACK_UNWIND_STRICT(rmdir, frame, op_ret, op_errno, preparent, postparent,
- xdata);
-}
-
-void
-set_rmdir_fop(long py_this, fop_rmdir_t fop)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->fops[GLUPY_RMDIR] = (long)fop;
-}
-
-void
-set_rmdir_cbk(long py_this, fop_rmdir_cbk_t cbk)
-{
- glupy_private_t *priv = ((xlator_t *)py_this)->private;
-
- priv->cbks[GLUPY_RMDIR] = (long)cbk;
-}
-
-/* NON-FOP-SPECIFIC CODE */
-
-long
-get_id(call_frame_t *frame)
-{
- return (long)(frame->local);
-}
-
-uint64_t
-get_rootunique(call_frame_t *frame)
-{
- return frame->root->unique;
-}
-
-int32_t
-mem_acct_init(xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init(this, gf_glupy_mt_end);
-
- if (ret != 0) {
- gf_log(this->name, GF_LOG_ERROR,
- "Memory accounting init"
- " failed");
- return ret;
- }
-
- return ret;
-}
-
-static void
-py_error_log(const char *name, PyObject *pystr)
-{
-#if PY_MAJOR_VERSION > 2
- char scr[256];
- if (PyUnicode_Check(pystr)) {
- PyObject *tmp = PyUnicode_AsEncodedString(pystr, "UTF-8", "strict");
- if (tmp != NULL) {
- strncpy(scr, PyBytes_AS_STRING(pystr), sizeof(scr));
- Py_DECREF(tmp);
- } else {
- strncpy(scr, "string encoding error", sizeof(scr));
- }
- } else if (PyBytes_Check(pystr)) {
- strncpy(scr, PyBytes_AS_STRING(pystr), sizeof(scr));
- } else {
- strncpy(scr, "string encoding error", sizeof(scr));
- }
- gf_log(name, GF_LOG_ERROR, "Python error: %s", scr);
-#else
- gf_log(name, GF_LOG_ERROR, "Python error: %s", PyString_AsString(pystr));
-#endif
-}
-
-static PyObject *
-encode(const char *str)
-{
-#if PY_MAJOR_VERSION > 2
- return PyUnicode_FromString(str);
-#else
- return PyString_FromString(str);
-#endif
-}
-
-int32_t
-init(xlator_t *this)
-{
- glupy_private_t *priv = NULL;
- char *module_name = NULL;
- PyObject *py_mod_name = NULL;
- PyObject *py_init_func = NULL;
- PyObject *py_args = NULL;
- PyObject *syspath = NULL;
- PyObject *path = NULL;
- PyObject *error_type = NULL;
- PyObject *error_msg = NULL;
- PyObject *error_bt = NULL;
- static gf_boolean_t py_inited = _gf_false;
- void *err_cleanup = &&err_return;
- char libpython[16];
-
- if (dict_get_str(this->options, "module-name", &module_name) != 0) {
- gf_log(this->name, GF_LOG_ERROR, "missing module-name");
- return -1;
- }
-
- priv = GF_CALLOC(1, sizeof(glupy_private_t), gf_glupy_mt_priv);
- if (!priv) {
- goto *err_cleanup;
- }
- this->private = priv;
- err_cleanup = &&err_free_priv;
-
- if (!py_inited) {
- /* FIXME:
- * This hack is necessary because glusterfs (rightly) loads
- * glupy.so with RTLD_LOCAL but glupy needs libpython to be
- * loaded with RTLD_GLOBAL even though glupy is correctly
- * linked with libpython.
- * This is needed because one of the internal modules of
- * python 2.x (lib-dynload/_struct.so) does not explicitly
- * link with libpython.
- */
- snprintf(libpython, sizeof(libpython), "libpython%d.%d.so",
- PY_MAJOR_VERSION, PY_MINOR_VERSION);
- if (!dlopen(libpython, RTLD_NOW | RTLD_GLOBAL)) {
- gf_msg(this->name, GF_LOG_WARNING, 0, LG_MSG_DLOPEN_FAILED,
- "dlopen(%s) failed: %s", libpython, dlerror());
- }
-
- /*
- * This must be done before Py_Initialize(),
- * because it will duplicate the environment,
- * and fail to see later environment updates.
- */
- setenv("PATH_GLUSTERFS_GLUPY_MODULE", PATH_GLUSTERFS_GLUPY_MODULE, 1);
-
- Py_Initialize();
- PyEval_InitThreads();
-
- (void)pthread_key_create(&gil_init_key, NULL);
- (void)pthread_setspecific(gil_init_key, (void *)1);
-
- /* PyEval_InitThreads takes this "for" us. No thanks. */
- PyEval_ReleaseLock();
- py_inited = _gf_true;
- }
-
- /* Adjust python's path */
- syspath = PySys_GetObject("path");
- path = encode(GLUSTER_PYTHON_PATH);
- PyList_Append(syspath, path);
- Py_DECREF(path);
-
- py_mod_name = encode(module_name);
- if (!py_mod_name) {
- gf_log(this->name, GF_LOG_ERROR, "could not create name");
- if (PyErr_Occurred()) {
- PyErr_Fetch(&error_type, &error_msg, &error_bt);
- py_error_log(this->name, error_msg);
- }
- goto *err_cleanup;
- }
-
- gf_log(this->name, GF_LOG_DEBUG, "py_mod_name = %s", module_name);
- priv->py_module = PyImport_Import(py_mod_name);
- Py_DECREF(py_mod_name);
- if (!priv->py_module) {
- gf_log(this->name, GF_LOG_ERROR, "Python import of %s failed",
- module_name);
- if (PyErr_Occurred()) {
- PyErr_Fetch(&error_type, &error_msg, &error_bt);
- py_error_log(this->name, error_msg);
- }
- goto *err_cleanup;
- }
- gf_log(this->name, GF_LOG_INFO, "Import of %s succeeded", module_name);
- err_cleanup = &&err_deref_module;
-
- py_init_func = PyObject_GetAttrString(priv->py_module, "xlator");
- if (!py_init_func || !PyCallable_Check(py_init_func)) {
- gf_log(this->name, GF_LOG_ERROR, "missing init func");
- if (PyErr_Occurred()) {
- PyErr_Fetch(&error_type, &error_msg, &error_bt);
- py_error_log(this->name, error_msg);
- }
- goto *err_cleanup;
- }
- err_cleanup = &&err_deref_init;
-
- py_args = PyTuple_New(1);
- if (!py_args) {
- gf_log(this->name, GF_LOG_ERROR, "could not create args");
- if (PyErr_Occurred()) {
- PyErr_Fetch(&error_type, &error_msg, &error_bt);
- py_error_log(this->name, error_msg);
- }
- goto *err_cleanup;
- }
- PyTuple_SetItem(py_args, 0, PyLong_FromLong((long)this));
-
- /* TBD: pass in list of children */
- priv->py_xlator = PyObject_CallObject(py_init_func, py_args);
- Py_DECREF(py_args);
- if (!priv->py_xlator) {
- gf_log(this->name, GF_LOG_ERROR, "Python init failed");
- if (PyErr_Occurred()) {
- PyErr_Fetch(&error_type, &error_msg, &error_bt);
- py_error_log(this->name, error_msg);
- }
- goto *err_cleanup;
- }
- gf_log(this->name, GF_LOG_DEBUG, "init returned %p", priv->py_xlator);
-
- return 0;
-
-err_deref_init:
- Py_DECREF(py_init_func);
-err_deref_module:
- Py_DECREF(priv->py_module);
-err_free_priv:
- GF_FREE(priv);
-err_return:
- return -1;
-}
-
-void
-fini(xlator_t *this)
-{
- glupy_private_t *priv = this->private;
-
- if (!priv)
- return;
- Py_DECREF(priv->py_xlator);
- Py_DECREF(priv->py_module);
- this->private = NULL;
- GF_FREE(priv);
-
- return;
-}
-
-struct xlator_fops fops = {.lookup = glupy_lookup,
- .create = glupy_create,
- .open = glupy_open,
- .readv = glupy_readv,
- .writev = glupy_writev,
- .opendir = glupy_opendir,
- .readdir = glupy_readdir,
- .stat = glupy_stat,
- .fstat = glupy_fstat,
- .setxattr = glupy_setxattr,
- .getxattr = glupy_getxattr,
- .fsetxattr = glupy_fsetxattr,
- .fgetxattr = glupy_fgetxattr,
- .removexattr = glupy_removexattr,
- .fremovexattr = glupy_fremovexattr,
- .link = glupy_link,
- .unlink = glupy_unlink,
- .readlink = glupy_readlink,
- .symlink = glupy_symlink,
- .mkdir = glupy_mkdir,
- .rmdir = glupy_rmdir,
- .statfs = glupy_statfs,
- .readdirp = glupy_readdirp};
-
-struct xlator_cbks cbks = {};
-
-struct volume_options options[] = {
- {.key = {NULL}},
-};
diff --git a/xlators/features/glupy/src/glupy.h b/xlators/features/glupy/src/glupy.h
deleted file mode 100644
index 851b02154d2..00000000000
--- a/xlators/features/glupy/src/glupy.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- Copyright (c) 2006-2014 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __GLUPY_H__
-#define __GLUPY_H__
-
-#include "mem-types.h"
-
-enum {
- GLUPY_LOOKUP = 0,
- GLUPY_CREATE,
- GLUPY_OPEN,
- GLUPY_READV,
- GLUPY_WRITEV,
- GLUPY_OPENDIR,
- GLUPY_READDIR,
- GLUPY_READDIRP,
- GLUPY_STAT,
- GLUPY_FSTAT,
- GLUPY_STATFS,
- GLUPY_SETXATTR,
- GLUPY_GETXATTR,
- GLUPY_FSETXATTR,
- GLUPY_FGETXATTR,
- GLUPY_REMOVEXATTR,
- GLUPY_FREMOVEXATTR,
- GLUPY_LINK,
- GLUPY_UNLINK,
- GLUPY_READLINK,
- GLUPY_SYMLINK,
- GLUPY_MKNOD,
- GLUPY_MKDIR,
- GLUPY_RMDIR,
- GLUPY_N_FUNCS
-};
-
-typedef struct {
- PyObject *py_module;
- PyObject *py_xlator;
- long fops[GLUPY_N_FUNCS];
- long cbks[GLUPY_N_FUNCS];
-} glupy_private_t;
-
-enum gf_glupy_mem_types_ {
- gf_glupy_mt_priv = gf_common_mt_end + 1,
- gf_glupy_mt_end
-};
-
-#endif /* __GLUPY_H__ */
diff --git a/xlators/features/glupy/src/glupy.sym b/xlators/features/glupy/src/glupy.sym
deleted file mode 100644
index 55d9a300108..00000000000
--- a/xlators/features/glupy/src/glupy.sym
+++ /dev/null
@@ -1,101 +0,0 @@
-init
-fini
-fops
-cbks
-options
-notify
-mem_acct_init
-reconfigure
-dumpops
-set_lookup_fop
-set_lookup_cbk
-set_create_fop
-set_create_cbk
-set_open_fop
-set_open_cbk
-set_readv_fop
-set_readv_cbk
-set_writev_fop
-set_writev_cbk
-set_opendir_fop
-set_opendir_cbk
-set_readdir_fop
-set_readdir_cbk
-set_readdirp_fop
-set_readdirp_cbk
-set_stat_fop
-set_stat_cbk
-set_fstat_fop
-set_fstat_cbk
-set_statfs_fop
-set_statfs_cbk
-set_setxattr_fop
-set_setxattr_cbk
-set_getxattr_fop
-set_getxattr_cbk
-set_fsetxattr_fop
-set_fsetxattr_cbk
-set_fgetxattr_fop
-set_fgetxattr_cbk
-set_removexattr_fop
-set_removexattr_cbk
-set_fremovexattr_fop
-set_fremovexattr_cbk
-set_link_fop
-set_link_cbk
-set_symlink_fop
-set_symlink_cbk
-set_readlink_fop
-set_readlink_cbk
-set_unlink_fop
-set_unlink_cbk
-set_mkdir_fop
-set_mkdir_cbk
-set_rmdir_fop
-set_rmdir_cbk
-wind_lookup
-wind_create
-wind_open
-wind_readv
-wind_writev
-wind_opendir
-wind_readdir
-wind_readdirp
-wind_stat
-wind_fstat
-wind_statfs
-wind_setxattr
-wind_getxattr
-wind_fsetxattr
-wind_fgetxattr
-wind_removexattr
-wind_fremovexattr
-wind_link
-wind_symlink
-wind_readlink
-wind_unlink
-wind_mkdir
-wind_rmdir
-unwind_lookup
-unwind_create
-unwind_open
-unwind_readv
-unwind_writev
-unwind_opendir
-unwind_readdir
-unwind_readdirp
-unwind_stat
-unwind_fstat
-unwind_statfs
-unwind_setxattr
-unwind_getxattr
-unwind_fsetxattr
-unwind_fgetxattr
-unwind_removexattr
-unwind_fremovexattr
-unwind_link
-unwind_symlink
-unwind_readlink
-unwind_unlink
-unwind_mkdir
-unwind_rmdir
diff --git a/xlators/features/glupy/src/glupy/Makefile.am b/xlators/features/glupy/src/glupy/Makefile.am
deleted file mode 100644
index 573d2da12e1..00000000000
--- a/xlators/features/glupy/src/glupy/Makefile.am
+++ /dev/null
@@ -1,5 +0,0 @@
-# Install __init__.py into the Python site-packages area
-pyglupydir = @BUILD_PYTHON_SITE_PACKAGES@/gluster/glupy
-pyglupy_PYTHON = __init__.py
-
-CLEANFILES =
diff --git a/xlators/features/glupy/src/glupy/__init__.py b/xlators/features/glupy/src/glupy/__init__.py
deleted file mode 100644
index 576fbdb9945..00000000000
--- a/xlators/features/glupy/src/glupy/__init__.py
+++ /dev/null
@@ -1,852 +0,0 @@
-##
-## Copyright (c) 2006-2014 Red Hat, Inc. <http://www.redhat.com>
-## This file is part of GlusterFS.
-##
-## This file is licensed to you under your choice of the GNU Lesser
-## General Public License, version 3 or any later version (LGPLv3 or
-## later), or the GNU General Public License, version 2 (GPLv2), in all
-## cases as published by the Free Software Foundation.
-##
-
-import sys
-import os
-from ctypes import *
-
-dl = CDLL(os.getenv("PATH_GLUSTERFS_GLUPY_MODULE", ""), RTLD_GLOBAL)
-
-
-class call_frame_t (Structure):
- pass
-
-class dev_t (Structure):
- pass
-
-
-class dict_t (Structure):
- pass
-
-
-class gf_dirent_t (Structure):
- pass
-
-
-class iobref_t (Structure):
- pass
-
-
-class iovec_t (Structure):
- pass
-
-
-class list_head (Structure):
- pass
-
-list_head._fields_ = [
- ("next", POINTER(list_head)),
- ("prev", POINTER(list_head))
- ]
-
-
-class rwxperm_t (Structure):
- _fields_ = [
- ("read", c_uint8, 1),
- ("write", c_uint8, 1),
- ("execn", c_uint8, 1)
- ]
-
-
-class statvfs_t (Structure):
- pass
-
-
-class xlator_t (Structure):
- pass
-
-
-class ia_prot_t (Structure):
- _fields_ = [
- ("suid", c_uint8, 1),
- ("sgid", c_uint8, 1),
- ("sticky", c_uint8, 1),
- ("owner", rwxperm_t),
- ("group", rwxperm_t),
- ("other", rwxperm_t)
- ]
-
-# For checking file type.
-(IA_INVAL, IA_IFREG, IA_IFDIR, IA_IFLNK, IA_IFBLK, IA_IFCHR, IA_IFIFO,
- IA_IFSOCK) = range(8)
-
-
-class iatt_t (Structure):
- _fields_ = [
- ("ia_no", c_uint64),
- ("ia_gfid", c_ubyte * 16),
- ("ia_dev", c_uint64),
- ("ia_type", c_uint),
- ("ia_prot", ia_prot_t),
- ("ia_nlink", c_uint32),
- ("ia_uid", c_uint32),
- ("ia_gid", c_uint32),
- ("ia_rdev", c_uint64),
- ("ia_size", c_uint64),
- ("ia_blksize", c_uint32),
- ("ia_blocks", c_uint64),
- ("ia_atime", c_uint32 ),
- ("ia_atime_nsec", c_uint32),
- ("ia_mtime", c_uint32),
- ("ia_mtime_nsec", c_uint32),
- ("ia_ctime", c_uint32),
- ("ia_ctime_nsec", c_uint32)
- ]
-
-
-class mem_pool (Structure):
- _fields_ = [
- ("list", list_head),
- ("hot_count", c_int),
- ("cold_count", c_int),
- ("lock", c_void_p),
- ("padded_sizeof_type", c_ulong),
- ("pool", c_void_p),
- ("pool_end", c_void_p),
- ("real_sizeof_type", c_int),
- ("alloc_count", c_uint64),
- ("pool_misses", c_uint64),
- ("max_alloc", c_int),
- ("curr_stdalloc", c_int),
- ("max_stdalloc", c_int),
- ("name", c_char_p),
- ("global_list", list_head)
- ]
-
-
-class U_ctx_key_inode (Union):
- _fields_ = [
- ("key", c_uint64),
- ("xl_key", POINTER(xlator_t))
- ]
-
-
-class U_ctx_value1 (Union):
- _fields_ = [
- ("value1", c_uint64),
- ("ptr1", c_void_p)
- ]
-
-
-class U_ctx_value2 (Union):
- _fields_ = [
- ("value2", c_uint64),
- ("ptr2", c_void_p)
- ]
-
-class inode_ctx (Structure):
- _anonymous_ = ("u_key", "u_value1", "u_value2",)
- _fields_ = [
- ("u_key", U_ctx_key_inode),
- ("u_value1", U_ctx_value1),
- ("u_value2", U_ctx_value2)
- ]
-
-class inode_t (Structure):
- pass
-
-class inode_table_t (Structure):
- _fields_ = [
- ("lock", c_void_p),
- ("hashsize", c_size_t),
- ("name", c_char_p),
- ("root", POINTER(inode_t)),
- ("xl", POINTER(xlator_t)),
- ("lru_limit", c_uint32),
- ("inode_hash", POINTER(list_head)),
- ("name_hash", POINTER(list_head)),
- ("active", list_head),
- ("active_size", c_uint32),
- ("lru", list_head),
- ("lru_size", c_uint32),
- ("purge", list_head),
- ("purge_size", c_uint32),
- ("inode_pool", POINTER(mem_pool)),
- ("dentry_pool", POINTER(mem_pool)),
- ("fd_mem_pool", POINTER(mem_pool))
- ]
-
-inode_t._fields_ = [
- ("table", POINTER(inode_table_t)),
- ("gfid", c_ubyte * 16),
- ("lock", c_void_p),
- ("nlookup", c_uint64),
- ("fd_count", c_uint32),
- ("ref", c_uint32),
- ("ia_type", c_uint),
- ("fd_list", list_head),
- ("dentry_list", list_head),
- ("hashv", list_head),
- ("listv", list_head),
- ("ctx", POINTER(inode_ctx))
- ]
-
-
-
-class U_ctx_key_fd (Union):
- _fields_ = [
- ("key", c_uint64),
- ("xl_key", c_void_p)
- ]
-
-class fd_lk_ctx (Structure):
- _fields_ = [
- ("lk_list", list_head),
- ("ref", c_int),
- ("lock", c_void_p)
- ]
-
-class fd_ctx (Structure):
- _anonymous_ = ("u_key", "u_value1")
- _fields_ = [
- ("u_key", U_ctx_key_fd),
- ("u_value1", U_ctx_value1)
- ]
-
-class fd_t (Structure):
- _fields_ = [
- ("pid", c_uint64),
- ("flags", c_int32),
- ("refcount", c_int32),
- ("inode_list", list_head),
- ("inode", POINTER(inode_t)),
- ("lock", c_void_p),
- ("ctx", POINTER(fd_ctx)),
- ("xl_count", c_int),
- ("lk_ctx", POINTER(fd_lk_ctx)),
- ("anonymous", c_uint)
- ]
-
-class loc_t (Structure):
- _fields_ = [
- ("path", c_char_p),
- ("name", c_char_p),
- ("inode", POINTER(inode_t)),
- ("parent", POINTER(inode_t)),
- ("gfid", c_ubyte * 16),
- ("pargfid", c_ubyte * 16),
- ]
-
-
-
-def _init_op (a_class, fop, cbk, wind, unwind):
- # Decorators, used by translators. We could pass the signatures as
- # parameters, but it's actually kind of nice to keep them around for
- # inspection.
- a_class.fop_type = CFUNCTYPE(*a_class.fop_sig)
- a_class.cbk_type = CFUNCTYPE(*a_class.cbk_sig)
- # Dispatch-function registration.
- fop.restype = None
- fop.argtypes = [ c_long, a_class.fop_type ]
- # Callback-function registration.
- cbk.restype = None
- cbk.argtypes = [ c_long, a_class.cbk_type ]
- # STACK_WIND function.
- wind.restype = None
- wind.argtypes = list(a_class.fop_sig[1:])
- # STACK_UNWIND function.
- unwind.restype = None
- unwind.argtypes = list(a_class.cbk_sig[1:])
-
-class OpLookup:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(loc_t), POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(inode_t), POINTER(iatt_t),
- POINTER(dict_t), POINTER(iatt_t))
-_init_op (OpLookup, dl.set_lookup_fop, dl.set_lookup_cbk,
- dl.wind_lookup, dl.unwind_lookup)
-
-class OpCreate:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(loc_t), c_int, c_uint, c_uint, POINTER(fd_t),
- POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(fd_t), POINTER(inode_t),
- POINTER(iatt_t), POINTER(iatt_t), POINTER(iatt_t),
- POINTER(dict_t))
-_init_op (OpCreate, dl.set_create_fop, dl.set_create_cbk,
- dl.wind_create, dl.unwind_create)
-
-class OpOpen:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(loc_t), c_int, POINTER(fd_t), POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(fd_t), POINTER(dict_t))
-_init_op (OpOpen, dl.set_open_fop, dl.set_open_cbk,
- dl.wind_open, dl.unwind_open)
-
-class OpReadv:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(fd_t), c_size_t, c_long, c_uint32, POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(iovec_t), c_int, POINTER(iatt_t),
- POINTER(iobref_t), POINTER(dict_t))
-_init_op (OpReadv, dl.set_readv_fop, dl.set_readv_cbk,
- dl.wind_readv, dl.unwind_readv)
-class OpWritev:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(fd_t), POINTER(iovec_t), c_int, c_long, c_uint32,
- POINTER(iobref_t), POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(iatt_t), POINTER(iatt_t),
- POINTER(dict_t))
-_init_op (OpWritev, dl.set_writev_fop, dl.set_writev_cbk,
- dl.wind_writev, dl.unwind_writev)
-
-class OpOpendir:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(loc_t), POINTER(fd_t), POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(fd_t), POINTER(dict_t))
-_init_op (OpOpendir, dl.set_opendir_fop, dl.set_opendir_cbk,
- dl.wind_opendir, dl.unwind_opendir)
-
-class OpReaddir:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(fd_t), c_size_t, c_long, POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(gf_dirent_t), POINTER(dict_t))
-_init_op (OpReaddir, dl.set_readdir_fop, dl.set_readdir_cbk,
- dl.wind_readdir, dl.unwind_readdir)
-
-class OpReaddirp:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(fd_t), c_size_t, c_long, POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(gf_dirent_t), POINTER(dict_t))
-_init_op (OpReaddirp, dl.set_readdirp_fop, dl.set_readdirp_cbk,
- dl.wind_readdirp, dl.unwind_readdirp)
-
-class OpStat:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(loc_t), POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(iatt_t), POINTER(dict_t))
-_init_op (OpStat, dl.set_stat_fop, dl.set_stat_cbk,
- dl.wind_stat, dl.unwind_stat)
-
-class OpFstat:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(fd_t), POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(iatt_t), POINTER(dict_t))
-_init_op (OpFstat, dl.set_fstat_fop, dl.set_fstat_cbk,
- dl.wind_fstat, dl.unwind_fstat)
-
-class OpStatfs:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(loc_t), POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(statvfs_t), POINTER(dict_t))
-_init_op (OpStatfs, dl.set_statfs_fop, dl.set_statfs_cbk,
- dl.wind_statfs, dl.unwind_statfs)
-
-
-class OpSetxattr:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(loc_t), POINTER(dict_t), c_int32,
- POINTER (dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(dict_t))
-_init_op (OpSetxattr, dl.set_setxattr_fop, dl.set_setxattr_cbk,
- dl.wind_setxattr, dl.unwind_setxattr)
-
-class OpGetxattr:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(loc_t), c_char_p, POINTER (dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(dict_t), POINTER(dict_t))
-_init_op (OpGetxattr, dl.set_getxattr_fop, dl.set_getxattr_cbk,
- dl.wind_getxattr, dl.unwind_getxattr)
-
-class OpFsetxattr:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(fd_t), POINTER(dict_t), c_int32,
- POINTER (dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(dict_t))
-_init_op (OpFsetxattr, dl.set_fsetxattr_fop, dl.set_fsetxattr_cbk,
- dl.wind_fsetxattr, dl.unwind_fsetxattr)
-
-class OpFgetxattr:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(fd_t), c_char_p, POINTER (dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(dict_t), POINTER(dict_t))
-_init_op (OpFgetxattr, dl.set_fgetxattr_fop, dl.set_fgetxattr_cbk,
- dl.wind_fgetxattr, dl.unwind_fgetxattr)
-
-class OpRemovexattr:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(loc_t), c_char_p, POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(dict_t))
-_init_op (OpRemovexattr, dl.set_removexattr_fop, dl.set_removexattr_cbk,
- dl.wind_removexattr, dl.unwind_removexattr)
-
-
-class OpFremovexattr:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(fd_t), c_char_p, POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(dict_t))
-_init_op (OpFremovexattr, dl.set_fremovexattr_fop, dl.set_fremovexattr_cbk,
- dl.wind_fremovexattr, dl.unwind_fremovexattr)
-
-class OpLink:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(loc_t), POINTER(loc_t), POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(inode_t), POINTER(iatt_t),
- POINTER(iatt_t), POINTER(iatt_t), POINTER(dict_t))
-_init_op (OpLink, dl.set_link_fop, dl.set_link_cbk,
- dl.wind_link, dl.unwind_link)
-
-class OpSymlink:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- c_char_p, POINTER(loc_t), c_uint, POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(inode_t), POINTER(iatt_t),
- POINTER(iatt_t), POINTER(iatt_t), POINTER(dict_t))
-_init_op (OpSymlink, dl.set_symlink_fop, dl.set_symlink_cbk,
- dl.wind_symlink, dl.unwind_symlink)
-
-class OpUnlink:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(loc_t), c_int, POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(iatt_t), POINTER(iatt_t),
- POINTER(dict_t))
-_init_op (OpUnlink, dl.set_unlink_fop, dl.set_unlink_cbk,
- dl.wind_unlink, dl.unwind_unlink)
-
-class OpReadlink:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(loc_t), c_size_t, POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, c_char_p, POINTER(iatt_t), POINTER(dict_t))
-_init_op (OpReadlink, dl.set_readlink_fop, dl.set_readlink_cbk,
- dl.wind_readlink, dl.unwind_readlink)
-
-class OpMkdir:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(loc_t), c_uint, c_uint, POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(inode_t), POINTER(iatt_t),
- POINTER(iatt_t), POINTER(iatt_t), POINTER(dict_t))
-_init_op (OpMkdir, dl.set_mkdir_fop, dl.set_mkdir_cbk,
- dl.wind_mkdir, dl.unwind_mkdir)
-
-class OpRmdir:
- fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(loc_t), c_int, POINTER(dict_t))
- cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
- c_int, c_int, POINTER(iatt_t), POINTER(iatt_t),
- POINTER(dict_t))
-_init_op (OpRmdir, dl.set_rmdir_fop, dl.set_rmdir_cbk,
- dl.wind_rmdir, dl.unwind_rmdir)
-
-
-class Translator:
- def __init__ (self, c_this):
- # This is only here to keep references to the stubs we create,
- # because ctypes doesn't and glupy.so can't because it doesn't
- # get a pointer to the actual Python object. It's a dictionary
- # instead of a list in case we ever allow changing fops/cbks
- # after initialization and need to look them up.
- self.stub_refs = {}
- funcs = dir(self.__class__)
- if "lookup_fop" in funcs:
- @OpLookup.fop_type
- def stub (frame, this, loc, xdata, s=self):
- return s.lookup_fop (frame, this, loc, xdata)
- self.stub_refs["lookup_fop"] = stub
- dl.set_lookup_fop(c_this, stub)
- if "lookup_cbk" in funcs:
- @OpLookup.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno, inode,
- buf, xdata, postparent, s=self):
- return s.lookup_cbk(frame, cookie, this, op_ret,
- op_errno, inode, buf, xdata,
- postparent)
- self.stub_refs["lookup_cbk"] = stub
- dl.set_lookup_cbk(c_this, stub)
- if "create_fop" in funcs:
- @OpCreate.fop_type
- def stub (frame, this, loc, flags, mode, umask, fd,
- xdata, s=self):
- return s.create_fop (frame, this, loc, flags,
- mode, umask, fd, xdata)
- self.stub_refs["create_fop"] = stub
- dl.set_create_fop(c_this, stub)
- if "create_cbk" in funcs:
- @OpCreate.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno, fd,
- inode, buf, preparent, postparent, xdata,
- s=self):
- return s.create_cbk (frame, cookie, this,
- op_ret, op_errno, fd,
- inode, buf, preparent,
- postparent, xdata)
- self.stub_refs["create_cbk"] = stub
- dl.set_create_cbk(c_this, stub)
- if "open_fop" in funcs:
- @OpOpen.fop_type
- def stub (frame, this, loc, flags, fd,
- xdata, s=self):
- return s.open_fop (frame, this, loc, flags,
- fd, xdata)
- self.stub_refs["open_fop"] = stub
- dl.set_open_fop(c_this, stub)
- if "open_cbk" in funcs:
- @OpOpen.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno, fd,
- xdata, s=self):
- return s.open_cbk (frame, cookie, this,
- op_ret, op_errno, fd,
- xdata)
- self.stub_refs["open_cbk"] = stub
- dl.set_open_cbk(c_this, stub)
- if "readv_fop" in funcs:
- @OpReadv.fop_type
- def stub (frame, this, fd, size, offset, flags,
- xdata, s=self):
- return s.readv_fop (frame, this, fd, size,
- offset, flags, xdata)
- self.stub_refs["readv_fop"] = stub
- dl.set_readv_fop(c_this, stub)
- if "readv_cbk" in funcs:
- @OpReadv.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno,
- vector, count, stbuf, iobref, xdata,
- s=self):
- return s.readv_cbk (frame, cookie, this,
- op_ret, op_errno, vector,
- count, stbuf, iobref,
- xdata)
- self.stub_refs["readv_cbk"] = stub
- dl.set_readv_cbk(c_this, stub)
- if "writev_fop" in funcs:
- @OpWritev.fop_type
- def stub (frame, this, fd, vector, count,
- offset, flags, iobref, xdata, s=self):
- return s.writev_fop (frame, this, fd, vector,
- count, offset, flags,
- iobref, xdata)
- self.stub_refs["writev_fop"] = stub
- dl.set_writev_fop(c_this, stub)
- if "writev_cbk" in funcs:
- @OpWritev.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno,
- prebuf, postbuf, xdata, s=self):
- return s.writev_cbk (frame, cookie, this,
- op_ret, op_errno, prebuf,
- postbuf, xdata)
- self.stub_refs["writev_cbk"] = stub
- dl.set_writev_cbk(c_this, stub)
- if "opendir_fop" in funcs:
- @OpOpendir.fop_type
- def stub (frame, this, loc, fd, xdata, s=self):
- return s.opendir_fop (frame, this, loc, fd,
- xdata)
- self.stub_refs["opendir_fop"] = stub
- dl.set_opendir_fop(c_this, stub)
- if "opendir_cbk" in funcs:
- @OpOpendir.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno, fd,
- xdata, s=self):
- return s.opendir_cbk(frame, cookie, this,
- op_ret, op_errno, fd,
- xdata)
- self.stub_refs["opendir_cbk"] = stub
- dl.set_opendir_cbk(c_this, stub)
- if "readdir_fop" in funcs:
- @OpReaddir.fop_type
- def stub (frame, this, fd, size, offset, xdata, s=self):
- return s.readdir_fop (frame, this, fd, size,
- offset, xdata)
- self.stub_refs["readdir_fop"] = stub
- dl.set_readdir_fop(c_this, stub)
- if "readdir_cbk" in funcs:
- @OpReaddir.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno,
- entries, xdata, s=self):
- return s.readdir_cbk(frame, cookie, this,
- op_ret, op_errno, entries,
- xdata)
- self.stub_refs["readdir_cbk"] = stub
- dl.set_readdir_cbk(c_this, stub)
- if "readdirp_fop" in funcs:
- @OpReaddirp.fop_type
- def stub (frame, this, fd, size, offset, xdata, s=self):
- return s.readdirp_fop (frame, this, fd, size,
- offset, xdata)
- self.stub_refs["readdirp_fop"] = stub
- dl.set_readdirp_fop(c_this, stub)
- if "readdirp_cbk" in funcs:
- @OpReaddirp.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno,
- entries, xdata, s=self):
- return s.readdirp_cbk (frame, cookie, this,
- op_ret, op_errno,
- entries, xdata)
- self.stub_refs["readdirp_cbk"] = stub
- dl.set_readdirp_cbk(c_this, stub)
- if "stat_fop" in funcs:
- @OpStat.fop_type
- def stub (frame, this, loc, xdata, s=self):
- return s.stat_fop (frame, this, loc, xdata)
- self.stub_refs["stat_fop"] = stub
- dl.set_stat_fop(c_this, stub)
- if "stat_cbk" in funcs:
- @OpStat.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno, buf,
- xdata, s=self):
- return s.stat_cbk(frame, cookie, this, op_ret,
- op_errno, buf, xdata)
- self.stub_refs["stat_cbk"] = stub
- dl.set_stat_cbk(c_this, stub)
- if "fstat_fop" in funcs:
- @OpFstat.fop_type
- def stub (frame, this, fd, xdata, s=self):
- return s.fstat_fop (frame, this, fd, xdata)
- self.stub_refs["fstat_fop"] = stub
- dl.set_fstat_fop(c_this, stub)
- if "fstat_cbk" in funcs:
- @OpFstat.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno, buf,
- xdata, s=self):
- return s.fstat_cbk(frame, cookie, this, op_ret,
- op_errno, buf, xdata)
- self.stub_refs["fstat_cbk"] = stub
- dl.set_fstat_cbk(c_this, stub)
- if "statfs_fop" in funcs:
- @OpStatfs.fop_type
- def stub (frame, this, loc, xdata, s=self):
- return s.statfs_fop (frame, this, loc, xdata)
- self.stub_refs["statfs_fop"] = stub
- dl.set_statfs_fop(c_this, stub)
- if "statfs_cbk" in funcs:
- @OpStatfs.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno, buf,
- xdata, s=self):
- return s.statfs_cbk (frame, cookie, this,
- op_ret, op_errno, buf,
- xdata)
- self.stub_refs["statfs_cbk"] = stub
- dl.set_statfs_cbk(c_this, stub)
- if "setxattr_fop" in funcs:
- @OpSetxattr.fop_type
- def stub (frame, this, loc, dictionary, flags, xdata,
- s=self):
- return s.setxattr_fop (frame, this, loc,
- dictionary, flags,
- xdata)
- self.stub_refs["setxattr_fop"] = stub
- dl.set_setxattr_fop(c_this, stub)
- if "setxattr_cbk" in funcs:
- @OpSetxattr.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno, xdata,
- s=self):
- return s.setxattr_cbk(frame, cookie, this,
- op_ret, op_errno, xdata)
- self.stub_refs["setxattr_cbk"] = stub
- dl.set_setxattr_cbk(c_this, stub)
- if "getxattr_fop" in funcs:
- @OpGetxattr.fop_type
- def stub (frame, this, loc, name, xdata, s=self):
- return s.getxattr_fop (frame, this, loc, name,
- xdata)
- self.stub_refs["getxattr_fop"] = stub
- dl.set_getxattr_fop(c_this, stub)
- if "getxattr_cbk" in funcs:
- @OpGetxattr.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno,
- dictionary, xdata, s=self):
- return s.getxattr_cbk(frame, cookie, this,
- op_ret, op_errno,
- dictionary, xdata)
- self.stub_refs["getxattr_cbk"] = stub
- dl.set_getxattr_cbk(c_this, stub)
- if "fsetxattr_fop" in funcs:
- @OpFsetxattr.fop_type
- def stub (frame, this, fd, dictionary, flags, xdata,
- s=self):
- return s.fsetxattr_fop (frame, this, fd,
- dictionary, flags,
- xdata)
- self.stub_refs["fsetxattr_fop"] = stub
- dl.set_fsetxattr_fop(c_this, stub)
- if "fsetxattr_cbk" in funcs:
- @OpFsetxattr.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno, xdata,
- s=self):
- return s.fsetxattr_cbk(frame, cookie, this,
- op_ret, op_errno, xdata)
- self.stub_refs["fsetxattr_cbk"] = stub
- dl.set_fsetxattr_cbk(c_this, stub)
- if "fgetxattr_fop" in funcs:
- @OpFgetxattr.fop_type
- def stub (frame, this, fd, name, xdata, s=self):
- return s.fgetxattr_fop (frame, this, fd, name,
- xdata)
- self.stub_refs["fgetxattr_fop"] = stub
- dl.set_fgetxattr_fop(c_this, stub)
- if "fgetxattr_cbk" in funcs:
- @OpFgetxattr.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno,
- dictionary, xdata, s=self):
- return s.fgetxattr_cbk(frame, cookie, this,
- op_ret, op_errno,
- dictionary, xdata)
- self.stub_refs["fgetxattr_cbk"] = stub
- dl.set_fgetxattr_cbk(c_this, stub)
- if "removexattr_fop" in funcs:
- @OpRemovexattr.fop_type
- def stub (frame, this, loc, name, xdata, s=self):
- return s.removexattr_fop (frame, this, loc,
- name, xdata)
- self.stub_refs["removexattr_fop"] = stub
- dl.set_removexattr_fop(c_this, stub)
- if "removexattr_cbk" in funcs:
- @OpRemovexattr.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno,
- xdata, s=self):
- return s.removexattr_cbk(frame, cookie, this,
- op_ret, op_errno,
- xdata)
- self.stub_refs["removexattr_cbk"] = stub
- dl.set_removexattr_cbk(c_this, stub)
- if "fremovexattr_fop" in funcs:
- @OpFremovexattr.fop_type
- def stub (frame, this, fd, name, xdata, s=self):
- return s.fremovexattr_fop (frame, this, fd,
- name, xdata)
- self.stub_refs["fremovexattr_fop"] = stub
- dl.set_fremovexattr_fop(c_this, stub)
- if "fremovexattr_cbk" in funcs:
- @OpFremovexattr.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno,
- xdata, s=self):
- return s.fremovexattr_cbk(frame, cookie, this,
- op_ret, op_errno,
- xdata)
- self.stub_refs["fremovexattr_cbk"] = stub
- dl.set_fremovexattr_cbk(c_this, stub)
- if "link_fop" in funcs:
- @OpLink.fop_type
- def stub (frame, this, oldloc, newloc,
- xdata, s=self):
- return s.link_fop (frame, this, oldloc,
- newloc, xdata)
- self.stub_refs["link_fop"] = stub
- dl.set_link_fop(c_this, stub)
- if "link_cbk" in funcs:
- @OpLink.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno,
- inode, buf, preparent, postparent, xdata,
- s=self):
- return s.link_cbk (frame, cookie, this,
- op_ret, op_errno, inode,
- buf, preparent,
- postparent, xdata)
- self.stub_refs["link_cbk"] = stub
- dl.set_link_cbk(c_this, stub)
- if "symlink_fop" in funcs:
- @OpSymlink.fop_type
- def stub (frame, this, linkname, loc,
- umask, xdata, s=self):
- return s.symlink_fop (frame, this, linkname,
- loc, umask, xdata)
- self.stub_refs["symlink_fop"] = stub
- dl.set_symlink_fop(c_this, stub)
- if "symlink_cbk" in funcs:
- @OpSymlink.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno,
- inode, buf, preparent, postparent, xdata,
- s=self):
- return s.symlink_cbk (frame, cookie, this,
- op_ret, op_errno, inode,
- buf, preparent,
- postparent, xdata)
- self.stub_refs["symlink_cbk"] = stub
- dl.set_symlink_cbk(c_this, stub)
- if "unlink_fop" in funcs:
- @OpUnlink.fop_type
- def stub (frame, this, loc, xflags,
- xdata, s=self):
- return s.unlink_fop (frame, this, loc,
- xflags, xdata)
- self.stub_refs["unlink_fop"] = stub
- dl.set_unlink_fop(c_this, stub)
- if "unlink_cbk" in funcs:
- @OpUnlink.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno,
- preparent, postparent, xdata, s=self):
- return s.unlink_cbk (frame, cookie, this,
- op_ret, op_errno,
- preparent, postparent,
- xdata)
- self.stub_refs["unlink_cbk"] = stub
- dl.set_unlink_cbk(c_this, stub)
- if "readlink_fop" in funcs:
- @OpReadlink.fop_type
- def stub (frame, this, loc, size,
- xdata, s=self):
- return s.readlink_fop (frame, this, loc,
- size, xdata)
- self.stub_refs["readlink_fop"] = stub
- dl.set_readlink_fop(c_this, stub)
- if "readlink_cbk" in funcs:
- @OpReadlink.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno,
- path, buf, xdata, s=self):
- return s.readlink_cbk (frame, cookie, this,
- op_ret, op_errno,
- path, buf, xdata)
- self.stub_refs["readlink_cbk"] = stub
- dl.set_readlink_cbk(c_this, stub)
- if "mkdir_fop" in funcs:
- @OpMkdir.fop_type
- def stub (frame, this, loc, mode, umask, xdata,
- s=self):
- return s.mkdir_fop (frame, this, loc, mode,
- umask, xdata)
- self.stub_refs["mkdir_fop"] = stub
- dl.set_mkdir_fop(c_this, stub)
- if "mkdir_cbk" in funcs:
- @OpMkdir.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno, inode,
- buf, preparent, postparent, xdata, s=self):
- return s.mkdir_cbk (frame, cookie, this,
- op_ret, op_errno, inode,
- buf, preparent,
- postparent, xdata)
- self.stub_refs["mkdir_cbk"] = stub
- dl.set_mkdir_cbk(c_this, stub)
- if "rmdir_fop" in funcs:
- @OpRmdir.fop_type
- def stub (frame, this, loc, xflags,
- xdata, s=self):
- return s.rmdir_fop (frame, this, loc,
- xflags, xdata)
- self.stub_refs["rmdir_fop"] = stub
- dl.set_rmdir_fop(c_this, stub)
- if "rmdir_cbk" in funcs:
- @OpRmdir.cbk_type
- def stub (frame, cookie, this, op_ret, op_errno,
- preparent, postparent, xdata, s=self):
- return s.rmdir_cbk (frame, cookie, this,
- op_ret, op_errno,
- preparent, postparent,
- xdata)
- self.stub_refs["rmdir_cbk"] = stub
- dl.set_rmdir_cbk(c_this, stub)
diff --git a/xlators/features/glupy/src/setup.py.in b/xlators/features/glupy/src/setup.py.in
deleted file mode 100644
index 611e9695f76..00000000000
--- a/xlators/features/glupy/src/setup.py.in
+++ /dev/null
@@ -1,24 +0,0 @@
-from distutils.core import setup
-
-DESC = """GlusterFS is a distributed file-system capable of scaling to
-several petabytes. It aggregates various storage bricks over Infiniband
-RDMA or TCP/IP interconnect into one large parallel network file system.
-GlusterFS is one of the most sophisticated file systems in terms of
-features and extensibility. It borrows a powerful concept called
-Translators from GNU Hurd kernel. Much of the code in GlusterFS is in
-user space and easily manageable.
-
-This package contains Glupy, the Python translator interface for GlusterFS."""
-
-setup(
- name='glusterfs-glupy',
- version='@PACKAGE_VERSION@',
- description='Glupy is the Python translator interface for GlusterFS',
- long_description=DESC,
- author='Gluster Community',
- author_email='gluster-devel@gluster.org',
- license='LGPLv3',
- url='http://gluster.org/',
- package_dir={'gluster':''},
- packages=['gluster']
-)
diff --git a/xlators/features/index/src/index-mem-types.h b/xlators/features/index/src/index-mem-types.h
index a663d76023d..58833d0ec9b 100644
--- a/xlators/features/index/src/index-mem-types.h
+++ b/xlators/features/index/src/index-mem-types.h
@@ -8,10 +8,10 @@
cases as published by the Free Software Foundation.
*/
-#ifndef __QUIESCE_MEM_TYPES_H__
-#define __QUIESCE_MEM_TYPES_H__
+#ifndef __INDEX_MEM_TYPES_H__
+#define __INDEX_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_index_mem_types_ {
gf_index_mt_priv_t = gf_common_mt_end + 1,
diff --git a/xlators/features/index/src/index-messages.h b/xlators/features/index/src/index-messages.h
index 3495fb080f0..364f17cd34e 100644
--- a/xlators/features/index/src/index-messages.h
+++ b/xlators/features/index/src/index-messages.h
@@ -11,7 +11,7 @@
#ifndef _INDEX_MESSAGES_H_
#define _INDEX_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c
index c487e8a5726..4abb2c73ce5 100644
--- a/xlators/features/index/src/index.c
+++ b/xlators/features/index/src/index.c
@@ -8,11 +8,11 @@
cases as published by the Free Software Foundation.
*/
#include "index.h"
-#include "options.h"
+#include <glusterfs/options.h>
#include "glusterfs3-xdr.h"
-#include "syscall.h"
-#include "syncop.h"
-#include "common-utils.h"
+#include <glusterfs/syscall.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/common-utils.h>
#include "index-messages.h"
#include <ftw.h>
#include <libgen.h> /* for dirname() */
@@ -1685,21 +1685,25 @@ index_get_gfid_type(void *opaque)
loc_wipe(&loc);
- entry->d_type = IA_INVAL;
+ entry->d_type = gf_d_type_from_ia_type(IA_INVAL);
+ entry->d_stat.ia_type = IA_INVAL;
if (gf_uuid_parse(entry->d_name, loc.gfid))
continue;
loc.inode = inode_find(args->parent->table, loc.gfid);
if (loc.inode) {
- entry->d_type = loc.inode->ia_type;
+ entry->d_stat.ia_type = loc.inode->ia_type;
+ entry->d_type = gf_d_type_from_ia_type(loc.inode->ia_type);
continue;
}
loc.inode = inode_new(args->parent->table);
if (!loc.inode)
continue;
ret = syncop_lookup(FIRST_CHILD(this), &loc, &iatt, 0, 0, 0);
- if (ret == 0)
- entry->d_type = iatt.ia_type;
+ if (ret == 0) {
+ entry->d_type = gf_d_type_from_ia_type(iatt.ia_type);
+ entry->d_stat = iatt;
+ }
}
loc_wipe(&loc);
@@ -2100,7 +2104,7 @@ index_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
worker_enqueue(this, stub);
return 0;
normal:
- ret = dict_get_str(xattr_req, "link-count", &flag);
+ ret = dict_get_str_sizen(xattr_req, "link-count", &flag);
if ((ret == 0) && (strcmp(flag, GF_XATTROP_INDEX_COUNT) == 0)) {
STACK_WIND(frame, index_lookup_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
@@ -2474,6 +2478,7 @@ out:
GF_FREE(priv);
this->private = NULL;
mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
}
if (attr_inited)
@@ -2587,7 +2592,7 @@ notify(xlator_t *this, int event, void *data, ...)
if ((event == GF_EVENT_PARENT_DOWN) && victim->cleanup_starting) {
stub_cnt = GF_ATOMIC_GET(priv->stub_cnt);
- clock_gettime(CLOCK_REALTIME, &sleep_till);
+ timespec_now_realtime(&sleep_till);
sleep_till.tv_sec += 1;
/* Wait for draining stub from queue before notify PARENT_DOWN */
@@ -2661,3 +2666,17 @@ struct volume_options options[] = {
.default_value = "trusted.afr.{{ volume.name }}"},
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "index",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/index/src/index.h b/xlators/features/index/src/index.h
index 149cfd415b3..a2b6e6e2570 100644
--- a/xlators/features/index/src/index.h
+++ b/xlators/features/index/src/index.h
@@ -11,11 +11,11 @@
#ifndef __INDEX_H__
#define __INDEX_H__
-#include "xlator.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "byte-order.h"
-#include "common-utils.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/common-utils.h>
#include "index-mem-types.h"
#define INDEX_THREAD_STACK_SIZE ((size_t)(1024 * 1024))
diff --git a/xlators/features/leases/src/leases-internal.c b/xlators/features/leases/src/leases-internal.c
index ba0db4f9b4a..56dee244281 100644
--- a/xlators/features/leases/src/leases-internal.c
+++ b/xlators/features/leases/src/leases-internal.c
@@ -73,7 +73,7 @@ out:
* timeout value(in seconds) set as an option to this xlator.
* -1 error case
*/
-int32_t
+static int32_t
get_recall_lease_timeout(xlator_t *this)
{
leases_private_t *priv = NULL;
@@ -356,9 +356,8 @@ out:
static lease_inode_t *
new_lease_inode(inode_t *inode)
{
- lease_inode_t *l_inode = NULL;
-
- l_inode = GF_CALLOC(1, sizeof(*l_inode), gf_leases_mt_lease_inode_t);
+ lease_inode_t *l_inode = GF_MALLOC(sizeof(*l_inode),
+ gf_leases_mt_lease_inode_t);
if (!l_inode)
goto out;
@@ -379,9 +378,8 @@ __destroy_lease_inode(lease_inode_t *l_inode)
static lease_client_t *
new_lease_client(const char *client_uid)
{
- lease_client_t *clnt = NULL;
-
- clnt = GF_CALLOC(1, sizeof(*clnt), gf_leases_mt_lease_client_t);
+ lease_client_t *clnt = GF_MALLOC(sizeof(*clnt),
+ gf_leases_mt_lease_client_t);
if (!clnt)
goto out;
@@ -448,29 +446,29 @@ out:
static int
add_inode_to_client_list(xlator_t *this, inode_t *inode, const char *client_uid)
{
- int ret = 0;
- leases_private_t *priv = NULL;
+ leases_private_t *priv = this->private;
lease_client_t *clnt = NULL;
- lease_inode_t *lease_inode = NULL;
- priv = this->private;
+ lease_inode_t *lease_inode = new_lease_inode(inode);
+ if (!lease_inode)
+ return -ENOMEM;
+
pthread_mutex_lock(&priv->mutex);
{
clnt = __get_or_new_lease_client(this, priv, client_uid);
- GF_CHECK_ALLOC(clnt, ret, out);
-
- lease_inode = new_lease_inode(inode);
- GF_CHECK_ALLOC(lease_inode, ret, out);
-
+ if (!clnt) {
+ pthread_mutex_unlock(&priv->mutex);
+ __destroy_lease_inode(lease_inode);
+ return -ENOMEM;
+ }
list_add_tail(&clnt->inode_list, &lease_inode->list);
- gf_msg_debug(this->name, 0,
- "Added a new inode:%p to the client(%s) "
- "cleanup list, gfid(%s)",
- inode, client_uid, uuid_utoa(inode->gfid));
}
-out:
pthread_mutex_unlock(&priv->mutex);
- return ret;
+ gf_msg_debug(this->name, 0,
+ "Added a new inode:%p to the client(%s) "
+ "cleanup list, gfid(%s)",
+ inode, client_uid, uuid_utoa(inode->gfid));
+ return 0;
}
/* Add lease entry to the corresponding client entry.
@@ -587,15 +585,17 @@ remove_from_clnt_list(xlator_t *this, const char *client_uid, inode_t *inode)
{
clnt = __get_lease_client(this, priv, client_uid);
if (!clnt) {
+ pthread_mutex_unlock(&priv->mutex);
gf_msg(this->name, GF_LOG_ERROR, 0, LEASE_MSG_CLNT_NOTFOUND,
"There is no client entry found in the cleanup list");
- pthread_mutex_unlock(&priv->mutex);
goto out;
}
ret = __remove_inode_from_clnt_list(this, clnt, inode);
if (ret) {
+ pthread_mutex_unlock(&priv->mutex);
gf_msg(this->name, GF_LOG_ERROR, 0, LEASE_MSG_INODE_NOTFOUND,
"There is no inode entry found in the cleanup list");
+ goto out;
}
}
pthread_mutex_unlock(&priv->mutex);
@@ -676,6 +676,7 @@ __remove_lease(xlator_t *this, inode_t *inode, lease_inode_ctx_t *lease_ctx,
if (lease_ctx->lease_cnt == 0 && lease_ctx->timer) {
ret = gf_tw_del_timer(priv->timer_wheel, lease_ctx->timer);
lease_ctx->recall_in_progress = _gf_false;
+ lease_ctx->timer = NULL;
}
out:
return ret;
@@ -853,20 +854,20 @@ recall_lease_timer_handler(struct gf_tw_timer_list *timer, void *data,
priv = timer_data->this->private;
inode = timer_data->inode;
+ lease_inode = new_lease_inode(inode);
+ if (!lease_inode) {
+ errno = ENOMEM;
+ goto out;
+ }
pthread_mutex_lock(&priv->mutex);
{
- lease_inode = new_lease_inode(inode);
- if (!lease_inode) {
- errno = ENOMEM;
- goto out;
- }
list_add_tail(&lease_inode->list, &priv->recall_list);
pthread_cond_broadcast(&priv->cond);
}
+ pthread_mutex_unlock(&priv->mutex);
out:
/* unref the inode_ref taken by timer_data in __recall_lease */
inode_unref(timer_data->inode);
- pthread_mutex_unlock(&priv->mutex);
GF_FREE(timer);
}
@@ -886,6 +887,7 @@ __recall_lease(xlator_t *this, lease_inode_ctx_t *lease_ctx)
struct gf_tw_timer_list *timer = NULL;
leases_private_t *priv = NULL;
lease_timer_data_t *timer_data = NULL;
+ time_t recall_time;
if (lease_ctx->recall_in_progress) {
gf_msg_debug(this->name, 0,
@@ -895,6 +897,7 @@ __recall_lease(xlator_t *this, lease_inode_ctx_t *lease_ctx)
}
priv = this->private;
+ recall_time = gf_time();
list_for_each_entry_safe(lease_entry, tmp, &lease_ctx->lease_id_list,
lease_id_list)
{
@@ -918,9 +921,9 @@ __recall_lease(xlator_t *this, lease_inode_ctx_t *lease_ctx)
}
lease_ctx->recall_in_progress = _gf_true;
- lease_entry->recall_time = time(NULL);
+ lease_entry->recall_time = recall_time;
}
- timer = GF_CALLOC(1, sizeof(*timer), gf_common_mt_tw_timer_list);
+ timer = GF_MALLOC(sizeof(*timer), gf_common_mt_tw_timer_list);
if (!timer) {
goto out;
}
@@ -1145,12 +1148,13 @@ check_lease_conflict(call_frame_t *frame, inode_t *inode, const char *lease_id,
pthread_mutex_lock(&lease_ctx->lock);
{
if (lease_ctx->lease_type == NONE) {
+ pthread_mutex_unlock(&lease_ctx->lock);
gf_msg_debug(frame->this->name, 0,
"No leases found continuing with the"
" fop:%s",
gf_fop_list[frame->root->op]);
ret = WIND_FOP;
- goto unlock;
+ goto out;
}
conflicts = __check_lease_conflict(frame, lease_ctx, lease_id,
is_write_fop);
@@ -1177,7 +1181,6 @@ check_lease_conflict(call_frame_t *frame, inode_t *inode, const char *lease_id,
}
}
}
-unlock:
pthread_mutex_unlock(&lease_ctx->lock);
out:
return ret;
@@ -1354,6 +1357,7 @@ expired_recall_cleanup(void *data)
lease_inode_t *tmp = NULL;
leases_private_t *priv = NULL;
xlator_t *this = NULL;
+ time_t time_now;
GF_VALIDATE_OR_GOTO("leases", data, out);
@@ -1363,6 +1367,7 @@ expired_recall_cleanup(void *data)
gf_msg_debug(this->name, 0, "Started the expired_recall_cleanup thread");
while (1) {
+ time_now = gf_time();
pthread_mutex_lock(&priv->mutex);
{
if (priv->fini) {
@@ -1371,7 +1376,7 @@ expired_recall_cleanup(void *data)
}
INIT_LIST_HEAD(&recall_cleanup_list);
if (list_empty(&priv->recall_list)) {
- sleep_till.tv_sec = time(NULL) + 600;
+ sleep_till.tv_sec = time_now + 600;
pthread_cond_timedwait(&priv->cond, &priv->mutex, &sleep_till);
}
if (!list_empty(&priv->recall_list)) {
diff --git a/xlators/features/leases/src/leases-mem-types.h b/xlators/features/leases/src/leases-mem-types.h
index 3a646cbc316..25664b44156 100644
--- a/xlators/features/leases/src/leases-mem-types.h
+++ b/xlators/features/leases/src/leases-mem-types.h
@@ -11,7 +11,7 @@
#ifndef __LEASES_MEM_TYPES_H__
#define __LEASES_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_leases_mem_types_ {
gf_leases_mt_private_t = gf_common_mt_end + 1,
diff --git a/xlators/features/leases/src/leases-messages.h b/xlators/features/leases/src/leases-messages.h
index 81a517f63cd..da696b832de 100644
--- a/xlators/features/leases/src/leases-messages.h
+++ b/xlators/features/leases/src/leases-messages.h
@@ -11,7 +11,7 @@
#ifndef _LEASES_MESSAGES_H_
#define _LEASES_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
diff --git a/xlators/features/leases/src/leases.c b/xlators/features/leases/src/leases.c
index baeb8c7361c..04bee50ba3f 100644
--- a/xlators/features/leases/src/leases.c
+++ b/xlators/features/leases/src/leases.c
@@ -35,6 +35,7 @@ leases_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
char *lease_id = NULL;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
fd_ctx = GF_CALLOC(1, sizeof(*fd_ctx), gf_leases_mt_fd_ctx_t);
if (!fd_ctx) {
@@ -109,6 +110,7 @@ leases_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
int ret = 0;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
GET_FLAGS(frame->root->op, fd->flags);
@@ -157,6 +159,7 @@ leases_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
int ret = 0;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
GET_FLAGS(frame->root->op, fd->flags);
@@ -202,6 +205,7 @@ leases_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
int ret = 0;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
GET_FLAGS_LK(cmd, flock->l_type, fd->flags);
@@ -240,6 +244,7 @@ leases_lease(call_frame_t *frame, xlator_t *this, loc_t *loc,
int32_t op_ret = 0;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
ret = process_lease_req(frame, this, loc->inode, lease);
if (ret < 0) {
@@ -282,6 +287,7 @@ leases_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
int ret = 0;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
GET_FLAGS(frame->root->op, 0);
@@ -328,6 +334,7 @@ leases_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
int ret = 0;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
GET_FLAGS(frame->root->op, 0);
@@ -376,6 +383,7 @@ leases_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
int ret = 0;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
/* should the lease be also checked for newloc */
GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
@@ -424,6 +432,7 @@ leases_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
int ret = 0;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
GET_FLAGS(frame->root->op, 0);
@@ -470,6 +479,7 @@ leases_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
int ret = 0;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
GET_FLAGS(frame->root->op, 0);
@@ -516,6 +526,7 @@ leases_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
int ret = 0;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
GET_FLAGS(frame->root->op, flags);
@@ -563,6 +574,7 @@ leases_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
int ret = 0;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
GET_FLAGS(frame->root->op, fd->flags);
@@ -607,6 +619,7 @@ leases_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
int ret = 0;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
GET_FLAGS(frame->root->op, 0); /* TODO:fd->flags?*/
@@ -652,6 +665,7 @@ leases_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
int ret = 0;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
GET_FLAGS(frame->root->op, fd->flags);
@@ -697,6 +711,7 @@ leases_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
int ret = 0;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
GET_FLAGS(frame->root->op, fd->flags);
@@ -744,6 +759,7 @@ leases_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
int ret = 0;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
GET_FLAGS(frame->root->op, fd->flags);
@@ -789,6 +805,7 @@ leases_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
int ret = 0;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
GET_FLAGS(frame->root->op, fd->flags);
@@ -834,6 +851,7 @@ leases_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
uint64_t ctx = 0;
EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
GET_FLAGS(frame->root->op, fd->flags);
@@ -1000,14 +1018,14 @@ out:
return ret;
}
-int
+void
fini(xlator_t *this)
{
leases_private_t *priv = NULL;
priv = this->private;
if (!priv) {
- return 0;
+ return;
}
this->private = NULL;
@@ -1019,12 +1037,12 @@ fini(xlator_t *this)
priv->inited_recall_thr = _gf_false;
}
- GF_FREE(priv);
- if (this->ctx->tw) {
+ if (priv->timer_wheel) {
glusterfs_ctx_tw_put(this->ctx);
- this->ctx->tw = NULL;
}
- return 0;
+
+ GF_FREE(priv);
+ return;
}
static int
@@ -1135,3 +1153,16 @@ struct volume_options options[] = {
" will be forcefully purged by the server."},
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "leases",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/leases/src/leases.h b/xlators/features/leases/src/leases.h
index 6ac712b0bb0..a6e8a6824cc 100644
--- a/xlators/features/leases/src/leases.h
+++ b/xlators/features/leases/src/leases.h
@@ -16,15 +16,15 @@
#include "config.h"
#endif
-#include "common-utils.h"
-#include "glusterfs.h"
-#include "xlator.h"
-#include "call-stub.h"
-#include "logging.h"
-#include "client_t.h"
-#include "lkowner.h"
-#include "locking.h"
-#include "upcall-utils.h"
+#include <glusterfs/common-utils.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/client_t.h>
+#include <glusterfs/lkowner.h>
+#include <glusterfs/locking.h>
+#include <glusterfs/upcall-utils.h>
#include "timer-wheel.h"
#include "leases-mem-types.h"
#include "leases-messages.h"
@@ -45,6 +45,14 @@
goto label; \
} while (0)
+#define EXIT_IF_INTERNAL_FOP(frame, xdata, label) \
+ do { \
+ if (frame->root->pid < 0) \
+ goto label; \
+ if (xdata && dict_get(xdata, GLUSTERFS_INTERNAL_FOP_KEY)) \
+ goto label; \
+ } while (0)
+
#define GET_LEASE_ID(xdata, lease_id, client_uid) \
do { \
int ret_val = -1; \
@@ -144,17 +152,19 @@
} while (0)
struct _leases_private {
- gf_boolean_t leases_enabled;
- int32_t recall_lease_timeout;
struct list_head client_list;
struct list_head recall_list;
struct tvec_base *timer_wheel; /* timer wheel where the recall request
is qued and waits for unlock/expiry */
- gf_boolean_t fini;
pthread_t recall_thr;
- gf_boolean_t inited_recall_thr;
pthread_mutex_t mutex;
pthread_cond_t cond;
+ int32_t recall_lease_timeout;
+ gf_boolean_t inited_recall_thr;
+ gf_boolean_t fini;
+ gf_boolean_t leases_enabled;
+
+ char _pad[1]; /* manual padding */
};
typedef struct _leases_private leases_private_t;
@@ -181,18 +191,20 @@ typedef struct _lease_fd_ctx lease_fd_ctx_t;
struct _lease_inode_ctx {
struct list_head lease_id_list; /* clients that have taken leases */
int lease_type_cnt[GF_LEASE_MAX_TYPE + 1];
+ uint64_t lease_cnt; /* Total number of leases on this inode */
+ uint64_t openfd_cnt; /* number of fds open */
+ struct list_head blocked_list; /* List of fops blocked until the
+ lease recall is complete */
+ inode_t *inode; /* this represents the inode on which the
+ lock was taken, required mainly during
+ disconnect cleanup */
+ struct gf_tw_timer_list *timer;
+ pthread_mutex_t lock;
int lease_type; /* Types of leases acquired */
- uint64_t lease_cnt; /* Total number of leases on this inode */
- uint64_t openfd_cnt; /* number of fds open */
gf_boolean_t recall_in_progress; /* if lease recall is sent on this inode */
gf_boolean_t blocked_fops_resuming; /* if blocked fops are being resumed */
- struct list_head blocked_list; /* List of fops blocked until the
- lease recall is complete */
- inode_t *inode; /* this represents the inode on which the
- lock was taken, required mainly during
- disconnect cleanup */
- struct gf_tw_timer_list *timer;
- pthread_mutex_t lock;
+
+ char _pad[2]; /* manual padding */
};
typedef struct _lease_inode_ctx lease_inode_ctx_t;
@@ -202,11 +214,12 @@ struct _lease_id_entry {
char *client_uid; /* uid of the client that has
taken the lease */
int lease_type_cnt[GF_LEASE_MAX_TYPE + 1]; /* count of each lease type */
- int lease_type; /* Union of all the leases taken
- under the given lease id */
uint64_t lease_cnt; /* Number of leases taken under the
given lease id */
time_t recall_time; /* time @ which recall was sent */
+ int lease_type; /* Union of all the leases taken
+ under the given lease id */
+ char _pad[4]; /* manual padding */
};
typedef struct _lease_id_entry lease_id_entry_t;
@@ -226,9 +239,6 @@ typedef struct __lease_timer_data lease_timer_data_t;
gf_boolean_t
is_leases_enabled(xlator_t *this);
-int32_t
-get_recall_lease_timeout(xlator_t *this);
-
lease_inode_ctx_t *
lease_ctx_get(inode_t *inode, xlator_t *this);
diff --git a/xlators/features/locks/src/clear.c b/xlators/features/locks/src/clear.c
index 0966ee753d6..ab1eac68a53 100644
--- a/xlators/features/locks/src/clear.c
+++ b/xlators/features/locks/src/clear.c
@@ -12,17 +12,23 @@
#include <limits.h>
#include <pthread.h>
-#include "glusterfs.h"
-#include "compat.h"
-#include "xlator.h"
-#include "logging.h"
-#include "common-utils.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/common-utils.h>
#include "locks.h"
#include "common.h"
-#include "statedump.h"
+#include <glusterfs/statedump.h>
#include "clear.h"
+const char *clrlk_type_names[CLRLK_TYPE_MAX] = {
+ [CLRLK_INODE] = "inode",
+ [CLRLK_ENTRY] = "entry",
+ [CLRLK_POSIX] = "posix",
+};
+
int
clrlk_get_kind(char *kind)
{
@@ -175,9 +181,9 @@ clrlk_clear_posixlk(xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args,
if (plock->blocked) {
bcount++;
pl_trace_out(this, plock->frame, NULL, NULL, F_SETLKW,
- &plock->user_flock, -1, EAGAIN, NULL);
+ &plock->user_flock, -1, EINTR, NULL);
- STACK_UNWIND_STRICT(lk, plock->frame, -1, EAGAIN,
+ STACK_UNWIND_STRICT(lk, plock->frame, -1, EINTR,
&plock->user_flock, NULL);
} else {
@@ -254,14 +260,16 @@ blkd:
}
pthread_mutex_unlock(&pl_inode->mutex);
- list_for_each_entry_safe(ilock, tmp, &released, blocked_locks)
- {
- list_del_init(&ilock->blocked_locks);
- pl_trace_out(this, ilock->frame, NULL, NULL, F_SETLKW,
- &ilock->user_flock, -1, EAGAIN, ilock->volume);
- STACK_UNWIND_STRICT(inodelk, ilock->frame, -1, EAGAIN, NULL);
- // No need to take lock as the locks are only in one list
- __pl_inodelk_unref(ilock);
+ if (!list_empty(&released)) {
+ list_for_each_entry_safe(ilock, tmp, &released, blocked_locks)
+ {
+ list_del_init(&ilock->blocked_locks);
+ pl_trace_out(this, ilock->frame, NULL, NULL, F_SETLKW,
+ &ilock->user_flock, -1, EAGAIN, ilock->volume);
+ STACK_UNWIND_STRICT(inodelk, ilock->frame, -1, EAGAIN, NULL);
+ // No need to take lock as the locks are only in one list
+ __pl_inodelk_unref(ilock);
+ }
}
if (!(args->kind & CLRLK_GRANTED)) {
@@ -357,15 +365,17 @@ blkd:
}
pthread_mutex_unlock(&pl_inode->mutex);
- list_for_each_entry_safe(elock, tmp, &released, blocked_locks)
- {
- list_del_init(&elock->blocked_locks);
- entrylk_trace_out(this, elock->frame, elock->volume, NULL, NULL,
- elock->basename, ENTRYLK_LOCK, elock->type, -1,
- EAGAIN);
- STACK_UNWIND_STRICT(entrylk, elock->frame, -1, EAGAIN, NULL);
+ if (!list_empty(&released)) {
+ list_for_each_entry_safe(elock, tmp, &released, blocked_locks)
+ {
+ list_del_init(&elock->blocked_locks);
+ entrylk_trace_out(this, elock->frame, elock->volume, NULL, NULL,
+ elock->basename, ENTRYLK_LOCK, elock->type, -1,
+ EAGAIN);
+ STACK_UNWIND_STRICT(entrylk, elock->frame, -1, EAGAIN, NULL);
- __pl_entrylk_unref(elock);
+ __pl_entrylk_unref(elock);
+ }
}
if (!(args->kind & CLRLK_GRANTED)) {
diff --git a/xlators/features/locks/src/clear.h b/xlators/features/locks/src/clear.h
index 08662746f98..bc118cb1b81 100644
--- a/xlators/features/locks/src/clear.h
+++ b/xlators/features/locks/src/clear.h
@@ -10,9 +10,9 @@
#ifndef __CLEAR_H__
#define __CLEAR_H__
-#include "compat-errno.h"
-#include "stack.h"
-#include "call-stub.h"
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/stack.h>
+#include <glusterfs/call-stub.h>
#include "locks.h"
typedef enum {
@@ -22,6 +22,8 @@ typedef enum {
CLRLK_TYPE_MAX
} clrlk_type;
+extern const char *clrlk_type_names[];
+
typedef enum {
CLRLK_BLOCKED = 1,
CLRLK_GRANTED,
diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c
index 5ad5415ed79..a2c6be93e03 100644
--- a/xlators/features/locks/src/common.c
+++ b/xlators/features/locks/src/common.c
@@ -12,11 +12,10 @@
#include <limits.h>
#include <pthread.h>
-#include "glusterfs.h"
-#include "compat.h"
-#include "xlator.h"
-#include "logging.h"
-#include "common-utils.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/syncop.h>
#include "locks.h"
#include "common.h"
@@ -213,13 +212,11 @@ void
pl_trace_in(xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, int cmd,
struct gf_flock *flock, const char *domain)
{
- posix_locks_private_t *priv = NULL;
+ posix_locks_private_t *priv = this->private;
char pl_locker[256];
char pl_lockee[256];
char pl_lock[256];
- priv = this->private;
-
if (!priv->trace)
return;
@@ -291,13 +288,11 @@ pl_trace_block(xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
int cmd, struct gf_flock *flock, const char *domain)
{
- posix_locks_private_t *priv = NULL;
+ posix_locks_private_t *priv = this->private;
char pl_locker[256];
char pl_lockee[256];
char pl_lock[256];
- priv = this->private;
-
if (!priv->trace)
return;
@@ -326,7 +321,7 @@ pl_trace_flush(xlator_t *this, call_frame_t *frame, fd_t *fd)
if (!priv->trace)
return;
- pl_inode = pl_inode_get(this, fd->inode);
+ pl_inode = pl_inode_get(this, fd->inode, NULL);
if (pl_inode && __pl_inode_is_empty(pl_inode))
return;
@@ -362,7 +357,7 @@ pl_update_refkeeper(xlator_t *this, inode_t *inode)
int need_unref = 0;
int need_ref = 0;
- pl_inode = pl_inode_get(this, inode);
+ pl_inode = pl_inode_get(this, inode, NULL);
if (!pl_inode)
return;
@@ -389,8 +384,51 @@ pl_update_refkeeper(xlator_t *this, inode_t *inode)
inode_ref(inode);
}
+/* Get lock enforcement info from disk */
+int
+pl_fetch_mlock_info_from_disk(xlator_t *this, pl_inode_t *pl_inode,
+ pl_local_t *local)
+{
+ dict_t *xdata_rsp = NULL;
+ int ret = 0;
+ int op_ret = 0;
+
+ if (!local) {
+ return -1;
+ }
+
+ if (local->fd) {
+ op_ret = syncop_fgetxattr(this, local->fd, &xdata_rsp,
+ GF_ENFORCE_MANDATORY_LOCK, NULL, NULL);
+ } else {
+ op_ret = syncop_getxattr(this, &local->loc[0], &xdata_rsp,
+ GF_ENFORCE_MANDATORY_LOCK, NULL, NULL);
+ }
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ if (op_ret >= 0) {
+ pl_inode->mlock_enforced = _gf_true;
+ pl_inode->check_mlock_info = _gf_false;
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, -op_ret, 0,
+ "getxattr failed with %d", op_ret);
+ pl_inode->mlock_enforced = _gf_false;
+
+ if (-op_ret == ENODATA) {
+ pl_inode->check_mlock_info = _gf_false;
+ } else {
+ pl_inode->check_mlock_info = _gf_true;
+ }
+ }
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ return ret;
+}
+
pl_inode_t *
-pl_inode_get(xlator_t *this, inode_t *inode)
+pl_inode_get(xlator_t *this, inode_t *inode, pl_local_t *local)
{
uint64_t tmp_pl_inode = 0;
pl_inode_t *pl_inode = NULL;
@@ -403,6 +441,7 @@ pl_inode_get(xlator_t *this, inode_t *inode)
pl_inode = (pl_inode_t *)(long)tmp_pl_inode;
goto unlock;
}
+
pl_inode = GF_CALLOC(1, sizeof(*pl_inode), gf_locks_mt_pl_inode_t);
if (!pl_inode) {
goto unlock;
@@ -411,6 +450,7 @@ pl_inode_get(xlator_t *this, inode_t *inode)
gf_log(this->name, GF_LOG_TRACE, "Allocating new pl inode");
pthread_mutex_init(&pl_inode->mutex, NULL);
+ pthread_cond_init(&pl_inode->check_fop_wind_count, 0);
INIT_LIST_HEAD(&pl_inode->dom_list);
INIT_LIST_HEAD(&pl_inode->ext_list);
@@ -420,8 +460,16 @@ pl_inode_get(xlator_t *this, inode_t *inode)
INIT_LIST_HEAD(&pl_inode->blocked_calls);
INIT_LIST_HEAD(&pl_inode->metalk_list);
INIT_LIST_HEAD(&pl_inode->queued_locks);
+ INIT_LIST_HEAD(&pl_inode->waiting);
gf_uuid_copy(pl_inode->gfid, inode->gfid);
+ pl_inode->check_mlock_info = _gf_true;
+ pl_inode->mlock_enforced = _gf_false;
+
+ /* -2 means never looked up. -1 means something went wrong and link
+ * tracking is disabled. */
+ pl_inode->links = -2;
+
ret = __inode_ctx_put(inode, this, (uint64_t)(long)(pl_inode));
if (ret) {
pthread_mutex_destroy(&pl_inode->mutex);
@@ -433,13 +481,23 @@ pl_inode_get(xlator_t *this, inode_t *inode)
unlock:
UNLOCK(&inode->lock);
+ if ((pl_inode != NULL) && pl_is_mandatory_locking_enabled(pl_inode) &&
+ pl_inode->check_mlock_info && local) {
+ /* Note: The lock enforcement information per file can be stored in the
+ attribute flag of stat(x) in posix. With that there won't be a need
+ for doing getxattr post a reboot
+ */
+ pl_fetch_mlock_info_from_disk(this, pl_inode, local);
+ }
+
return pl_inode;
}
/* Create a new posix_lock_t */
posix_lock_t *
new_posix_lock(struct gf_flock *flock, client_t *client, pid_t client_pid,
- gf_lkowner_t *owner, fd_t *fd, uint32_t lk_flags, int blocking)
+ gf_lkowner_t *owner, fd_t *fd, uint32_t lk_flags, int blocking,
+ int32_t *op_errno)
{
posix_lock_t *lock = NULL;
@@ -447,8 +505,14 @@ new_posix_lock(struct gf_flock *flock, client_t *client, pid_t client_pid,
GF_VALIDATE_OR_GOTO("posix-locks", client, out);
GF_VALIDATE_OR_GOTO("posix-locks", fd, out);
+ if (!pl_is_lk_owner_valid(owner, client)) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+
lock = GF_CALLOC(1, sizeof(posix_lock_t), gf_locks_mt_posix_lock_t);
if (!lock) {
+ *op_errno = ENOMEM;
goto out;
}
@@ -466,6 +530,7 @@ new_posix_lock(struct gf_flock *flock, client_t *client, pid_t client_pid,
if (lock->client_uid == NULL) {
GF_FREE(lock);
lock = NULL;
+ *op_errno = ENOMEM;
goto out;
}
@@ -540,13 +605,11 @@ static void
__insert_lock(pl_inode_t *pl_inode, posix_lock_t *lock)
{
if (lock->blocked)
- gettimeofday(&lock->blkd_time, NULL);
+ lock->blkd_time = gf_time();
else
- gettimeofday(&lock->granted_time, NULL);
+ lock->granted_time = gf_time();
list_add_tail(&lock->list, &pl_inode->ext_list);
-
- return;
}
/* Return true if the locks overlap, false otherwise */
@@ -902,7 +965,7 @@ grant_blocked_locks(xlator_t *this, pl_inode_t *pl_inode)
struct list_head granted_list;
posix_lock_t *tmp = NULL;
posix_lock_t *lock = NULL;
-
+ pl_local_t *local = NULL;
INIT_LIST_HEAD(&granted_list);
pthread_mutex_lock(&pl_inode->mutex);
@@ -917,9 +980,9 @@ grant_blocked_locks(xlator_t *this, pl_inode_t *pl_inode)
pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock,
0, 0, NULL);
-
- STACK_UNWIND_STRICT(lk, lock->frame, 0, 0, &lock->user_flock, NULL);
-
+ local = lock->frame->local;
+ PL_STACK_UNWIND_AND_FREE(local, lk, lock->frame, 0, 0,
+ &lock->user_flock, NULL);
__destroy_lock(lock);
}
@@ -934,10 +997,12 @@ pl_send_prelock_unlock(xlator_t *this, pl_inode_t *pl_inode,
0,
};
posix_lock_t *unlock_lock = NULL;
+ int32_t op_errno = 0;
struct list_head granted_list;
posix_lock_t *tmp = NULL;
posix_lock_t *lock = NULL;
+ pl_local_t *local = NULL;
int ret = -1;
@@ -951,7 +1016,7 @@ pl_send_prelock_unlock(xlator_t *this, pl_inode_t *pl_inode,
unlock_lock = new_posix_lock(&flock, old_lock->client, old_lock->client_pid,
&old_lock->owner, old_lock->fd,
- old_lock->lk_flags, 0);
+ old_lock->lk_flags, 0, &op_errno);
GF_VALIDATE_OR_GOTO(this->name, unlock_lock, out);
ret = 0;
@@ -965,9 +1030,9 @@ pl_send_prelock_unlock(xlator_t *this, pl_inode_t *pl_inode,
pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock,
0, 0, NULL);
-
- STACK_UNWIND_STRICT(lk, lock->frame, 0, 0, &lock->user_flock, NULL);
-
+ local = lock->frame->local;
+ PL_STACK_UNWIND_AND_FREE(local, lk, lock->frame, 0, 0,
+ &lock->user_flock, NULL);
__destroy_lock(lock);
}
@@ -1002,7 +1067,7 @@ pl_setlk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
if (__is_lock_grantable(pl_inode, lock)) {
if (pl_metalock_is_active(pl_inode)) {
- __pl_queue_lock(pl_inode, lock, can_block);
+ __pl_queue_lock(pl_inode, lock);
pthread_mutex_unlock(&pl_inode->mutex);
ret = -2;
goto out;
@@ -1015,7 +1080,7 @@ pl_setlk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
__insert_and_merge(pl_inode, lock);
} else if (can_block) {
if (pl_metalock_is_active(pl_inode)) {
- __pl_queue_lock(pl_inode, lock, can_block);
+ __pl_queue_lock(pl_inode, lock);
pthread_mutex_unlock(&pl_inode->mutex);
ret = -2;
goto out;
@@ -1026,6 +1091,10 @@ pl_setlk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid, lkowner_utoa(&lock->owner),
lock->user_flock.l_start, lock->user_flock.l_len);
+
+ pl_trace_block(this, lock->frame, NULL, NULL, F_SETLKW,
+ &lock->user_flock, NULL);
+
lock->blocked = 1;
__insert_lock(pl_inode, lock);
ret = -1;
@@ -1052,10 +1121,7 @@ out:
posix_lock_t *
pl_getlk(pl_inode_t *pl_inode, posix_lock_t *lock)
{
- posix_lock_t *conf = NULL;
-
- conf = first_conflicting_overlap(pl_inode, lock);
-
+ posix_lock_t *conf = first_conflicting_overlap(pl_inode, lock);
if (conf == NULL) {
lock->fl_type = F_UNLCK;
return lock;
@@ -1077,3 +1143,449 @@ pl_does_monkey_want_stuck_lock()
return _gf_true;
return _gf_false;
}
+
+int
+pl_lock_preempt(pl_inode_t *pl_inode, posix_lock_t *reqlock)
+{
+ posix_lock_t *lock = NULL;
+ posix_lock_t *i = NULL;
+ pl_rw_req_t *rw = NULL;
+ pl_rw_req_t *itr = NULL;
+ struct list_head unwind_blist = {
+ 0,
+ };
+ struct list_head unwind_rw_list = {
+ 0,
+ };
+ int ret = 0;
+
+ INIT_LIST_HEAD(&unwind_blist);
+ INIT_LIST_HEAD(&unwind_rw_list);
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ /*
+ - go through the lock list
+ - remove all locks from different owners
+ - same owner locks will be added or substracted based on
+ the new request
+ - add the new lock
+ */
+ list_for_each_entry_safe(lock, i, &pl_inode->ext_list, list)
+ {
+ if (lock->blocked) {
+ list_del_init(&lock->list);
+ list_add(&lock->list, &unwind_blist);
+ continue;
+ }
+
+ if (locks_overlap(lock, reqlock)) {
+ if (same_owner(lock, reqlock))
+ continue;
+
+ /* remove conflicting locks */
+ list_del_init(&lock->list);
+ __delete_lock(lock);
+ __destroy_lock(lock);
+ }
+ }
+
+ __insert_and_merge(pl_inode, reqlock);
+
+ list_for_each_entry_safe(rw, itr, &pl_inode->rw_list, list)
+ {
+ list_del_init(&rw->list);
+ list_add(&rw->list, &unwind_rw_list);
+ }
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ /* unwind blocked locks */
+ list_for_each_entry_safe(lock, i, &unwind_blist, list)
+ {
+ PL_STACK_UNWIND_AND_FREE(((pl_local_t *)lock->frame->local), lk,
+ lock->frame, -1, EBUSY, &lock->user_flock,
+ NULL);
+ __destroy_lock(lock);
+ }
+
+ /* unwind blocked IOs */
+ list_for_each_entry_safe(rw, itr, &unwind_rw_list, list)
+ {
+ pl_clean_local(rw->stub->frame->local);
+ call_unwind_error(rw->stub, -1, EBUSY);
+ }
+
+ return ret;
+}
+
+/* Return true in case we need to ensure mandatory-locking
+ * semantics under different modes.
+ */
+gf_boolean_t
+pl_is_mandatory_locking_enabled(pl_inode_t *pl_inode)
+{
+ posix_locks_private_t *priv = THIS->private;
+
+ if (priv->mandatory_mode == MLK_FILE_BASED && pl_inode->mandatory)
+ return _gf_true;
+ else if (priv->mandatory_mode == MLK_FORCED ||
+ priv->mandatory_mode == MLK_OPTIMAL)
+ return _gf_true;
+
+ return _gf_false;
+}
+
+void
+pl_clean_local(pl_local_t *local)
+{
+ if (!local)
+ return;
+
+ if (local->inodelk_dom_count_req)
+ data_unref(local->inodelk_dom_count_req);
+ loc_wipe(&local->loc[0]);
+ loc_wipe(&local->loc[1]);
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->inode)
+ inode_unref(local->inode);
+ mem_put(local);
+}
+
+/*
+TODO: detach local initialization from PL_LOCAL_GET_REQUESTS and add it here
+*/
+int
+pl_local_init(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+{
+ pl_local_t *local = NULL;
+
+ if (!loc && !fd) {
+ return -1;
+ }
+
+ if (!frame->local) {
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0,
+ "mem allocation failed");
+ return -1;
+ }
+
+ local->inode = (loc ? inode_ref(loc->inode) : inode_ref(fd->inode));
+
+ frame->local = local;
+ }
+
+ return 0;
+}
+
+gf_boolean_t
+pl_is_lk_owner_valid(gf_lkowner_t *owner, client_t *client)
+{
+ if (client && (client->opversion < GD_OP_VERSION_7_0)) {
+ return _gf_true;
+ }
+
+ if (is_lk_owner_null(owner)) {
+ return _gf_false;
+ }
+ return _gf_true;
+}
+
+static int32_t
+pl_inode_from_loc(loc_t *loc, inode_t **pinode)
+{
+ inode_t *inode = NULL;
+ int32_t error = 0;
+
+ if (loc->inode != NULL) {
+ inode = inode_ref(loc->inode);
+ goto done;
+ }
+
+ if (loc->parent == NULL) {
+ error = EINVAL;
+ goto done;
+ }
+
+ if (!gf_uuid_is_null(loc->gfid)) {
+ inode = inode_find(loc->parent->table, loc->gfid);
+ if (inode != NULL) {
+ goto done;
+ }
+ }
+
+ if (loc->name == NULL) {
+ error = EINVAL;
+ goto done;
+ }
+
+ inode = inode_grep(loc->parent->table, loc->parent, loc->name);
+ if (inode == NULL) {
+ /* We haven't found any inode. This means that the file doesn't exist
+ * or that even if it exists, we don't have any knowledge about it, so
+ * we don't have locks on it either, which is fine for our purposes. */
+ goto done;
+ }
+
+done:
+ *pinode = inode;
+
+ return error;
+}
+
+static gf_boolean_t
+pl_inode_has_owners(xlator_t *xl, client_t *client, pl_inode_t *pl_inode,
+ struct timespec *now, struct list_head *contend)
+{
+ pl_dom_list_t *dom;
+ pl_inode_lock_t *lock;
+ gf_boolean_t has_owners = _gf_false;
+
+ list_for_each_entry(dom, &pl_inode->dom_list, inode_list)
+ {
+ list_for_each_entry(lock, &dom->inodelk_list, list)
+ {
+ /* If the lock belongs to the same client, we assume it's related
+ * to the same operation, so we allow the removal to continue. */
+ if (lock->client == client) {
+ continue;
+ }
+ /* If the lock belongs to an internal process, we don't block the
+ * removal. */
+ if (lock->client_pid < 0) {
+ continue;
+ }
+ if (contend == NULL) {
+ return _gf_true;
+ }
+ has_owners = _gf_true;
+ inodelk_contention_notify_check(xl, lock, now, contend);
+ }
+ }
+
+ return has_owners;
+}
+
+int32_t
+pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc,
+ pl_inode_t **ppl_inode, struct list_head *contend)
+{
+ struct timespec now;
+ inode_t *inode;
+ pl_inode_t *pl_inode;
+ int32_t error;
+
+ pl_inode = NULL;
+
+ error = pl_inode_from_loc(loc, &inode);
+ if ((error != 0) || (inode == NULL)) {
+ goto done;
+ }
+
+ pl_inode = pl_inode_get(xl, inode, NULL);
+ if (pl_inode == NULL) {
+ inode_unref(inode);
+ error = ENOMEM;
+ goto done;
+ }
+
+ /* pl_inode_from_loc() already increments ref count for inode, so
+ * we only assign here our reference. */
+ pl_inode->inode = inode;
+
+ timespec_now(&now);
+
+ pthread_mutex_lock(&pl_inode->mutex);
+
+ if (pl_inode->removed) {
+ error = ESTALE;
+ goto unlock;
+ }
+
+ if (pl_inode_has_owners(xl, frame->root->client, pl_inode, &now, contend)) {
+ error = -1;
+ /* We skip the unlock here because the caller must create a stub when
+ * we return -1 and do a call to pl_inode_remove_complete(), which
+ * assumes the lock is still acquired and will release it once
+ * everything else is prepared. */
+ goto done;
+ }
+
+ pl_inode->is_locked = _gf_true;
+ pl_inode->remove_running++;
+
+unlock:
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+done:
+ *ppl_inode = pl_inode;
+
+ return error;
+}
+
+int32_t
+pl_inode_remove_complete(xlator_t *xl, pl_inode_t *pl_inode, call_stub_t *stub,
+ struct list_head *contend)
+{
+ pl_inode_lock_t *lock;
+ int32_t error = -1;
+
+ if (stub != NULL) {
+ list_add_tail(&stub->list, &pl_inode->waiting);
+ pl_inode->is_locked = _gf_true;
+ } else {
+ error = ENOMEM;
+
+ while (!list_empty(contend)) {
+ lock = list_first_entry(contend, pl_inode_lock_t, list);
+ list_del_init(&lock->list);
+ __pl_inodelk_unref(lock);
+ }
+ }
+
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ if (error < 0) {
+ inodelk_contention_notify(xl, contend);
+ }
+
+ inode_unref(pl_inode->inode);
+
+ return error;
+}
+
+void
+pl_inode_remove_wake(struct list_head *list)
+{
+ call_stub_t *stub;
+
+ while (!list_empty(list)) {
+ stub = list_first_entry(list, call_stub_t, list);
+ list_del_init(&stub->list);
+
+ call_resume(stub);
+ }
+}
+
+void
+pl_inode_remove_cbk(xlator_t *xl, pl_inode_t *pl_inode, int32_t error)
+{
+ struct list_head contend, granted;
+ struct timespec now;
+ pl_dom_list_t *dom;
+
+ if (pl_inode == NULL) {
+ return;
+ }
+
+ INIT_LIST_HEAD(&contend);
+ INIT_LIST_HEAD(&granted);
+ timespec_now(&now);
+
+ pthread_mutex_lock(&pl_inode->mutex);
+
+ if (error == 0) {
+ if (pl_inode->links >= 0) {
+ pl_inode->links--;
+ }
+ if (pl_inode->links == 0) {
+ pl_inode->removed = _gf_true;
+ }
+ }
+
+ pl_inode->remove_running--;
+
+ if ((pl_inode->remove_running == 0) && list_empty(&pl_inode->waiting)) {
+ pl_inode->is_locked = _gf_false;
+
+ list_for_each_entry(dom, &pl_inode->dom_list, inode_list)
+ {
+ __grant_blocked_inode_locks(xl, pl_inode, &granted, dom, &now,
+ &contend);
+ }
+ }
+
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ unwind_granted_inodes(xl, pl_inode, &granted);
+
+ inodelk_contention_notify(xl, &contend);
+
+ inode_unref(pl_inode->inode);
+}
+
+void
+pl_inode_remove_unlocked(xlator_t *xl, pl_inode_t *pl_inode,
+ struct list_head *list)
+{
+ call_stub_t *stub, *tmp;
+
+ if (!pl_inode->is_locked) {
+ return;
+ }
+
+ list_for_each_entry_safe(stub, tmp, &pl_inode->waiting, list)
+ {
+ if (!pl_inode_has_owners(xl, stub->frame->root->client, pl_inode, NULL,
+ NULL)) {
+ list_move_tail(&stub->list, list);
+ }
+ }
+}
+
+/* This function determines if an inodelk attempt can be done now or it needs
+ * to wait.
+ *
+ * Possible return values:
+ * < 0: An error occurred. Currently only -ESTALE can be returned if the
+ * inode has been deleted previously by unlink/rmdir/rename
+ * = 0: The lock can be attempted.
+ * > 0: The lock needs to wait because a conflicting remove operation is
+ * ongoing.
+ */
+int32_t
+pl_inode_remove_inodelk(pl_inode_t *pl_inode, pl_inode_lock_t *lock)
+{
+ pl_dom_list_t *dom;
+ pl_inode_lock_t *ilock;
+
+ /* If the inode has been deleted, we won't allow any lock. */
+ if (pl_inode->removed) {
+ return -ESTALE;
+ }
+
+ /* We only synchronize with locks made for regular operations coming from
+ * the user. Locks done for internal purposes are hard to control and could
+ * lead to long delays or deadlocks quite easily. */
+ if (lock->client_pid < 0) {
+ return 0;
+ }
+ if (!pl_inode->is_locked) {
+ return 0;
+ }
+ if (pl_inode->remove_running > 0) {
+ return 1;
+ }
+
+ list_for_each_entry(dom, &pl_inode->dom_list, inode_list)
+ {
+ list_for_each_entry(ilock, &dom->inodelk_list, list)
+ {
+ /* If a lock from the same client is already granted, we allow this
+ * one to continue. This is necessary to prevent deadlocks when
+ * multiple locks are taken for the same operation.
+ *
+ * On the other side it's unlikely that the same client sends
+ * completely unrelated locks for the same inode.
+ */
+ if (ilock->client == lock->client) {
+ return 0;
+ }
+ }
+ }
+
+ return 1;
+}
diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h
index c3d0e361933..281223bf3b8 100644
--- a/xlators/features/locks/src/common.h
+++ b/xlators/features/locks/src/common.h
@@ -10,7 +10,6 @@
#ifndef __COMMON_H__
#define __COMMON_H__
-#include "lkowner.h"
/*dump locks format strings */
#define RANGE_FMT "type=%s, whence=%hd, start=%llu, len=%llu"
#define ENTRY_FMT "type=%s on basename=%s"
@@ -32,12 +31,34 @@
#define SET_FLOCK_PID(flock, lock) ((flock)->l_pid = lock->client_pid)
+#define PL_STACK_UNWIND_AND_FREE(__local, fop, frame, op_ret, params...) \
+ do { \
+ frame->local = NULL; \
+ STACK_UNWIND_STRICT(fop, frame, op_ret, params); \
+ if (__local) { \
+ if (__local->inodelk_dom_count_req) \
+ data_unref(__local->inodelk_dom_count_req); \
+ loc_wipe(&__local->loc[0]); \
+ loc_wipe(&__local->loc[1]); \
+ if (__local->fd) \
+ fd_unref(__local->fd); \
+ if (__local->inode) \
+ inode_unref(__local->inode); \
+ if (__local->xdata) { \
+ dict_unref(__local->xdata); \
+ __local->xdata = NULL; \
+ } \
+ mem_put(__local); \
+ } \
+ } while (0)
+
posix_lock_t *
new_posix_lock(struct gf_flock *flock, client_t *client, pid_t client_pid,
- gf_lkowner_t *owner, fd_t *fd, uint32_t lk_flags, int can_block);
+ gf_lkowner_t *owner, fd_t *fd, uint32_t lk_flags, int blocking,
+ int32_t *op_errno);
pl_inode_t *
-pl_inode_get(xlator_t *this, inode_t *inode);
+pl_inode_get(xlator_t *this, inode_t *inode, pl_local_t *local);
posix_lock_t *
pl_getlk(pl_inode_t *inode, posix_lock_t *lock);
@@ -45,6 +66,9 @@ pl_getlk(pl_inode_t *inode, posix_lock_t *lock);
int
pl_setlk(xlator_t *this, pl_inode_t *inode, posix_lock_t *lock, int can_block);
+int
+pl_lock_preempt(pl_inode_t *pl_inode, posix_lock_t *reqlock);
+
void
grant_blocked_locks(xlator_t *this, pl_inode_t *inode);
@@ -81,6 +105,15 @@ void
__pl_inodelk_unref(pl_inode_lock_t *lock);
void
+__grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *granted, pl_dom_list_t *dom,
+ struct timespec *now, struct list_head *contend);
+
+void
+unwind_granted_inodes(xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *granted);
+
+void
grant_blocked_entry_locks(xlator_t *this, pl_inode_t *pl_inode,
pl_dom_list_t *dom, struct timespec *now,
struct list_head *contend);
@@ -177,9 +210,53 @@ __pl_entrylk_unref(pl_entry_lock_t *lock);
int
pl_metalock_is_active(pl_inode_t *pl_inode);
-int
-__pl_queue_lock(pl_inode_t *pl_inode, posix_lock_t *reqlock, int can_block);
+void
+__pl_queue_lock(pl_inode_t *pl_inode, posix_lock_t *reqlock);
+
+void
+inodelk_contention_notify_check(xlator_t *xl, pl_inode_lock_t *lock,
+ struct timespec *now,
+ struct list_head *contend);
+
+void
+entrylk_contention_notify_check(xlator_t *xl, pl_entry_lock_t *lock,
+ struct timespec *now,
+ struct list_head *contend);
gf_boolean_t
pl_does_monkey_want_stuck_lock();
+
+gf_boolean_t
+pl_is_mandatory_locking_enabled(pl_inode_t *pl_inode);
+
+void
+pl_clean_local(pl_local_t *local);
+
+int
+pl_local_init(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd);
+
+gf_boolean_t
+pl_is_lk_owner_valid(gf_lkowner_t *owner, client_t *client);
+
+int32_t
+pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc,
+ pl_inode_t **ppl_inode, struct list_head *contend);
+
+int32_t
+pl_inode_remove_complete(xlator_t *xl, pl_inode_t *pl_inode, call_stub_t *stub,
+ struct list_head *contend);
+
+void
+pl_inode_remove_wake(struct list_head *list);
+
+void
+pl_inode_remove_cbk(xlator_t *xl, pl_inode_t *pl_inode, int32_t error);
+
+void
+pl_inode_remove_unlocked(xlator_t *xl, pl_inode_t *pl_inode,
+ struct list_head *list);
+
+int32_t
+pl_inode_remove_inodelk(pl_inode_t *pl_inode, pl_inode_lock_t *lock);
+
#endif /* __COMMON_H__ */
diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c
index 60e7bed2a44..fd772c850dd 100644
--- a/xlators/features/locks/src/entrylk.c
+++ b/xlators/features/locks/src/entrylk.c
@@ -7,13 +7,13 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "glusterfs.h"
-#include "compat.h"
-#include "xlator.h"
-#include "logging.h"
-#include "common-utils.h"
-#include "list.h"
-#include "upcall-utils.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/list.h>
+#include <glusterfs/upcall-utils.h>
#include "locks.h"
#include "clear.h"
@@ -39,13 +39,20 @@ __pl_entrylk_ref(pl_entry_lock_t *lock)
static pl_entry_lock_t *
new_entrylk_lock(pl_inode_t *pinode, const char *basename, entrylk_type type,
- const char *domain, call_frame_t *frame, char *conn_id)
+ const char *domain, call_frame_t *frame, char *conn_id,
+ int32_t *op_errno)
{
pl_entry_lock_t *newlock = NULL;
+ if (!pl_is_lk_owner_valid(&frame->root->lk_owner, frame->root->client)) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+
newlock = GF_CALLOC(1, sizeof(pl_entry_lock_t),
gf_locks_mt_pl_entry_lock_t);
if (!newlock) {
+ *op_errno = ENOMEM;
goto out;
}
@@ -114,7 +121,6 @@ __stale_entrylk(xlator_t *this, pl_entry_lock_t *candidate_lock,
pl_entry_lock_t *requested_lock, time_t *lock_age_sec)
{
posix_locks_private_t *priv = NULL;
- struct timeval curr;
priv = this->private;
@@ -122,8 +128,7 @@ __stale_entrylk(xlator_t *this, pl_entry_lock_t *candidate_lock,
* chance? Or just the locks we are attempting to acquire?
*/
if (names_conflict(candidate_lock->basename, requested_lock->basename)) {
- gettimeofday(&curr, NULL);
- *lock_age_sec = curr.tv_sec - candidate_lock->granted_time.tv_sec;
+ *lock_age_sec = gf_time() - candidate_lock->granted_time;
if (*lock_age_sec > priv->revocation_secs)
return _gf_true;
}
@@ -197,9 +202,9 @@ out:
return revoke_lock;
}
-static gf_boolean_t
-__entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock,
- struct timespec *now)
+void
+entrylk_contention_notify_check(xlator_t *this, pl_entry_lock_t *lock,
+ struct timespec *now, struct list_head *contend)
{
posix_locks_private_t *priv;
int64_t elapsed;
@@ -209,7 +214,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock,
/* If this lock is in a list, it means that we are about to send a
* notification for it, so no need to do anything else. */
if (!list_empty(&lock->contend)) {
- return _gf_false;
+ return;
}
elapsed = now->tv_sec;
@@ -218,7 +223,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock,
elapsed--;
}
if (elapsed < priv->notify_contention_delay) {
- return _gf_false;
+ return;
}
/* All contention notifications will be sent outside of the locked
@@ -231,7 +236,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock,
lock->contention_time = *now;
- return _gf_true;
+ list_add_tail(&lock->contend, contend);
}
void
@@ -325,9 +330,7 @@ __entrylk_grantable(xlator_t *this, pl_dom_list_t *dom, pl_entry_lock_t *lock,
break;
}
}
- if (__entrylk_needs_contention_notify(this, tmp, now)) {
- list_add_tail(&tmp->contend, contend);
- }
+ entrylk_contention_notify_check(this, tmp, now, contend);
}
}
@@ -539,19 +542,17 @@ static int
__lock_blocked_add(xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom,
pl_entry_lock_t *lock, int nonblock)
{
- struct timeval now;
-
if (nonblock)
goto out;
- gettimeofday(&now, NULL);
-
- lock->blkd_time = now;
+ lock->blkd_time = gf_time();
list_add_tail(&lock->blocked_locks, &dom->blocked_entrylks);
gf_msg_trace(this->name, 0, "Blocking lock: {pinode=%p, basename=%s}",
pinode, lock->basename);
+ entrylk_trace_block(this, lock->frame, NULL, NULL, NULL, lock->basename,
+ ENTRYLK_LOCK, lock->type);
out:
return -EAGAIN;
}
@@ -605,7 +606,7 @@ __lock_entrylk(xlator_t *this, pl_inode_t *pinode, pl_entry_lock_t *lock,
}
__pl_entrylk_ref(lock);
- gettimeofday(&lock->granted_time, NULL);
+ lock->granted_time = gf_time();
list_add(&lock->domain_list, &dom->entrylk_list);
ret = 0;
@@ -644,11 +645,10 @@ int32_t
check_entrylk_on_basename(xlator_t *this, inode_t *parent, char *basename)
{
int32_t entrylk = 0;
- pl_inode_t *pinode = 0;
pl_dom_list_t *dom = NULL;
pl_entry_lock_t *conf = NULL;
- pinode = pl_inode_get(this, parent);
+ pl_inode_t *pinode = pl_inode_get(this, parent, NULL);
if (!pinode)
goto out;
pthread_mutex_lock(&pinode->mutex);
@@ -689,10 +689,9 @@ __grant_blocked_entry_locks(xlator_t *this, pl_inode_t *pl_inode,
bl_ret = __lock_entrylk(bl->this, pl_inode, bl, 0, dom, now, contend);
if (bl_ret == 0) {
- list_add(&bl->blocked_locks, granted);
+ list_add_tail(&bl->blocked_locks, granted);
}
}
- return;
}
/* Grants locks if possible which are blocked on a lock */
@@ -770,7 +769,7 @@ pl_common_entrylk(call_frame_t *frame, xlator_t *this, const char *volume,
if (xdata)
dict_ret = dict_get_str(xdata, "connection-id", &conn_id);
- pinode = pl_inode_get(this, inode);
+ pinode = pl_inode_get(this, inode, NULL);
if (!pinode) {
op_errno = ENOMEM;
goto out;
@@ -794,10 +793,9 @@ pl_common_entrylk(call_frame_t *frame, xlator_t *this, const char *volume,
entrylk_trace_in(this, frame, volume, fd, loc, basename, cmd, type);
reqlock = new_entrylk_lock(pinode, basename, type, dom->domain, frame,
- conn_id);
+ conn_id, &op_errno);
if (!reqlock) {
op_ret = -1;
- op_errno = ENOMEM;
goto unwind;
}
@@ -933,8 +931,6 @@ out:
op_ret, op_errno);
unwind:
STACK_UNWIND_STRICT(entrylk, frame, op_ret, op_errno, NULL);
- } else {
- entrylk_trace_block(this, frame, volume, fd, loc, basename, cmd, type);
}
if (pcontend != NULL) {
@@ -1072,32 +1068,36 @@ pl_entrylk_client_cleanup(xlator_t *this, pl_ctx_t *ctx)
}
pthread_mutex_unlock(&ctx->lock);
- list_for_each_entry_safe(l, tmp, &unwind, client_list)
- {
- list_del_init(&l->client_list);
+ if (!list_empty(&unwind)) {
+ list_for_each_entry_safe(l, tmp, &unwind, client_list)
+ {
+ list_del_init(&l->client_list);
- if (l->frame)
- STACK_UNWIND_STRICT(entrylk, l->frame, -1, EAGAIN, NULL);
- list_add_tail(&l->client_list, &released);
+ if (l->frame)
+ STACK_UNWIND_STRICT(entrylk, l->frame, -1, EAGAIN, NULL);
+ list_add_tail(&l->client_list, &released);
+ }
}
- list_for_each_entry_safe(l, tmp, &released, client_list)
- {
- list_del_init(&l->client_list);
+ if (!list_empty(&released)) {
+ list_for_each_entry_safe(l, tmp, &released, client_list)
+ {
+ list_del_init(&l->client_list);
- pinode = l->pinode;
+ pinode = l->pinode;
- dom = get_domain(pinode, l->volume);
+ dom = get_domain(pinode, l->volume);
- grant_blocked_entry_locks(this, pinode, dom, &now, pcontend);
+ grant_blocked_entry_locks(this, pinode, dom, &now, pcontend);
- pthread_mutex_lock(&pinode->mutex);
- {
- __pl_entrylk_unref(l);
- }
- pthread_mutex_unlock(&pinode->mutex);
+ pthread_mutex_lock(&pinode->mutex);
+ {
+ __pl_entrylk_unref(l);
+ }
+ pthread_mutex_unlock(&pinode->mutex);
- inode_unref(pinode->inode);
+ inode_unref(pinode->inode);
+ }
}
if (pcontend != NULL) {
diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c
index b8e005038d8..d4e51d6e0a1 100644
--- a/xlators/features/locks/src/inodelk.c
+++ b/xlators/features/locks/src/inodelk.c
@@ -7,18 +7,16 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "glusterfs.h"
-#include "compat.h"
-#include "xlator.h"
-#include "logging.h"
-#include "common-utils.h"
-#include "list.h"
-#include "upcall-utils.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/list.h>
+#include <glusterfs/upcall-utils.h>
#include "locks.h"
#include "clear.h"
#include "common.h"
-#include "pl-messages.h"
void
__delete_inode_lock(pl_inode_lock_t *lock)
@@ -142,15 +140,13 @@ __stale_inodelk(xlator_t *this, pl_inode_lock_t *candidate_lock,
pl_inode_lock_t *requested_lock, time_t *lock_age_sec)
{
posix_locks_private_t *priv = NULL;
- struct timeval curr;
priv = this->private;
/* Question: Should we just prune them all given the
* chance? Or just the locks we are attempting to acquire?
*/
if (inodelk_conflict(candidate_lock, requested_lock)) {
- gettimeofday(&curr, NULL);
- *lock_age_sec = curr.tv_sec - candidate_lock->granted_time.tv_sec;
+ *lock_age_sec = gf_time() - candidate_lock->granted_time;
if (*lock_age_sec > priv->revocation_secs)
return _gf_true;
}
@@ -231,9 +227,9 @@ out:
return revoke_lock;
}
-static gf_boolean_t
-__inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock,
- struct timespec *now)
+void
+inodelk_contention_notify_check(xlator_t *this, pl_inode_lock_t *lock,
+ struct timespec *now, struct list_head *contend)
{
posix_locks_private_t *priv;
int64_t elapsed;
@@ -243,7 +239,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock,
/* If this lock is in a list, it means that we are about to send a
* notification for it, so no need to do anything else. */
if (!list_empty(&lock->contend)) {
- return _gf_false;
+ return;
}
elapsed = now->tv_sec;
@@ -252,7 +248,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock,
elapsed--;
}
if (elapsed < priv->notify_contention_delay) {
- return _gf_false;
+ return;
}
/* All contention notifications will be sent outside of the locked
@@ -265,7 +261,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock,
lock->contention_time = *now;
- return _gf_true;
+ list_add_tail(&lock->contend, contend);
}
void
@@ -353,9 +349,7 @@ __inodelk_grantable(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock,
break;
}
}
- if (__inodelk_needs_contention_notify(this, l, now)) {
- list_add_tail(&l->contend, contend);
- }
+ inodelk_contention_notify_check(this, l, now, contend);
}
}
@@ -401,15 +395,11 @@ static int
__lock_blocked_add(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock,
int can_block)
{
- struct timeval now;
-
if (can_block == 0) {
goto out;
}
- gettimeofday(&now, NULL);
-
- lock->blkd_time = now;
+ lock->blkd_time = gf_time();
list_add_tail(&lock->blocked_locks, &dom->blocked_inodelks);
gf_msg_trace(this->name, 0,
@@ -420,6 +410,8 @@ __lock_blocked_add(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock,
lkowner_utoa(&lock->owner), lock->user_flock.l_start,
lock->user_flock.l_len);
+ pl_trace_block(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock,
+ lock->volume);
out:
return -EAGAIN;
}
@@ -433,12 +425,17 @@ __lock_inodelk(xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
struct list_head *contend)
{
pl_inode_lock_t *conf = NULL;
- int ret = -EINVAL;
+ int ret;
- conf = __inodelk_grantable(this, dom, lock, now, contend);
- if (conf) {
- ret = __lock_blocked_add(this, dom, lock, can_block);
- goto out;
+ ret = pl_inode_remove_inodelk(pl_inode, lock);
+ if (ret < 0) {
+ return ret;
+ }
+ if (ret == 0) {
+ conf = __inodelk_grantable(this, dom, lock, now, contend);
+ }
+ if ((ret > 0) || (conf != NULL)) {
+ return __lock_blocked_add(this, dom, lock, can_block);
}
/* To prevent blocked locks starvation, check if there are any blocked
@@ -460,17 +457,13 @@ __lock_inodelk(xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
"starvation");
}
- ret = __lock_blocked_add(this, dom, lock, can_block);
- goto out;
+ return __lock_blocked_add(this, dom, lock, can_block);
}
__pl_inodelk_ref(lock);
- gettimeofday(&lock->granted_time, NULL);
+ lock->granted_time = gf_time();
list_add(&lock->list, &dom->inodelk_list);
- ret = 0;
-
-out:
- return ret;
+ return 0;
}
/* Return true if the two inodelks have exactly same lock boundaries */
@@ -502,33 +495,36 @@ static pl_inode_lock_t *
__inode_unlock_lock(xlator_t *this, pl_inode_lock_t *lock, pl_dom_list_t *dom)
{
pl_inode_lock_t *conf = NULL;
+ inode_t *inode = NULL;
+
+ inode = lock->pl_inode->inode;
conf = find_matching_inodelk(lock, dom);
if (!conf) {
gf_log(this->name, GF_LOG_ERROR,
" Matching lock not found for unlock %llu-%llu, by %s "
- "on %p",
+ "on %p for gfid:%s",
(unsigned long long)lock->fl_start,
(unsigned long long)lock->fl_end, lkowner_utoa(&lock->owner),
- lock->client);
+ lock->client, inode ? uuid_utoa(inode->gfid) : "UNKNOWN");
goto out;
}
__delete_inode_lock(conf);
gf_log(this->name, GF_LOG_DEBUG,
- " Matching lock found for unlock %llu-%llu, by %s on %p",
+ " Matching lock found for unlock %llu-%llu, by %s on %p for gfid:%s",
(unsigned long long)lock->fl_start, (unsigned long long)lock->fl_end,
- lkowner_utoa(&lock->owner), lock->client);
+ lkowner_utoa(&lock->owner), lock->client,
+ inode ? uuid_utoa(inode->gfid) : "UNKNOWN");
out:
return conf;
}
-static void
+void
__grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
struct list_head *granted, pl_dom_list_t *dom,
struct timespec *now, struct list_head *contend)
{
- int bl_ret = 0;
pl_inode_lock_t *bl = NULL;
pl_inode_lock_t *tmp = NULL;
@@ -541,52 +537,48 @@ __grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
{
list_del_init(&bl->blocked_locks);
- bl_ret = __lock_inodelk(this, pl_inode, bl, 1, dom, now, contend);
+ bl->status = __lock_inodelk(this, pl_inode, bl, 1, dom, now, contend);
- if (bl_ret == 0) {
- list_add(&bl->blocked_locks, granted);
+ if (bl->status != -EAGAIN) {
+ list_add_tail(&bl->blocked_locks, granted);
}
}
- return;
}
-/* Grant all inodelks blocked on a lock */
void
-grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
- pl_dom_list_t *dom, struct timespec *now,
- struct list_head *contend)
+unwind_granted_inodes(xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *granted)
{
- struct list_head granted;
pl_inode_lock_t *lock;
pl_inode_lock_t *tmp;
+ int32_t op_ret;
+ int32_t op_errno;
- INIT_LIST_HEAD(&granted);
-
- pthread_mutex_lock(&pl_inode->mutex);
+ list_for_each_entry_safe(lock, tmp, granted, blocked_locks)
{
- __grant_blocked_inode_locks(this, pl_inode, &granted, dom, now,
- contend);
- }
- pthread_mutex_unlock(&pl_inode->mutex);
-
- list_for_each_entry_safe(lock, tmp, &granted, blocked_locks)
- {
- gf_log(this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 " => Granted",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid,
- lkowner_utoa(&lock->owner), lock->user_flock.l_start,
- lock->user_flock.l_len);
-
+ if (lock->status == 0) {
+ op_ret = 0;
+ op_errno = 0;
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64
+ " => Granted",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid, lkowner_utoa(&lock->owner),
+ lock->user_flock.l_start, lock->user_flock.l_len);
+ } else {
+ op_ret = -1;
+ op_errno = -lock->status;
+ }
pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock,
- 0, 0, lock->volume);
+ op_ret, op_errno, lock->volume);
- STACK_UNWIND_STRICT(inodelk, lock->frame, 0, 0, NULL);
+ STACK_UNWIND_STRICT(inodelk, lock->frame, op_ret, op_errno, NULL);
lock->frame = NULL;
}
pthread_mutex_lock(&pl_inode->mutex);
{
- list_for_each_entry_safe(lock, tmp, &granted, blocked_locks)
+ list_for_each_entry_safe(lock, tmp, granted, blocked_locks)
{
list_del_init(&lock->blocked_locks);
__pl_inodelk_unref(lock);
@@ -595,6 +587,26 @@ grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
pthread_mutex_unlock(&pl_inode->mutex);
}
+/* Grant all inodelks blocked on a lock */
+void
+grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
+ pl_dom_list_t *dom, struct timespec *now,
+ struct list_head *contend)
+{
+ struct list_head granted;
+
+ INIT_LIST_HEAD(&granted);
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ __grant_blocked_inode_locks(this, pl_inode, &granted, dom, now,
+ contend);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ unwind_granted_inodes(this, pl_inode, &granted);
+}
+
static void
pl_inodelk_log_cleanup(pl_inode_lock_t *lock)
{
@@ -656,7 +668,7 @@ pl_inodelk_client_cleanup(xlator_t *this, pl_ctx_t *ctx)
* and blocked lists, then this means that a parallel
* unlock on another inodelk (L2 say) may have 'granted'
* L1 and added it to 'granted' list in
- * __grant_blocked_node_locks() (although using the
+ * __grant_blocked_inode_locks() (although using the
* 'blocked_locks' member). In that case, the cleanup
* codepath must try and grant other overlapping
* blocked inodelks from other clients, now that L1 is
@@ -691,31 +703,35 @@ pl_inodelk_client_cleanup(xlator_t *this, pl_ctx_t *ctx)
}
pthread_mutex_unlock(&ctx->lock);
- list_for_each_entry_safe(l, tmp, &unwind, client_list)
- {
- list_del_init(&l->client_list);
+ if (!list_empty(&unwind)) {
+ list_for_each_entry_safe(l, tmp, &unwind, client_list)
+ {
+ list_del_init(&l->client_list);
- if (l->frame)
- STACK_UNWIND_STRICT(inodelk, l->frame, -1, EAGAIN, NULL);
- list_add_tail(&l->client_list, &released);
+ if (l->frame)
+ STACK_UNWIND_STRICT(inodelk, l->frame, -1, EAGAIN, NULL);
+ list_add_tail(&l->client_list, &released);
+ }
}
- list_for_each_entry_safe(l, tmp, &released, client_list)
- {
- list_del_init(&l->client_list);
+ if (!list_empty(&released)) {
+ list_for_each_entry_safe(l, tmp, &released, client_list)
+ {
+ list_del_init(&l->client_list);
- pl_inode = l->pl_inode;
+ pl_inode = l->pl_inode;
- dom = get_domain(pl_inode, l->volume);
+ dom = get_domain(pl_inode, l->volume);
- grant_blocked_inode_locks(this, pl_inode, dom, &now, pcontend);
+ grant_blocked_inode_locks(this, pl_inode, dom, &now, pcontend);
- pthread_mutex_lock(&pl_inode->mutex);
- {
- __pl_inodelk_unref(l);
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ __pl_inodelk_unref(l);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+ inode_unref(pl_inode->inode);
}
- pthread_mutex_unlock(&pl_inode->mutex);
- inode_unref(pl_inode->inode);
}
if (pcontend != NULL) {
@@ -737,6 +753,7 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
gf_boolean_t need_inode_unref = _gf_false;
struct list_head *pcontend = NULL;
struct list_head contend;
+ struct list_head wake;
struct timespec now = {};
short fl_type;
@@ -788,6 +805,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
timespec_now(&now);
}
+ INIT_LIST_HEAD(&wake);
+
if (ctx)
pthread_mutex_lock(&ctx->lock);
pthread_mutex_lock(&pl_inode->mutex);
@@ -810,18 +829,17 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
lock->client_pid, lkowner_utoa(&lock->owner),
lock->user_flock.l_start, lock->user_flock.l_len);
- if (can_block)
+ if (can_block) {
unref = _gf_false;
- /* For all but the case where a non-blocking
- * lock attempt fails, the extra ref taken at
- * the start of this function must be negated.
- */
- else
- need_inode_unref = _gf_true;
+ }
}
-
- if (ctx && (!ret || can_block))
+ /* For all but the case where a non-blocking lock attempt fails
+ * with -EAGAIN, the extra ref taken at the start of this function
+ * must be negated. */
+ need_inode_unref = (ret != 0) && ((ret != -EAGAIN) || !can_block);
+ if (ctx && !need_inode_unref) {
list_add_tail(&lock->client_list, &ctx->inodelk_lockers);
+ }
} else {
/* Irrespective of whether unlock succeeds or not,
* the extra inode ref that was done at the start of
@@ -839,6 +857,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
list_del_init(&retlock->client_list);
__pl_inodelk_unref(retlock);
+ pl_inode_remove_unlocked(this, pl_inode, &wake);
+
ret = 0;
}
out:
@@ -849,6 +869,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
if (ctx)
pthread_mutex_unlock(&ctx->lock);
+ pl_inode_remove_wake(&wake);
+
/* The following (extra) unref corresponds to the ref that
* was done at the time the lock was granted.
*/
@@ -869,17 +891,23 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
}
/* Create a new inode_lock_t */
-pl_inode_lock_t *
+static pl_inode_lock_t *
new_inode_lock(struct gf_flock *flock, client_t *client, pid_t client_pid,
call_frame_t *frame, xlator_t *this, const char *volume,
- char *conn_id)
+ char *conn_id, int32_t *op_errno)
{
pl_inode_lock_t *lock = NULL;
+ if (!pl_is_lk_owner_valid(&frame->root->lk_owner, frame->root->client)) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+
lock = GF_CALLOC(1, sizeof(*lock), gf_locks_mt_pl_inode_lock_t);
if (!lock) {
- return NULL;
+ *op_errno = ENOMEM;
+ goto out;
}
lock->fl_start = flock->l_start;
@@ -907,6 +935,7 @@ new_inode_lock(struct gf_flock *flock, client_t *client, pid_t client_pid,
INIT_LIST_HEAD(&lock->contend);
__pl_inodelk_ref(lock);
+out:
return lock;
}
@@ -951,6 +980,7 @@ pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
int ret = -1;
GF_UNUSED int dict_ret = -1;
int can_block = 0;
+ short lock_type = 0;
pl_inode_t *pinode = NULL;
pl_inode_lock_t *reqlock = NULL;
pl_dom_list_t *dom = NULL;
@@ -988,7 +1018,7 @@ pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
}
}
- pinode = pl_inode_get(this, inode);
+ pinode = pl_inode_get(this, inode, NULL);
if (!pinode) {
op_errno = ENOMEM;
goto unwind;
@@ -1001,11 +1031,10 @@ pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
}
reqlock = new_inode_lock(flock, frame->root->client, frame->root->pid,
- frame, this, dom->domain, conn_id);
+ frame, this, dom->domain, conn_id, &op_errno);
if (!reqlock) {
op_ret = -1;
- op_errno = ENOMEM;
goto unwind;
}
@@ -1016,16 +1045,20 @@ pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
/* fall through */
case F_SETLK:
+ lock_type = flock->l_type;
memcpy(&reqlock->user_flock, flock, sizeof(struct gf_flock));
ret = pl_inode_setlk(this, ctx, pinode, reqlock, can_block, dom,
inode);
if (ret < 0) {
- if ((can_block) && (F_UNLCK != flock->l_type)) {
- pl_trace_block(this, frame, fd, loc, cmd, flock, volume);
- goto out;
+ if (ret == -EAGAIN) {
+ if (can_block && (F_UNLCK != lock_type)) {
+ goto out;
+ }
+ gf_log(this->name, GF_LOG_TRACE, "returning EAGAIN");
+ } else {
+ gf_log(this->name, GF_LOG_TRACE, "returning %d", ret);
}
- gf_log(this->name, GF_LOG_TRACE, "returning EAGAIN");
op_errno = -ret;
goto unwind;
}
diff --git a/xlators/features/locks/src/locks-mem-types.h b/xlators/features/locks/src/locks-mem-types.h
index bec7db2d58e..a76605027b3 100644
--- a/xlators/features/locks/src/locks-mem-types.h
+++ b/xlators/features/locks/src/locks-mem-types.h
@@ -11,7 +11,7 @@
#ifndef __LOCKS_MEM_TYPES_H__
#define __LOCKS_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_locks_mem_types_ {
gf_locks_mt_pl_dom_list_t = gf_common_mt_end + 1,
diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h
index cf2849fc251..c868eb494a2 100644
--- a/xlators/features/locks/src/locks.h
+++ b/xlators/features/locks/src/locks.h
@@ -10,13 +10,13 @@
#ifndef __POSIX_LOCKS_H__
#define __POSIX_LOCKS_H__
-#include "compat-errno.h"
-#include "stack.h"
-#include "call-stub.h"
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/stack.h>
+#include <glusterfs/call-stub.h>
#include "locks-mem-types.h"
-#include "client_t.h"
+#include <glusterfs/client_t.h>
-#include "lkowner.h"
+#include <glusterfs/lkowner.h>
typedef enum {
MLK_NONE,
@@ -30,11 +30,11 @@ struct __pl_fd;
struct __posix_lock {
struct list_head list;
- short fl_type;
off_t fl_start;
off_t fl_end;
uint32_t lk_flags;
+ short fl_type;
short blocked; /* waiting to acquire */
struct gf_flock user_flock; /* the flock supplied by the user */
xlator_t *this; /* required for blocked locks */
@@ -43,9 +43,8 @@ struct __posix_lock {
fd_t *fd;
call_frame_t *frame;
- struct timeval blkd_time; /*time at which lock was queued into blkd list*/
- struct timeval
- granted_time; /*time at which lock was queued into active list*/
+ time_t blkd_time; /* time at which lock was queued into blkd list */
+ time_t granted_time; /* time at which lock was queued into active list */
/* These two together serve to uniquely identify each process
across nodes */
@@ -74,7 +73,6 @@ struct __pl_inode_lock {
struct list_head contend; /* list of contending locks */
int ref;
- short fl_type;
off_t fl_start;
off_t fl_end;
@@ -86,9 +84,9 @@ struct __pl_inode_lock {
call_frame_t *frame;
- struct timeval blkd_time; /*time at which lock was queued into blkd list*/
- struct timeval
- granted_time; /*time at which lock was queued into active list*/
+ time_t blkd_time; /* time at which lock was queued into blkd list */
+ time_t granted_time; /* time at which lock was queued into active list */
+
/*last time at which lock contention was detected and notified*/
struct timespec contention_time;
@@ -102,6 +100,10 @@ struct __pl_inode_lock {
char *connection_id; /* stores the client connection id */
struct list_head client_list; /* list of all locks from a client */
+ short fl_type;
+
+ int32_t status; /* Error code when we try to grant a lock in blocked
+ state */
};
typedef struct __pl_inode_lock pl_inode_lock_t;
@@ -135,11 +137,10 @@ struct __entry_lock {
const char *volume;
const char *basename;
- entrylk_type type;
- struct timeval blkd_time; /*time at which lock was queued into blkd list*/
- struct timeval
- granted_time; /*time at which lock was queued into active list*/
+ time_t blkd_time; /* time at which lock was queued into blkd list */
+ time_t granted_time; /* time at which lock was queued into active list */
+
/*last time at which lock contention was detected and notified*/
struct timespec contention_time;
@@ -150,6 +151,7 @@ struct __entry_lock {
char *connection_id; /* stores the client connection id */
struct list_head client_list; /* list of all locks from a client */
+ entrylk_type type;
};
typedef struct __entry_lock pl_entry_lock_t;
@@ -164,13 +166,14 @@ struct __pl_inode {
struct list_head rw_list; /* list of waiting r/w requests */
struct list_head reservelk_list; /* list of reservelks */
struct list_head blocked_reservelks; /* list of blocked reservelks */
- struct list_head
- blocked_calls; /* List of blocked lock calls while a reserve is held*/
- struct list_head metalk_list; /* Meta lock list */
- /* This is to store the incoming lock
- requests while meta lock is enabled */
- struct list_head queued_locks;
- int mandatory; /* if mandatory locking is enabled */
+ struct list_head blocked_calls; /* List of blocked lock calls while a
+ reserve is held*/
+ struct list_head metalk_list; /* Meta lock list */
+ struct list_head queued_locks; /* This is to store the incoming lock
+ requests while meta lock is enabled */
+ struct list_head waiting; /* List of pending fops waiting to unlink/rmdir
+ the inode. */
+ int mandatory; /* if mandatory locking is enabled */
inode_t *refkeeper; /* hold refs on an inode while locks are
held to prevent pruning */
@@ -179,6 +182,31 @@ struct __pl_inode {
of inode_t as long as there are
locks on it */
gf_boolean_t migrated;
+
+ /* Flag to indicate whether to read mlock-enforce xattr from disk */
+ gf_boolean_t check_mlock_info;
+
+ /* Mandatory_lock enforce: IO will be allowed if and only if the lkowner has
+ held the lock.
+
+ Note: An xattr is set on the file to recover this information post
+ reboot. If client does not want mandatory lock to be enforced, then it
+ should remove this xattr explicitly
+ */
+ gf_boolean_t mlock_enforced;
+ /* There are scenarios where mandatory lock is granted but there are IOs
+ pending at posix level. To avoid this before preempting the previous lock
+ owner, we wait for all the fops to be unwound.
+ */
+ int fop_wind_count;
+ pthread_cond_t check_fop_wind_count;
+
+ gf_boolean_t track_fop_wind_count;
+
+ int32_t links; /* Number of hard links the inode has. */
+ uint32_t remove_running; /* Number of remove operations running. */
+ gf_boolean_t is_locked; /* Regular locks will be blocked. */
+ gf_boolean_t removed; /* The inode has been deleted. */
};
typedef struct __pl_inode pl_inode_t;
@@ -196,29 +224,33 @@ struct __pl_metalk {
typedef struct __pl_metalk pl_meta_lock_t;
typedef struct {
+ char *brickname;
+ uint32_t revocation_secs;
+ uint32_t revocation_max_blocked;
+ uint32_t notify_contention_delay;
mlk_mode_t mandatory_mode; /* holds current mandatory locking mode */
gf_boolean_t trace; /* trace lock requests in and out */
- char *brickname;
gf_boolean_t monkey_unlocking;
- uint32_t revocation_secs;
gf_boolean_t revocation_clear_all;
- uint32_t revocation_max_blocked;
gf_boolean_t notify_contention;
- uint32_t notify_contention_delay;
+ gf_boolean_t mlock_enforced;
} posix_locks_private_t;
typedef struct {
- gf_boolean_t entrylk_count_req;
- gf_boolean_t inodelk_count_req;
- gf_boolean_t posixlk_count_req;
- gf_boolean_t parent_entrylk_req;
data_t *inodelk_dom_count_req;
dict_t *xdata;
loc_t loc[2];
fd_t *fd;
+ inode_t *inode;
off_t offset;
glusterfs_fop_t op;
+ gf_boolean_t entrylk_count_req;
+ gf_boolean_t inodelk_count_req;
+ gf_boolean_t posixlk_count_req;
+ gf_boolean_t parent_entrylk_req;
+ gf_boolean_t multiple_dom_lk_requests;
+ int update_mlock_enforced_flag;
} pl_local_t;
typedef struct {
@@ -239,6 +271,15 @@ typedef struct _locks_ctx {
struct list_head metalk_list;
} pl_ctx_t;
+typedef struct _multi_dom_lk_data {
+ xlator_t *this;
+ inode_t *inode;
+ dict_t *xdata_rsp;
+ gf_boolean_t keep_max;
+} multi_dom_lk_data;
+
+typedef enum { DECREMENT, INCREMENT } pl_count_op_t;
+
pl_ctx_t *
pl_ctx_get(client_t *client, xlator_t *xlator);
diff --git a/xlators/features/locks/src/pl-messages.h b/xlators/features/locks/src/pl-messages.h
index a99e1bbce43..e2d3d7ca974 100644
--- a/xlators/features/locks/src/pl-messages.h
+++ b/xlators/features/locks/src/pl-messages.h
@@ -11,7 +11,7 @@
#ifndef _PL_MESSAGES_H_
#define _PL_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
index af162c5284c..cf0ae4c57dd 100644
--- a/xlators/features/locks/src/posix.c
+++ b/xlators/features/locks/src/posix.c
@@ -12,19 +12,15 @@
#include <limits.h>
#include <pthread.h>
-#include "glusterfs.h"
-#include "compat.h"
-#include "xlator.h"
-#include "logging.h"
-#include "common-utils.h"
+#include <glusterfs/compat.h>
+#include <glusterfs/logging.h>
#include "locks.h"
#include "common.h"
-#include "statedump.h"
+#include <glusterfs/statedump.h>
#include "clear.h"
-#include "defaults.h"
-#include "syncop.h"
-#include "pl-messages.h"
+#include <glusterfs/defaults.h>
+#include <glusterfs/syncop.h>
#ifndef LLONG_MAX
#define LLONG_MAX LONG_LONG_MAX /* compat with old gcc */
@@ -43,21 +39,6 @@ pl_lockinfo_get_brickname(xlator_t *, inode_t *, int32_t *);
static int
fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **);
-#define PL_STACK_UNWIND_AND_FREE(__local, fop, frame, op_ret, params...) \
- do { \
- frame->local = NULL; \
- STACK_UNWIND_STRICT(fop, frame, op_ret, params); \
- if (__local) { \
- if (__local->inodelk_dom_count_req) \
- data_unref(__local->inodelk_dom_count_req); \
- loc_wipe(&__local->loc[0]); \
- loc_wipe(&__local->loc[1]); \
- if (__local->fd) \
- fd_unref(__local->fd); \
- mem_put(__local); \
- } \
- } while (0)
-
/*
* The client is always requesting data, but older
* servers were not returning it. Newer ones are, so
@@ -115,69 +96,156 @@ fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **);
#define PL_LOCAL_GET_REQUESTS(frame, this, xdata, __fd, __loc, __newloc) \
do { \
if (pl_has_xdata_requests(xdata)) { \
- frame->local = mem_get0(this->local_pool); \
+ if (!frame->local) \
+ frame->local = mem_get0(this->local_pool); \
pl_local_t *__local = frame->local; \
if (__local) { \
if (__fd) { \
__local->fd = fd_ref(__fd); \
+ __local->inode = inode_ref(__fd->inode); \
} else { \
if (__loc) \
loc_copy(&__local->loc[0], __loc); \
if (__newloc) \
loc_copy(&__local->loc[1], __newloc); \
+ __local->inode = inode_ref(__local->loc[0].inode); \
} \
pl_get_xdata_requests(__local, xdata); \
} \
} \
} while (0)
+#define PL_CHECK_LOCK_ENFORCE_KEY(frame, dict, name, this, loc, fd, priv) \
+ do { \
+ if ((dict && (dict_get(dict, GF_ENFORCE_MANDATORY_LOCK))) || \
+ (name && (strcmp(name, GF_ENFORCE_MANDATORY_LOCK) == 0))) { \
+ inode_t *__inode = (loc ? loc->inode : fd->inode); \
+ pl_inode_t *__pl_inode = pl_inode_get(this, __inode, NULL); \
+ if (__pl_inode == NULL) { \
+ op_ret = -1; \
+ op_errno = ENOMEM; \
+ goto unwind; \
+ } \
+ if (!pl_is_mandatory_locking_enabled(__pl_inode) || \
+ !priv->mlock_enforced) { \
+ op_ret = -1; \
+ gf_msg(this->name, GF_LOG_DEBUG, EINVAL, 0, \
+ "option %s would need mandatory lock to be enabled " \
+ "and feature.enforce-mandatory-lock option to be set " \
+ "to on", \
+ GF_ENFORCE_MANDATORY_LOCK); \
+ op_errno = EINVAL; \
+ goto unwind; \
+ } \
+ \
+ op_ret = pl_local_init(frame, this, loc, fd); \
+ if (op_ret) { \
+ op_errno = ENOMEM; \
+ goto unwind; \
+ } \
+ \
+ ((pl_local_t *)(frame->local))->update_mlock_enforced_flag = 1; \
+ } \
+ } while (0)
+
+#define PL_INODE_REMOVE(_fop, _frame, _xl, _loc1, _loc2, _cont, _cbk, \
+ _args...) \
+ ({ \
+ struct list_head contend; \
+ pl_inode_t *__pl_inode; \
+ call_stub_t *__stub; \
+ int32_t __error; \
+ INIT_LIST_HEAD(&contend); \
+ __error = pl_inode_remove_prepare(_xl, _frame, _loc2 ? _loc2 : _loc1, \
+ &__pl_inode, &contend); \
+ if (__error < 0) { \
+ __stub = fop_##_fop##_stub(_frame, _cont, ##_args); \
+ __error = pl_inode_remove_complete(_xl, __pl_inode, __stub, \
+ &contend); \
+ } else if (__error == 0) { \
+ PL_LOCAL_GET_REQUESTS(_frame, _xl, xdata, ((fd_t *)NULL), _loc1, \
+ _loc2); \
+ STACK_WIND_COOKIE(_frame, _cbk, __pl_inode, FIRST_CHILD(_xl), \
+ FIRST_CHILD(_xl)->fops->_fop, ##_args); \
+ } \
+ __error; \
+ })
+
gf_boolean_t
pl_has_xdata_requests(dict_t *xdata)
{
- char *reqs[] = {GLUSTERFS_ENTRYLK_COUNT, GLUSTERFS_INODELK_COUNT,
- GLUSTERFS_INODELK_DOM_COUNT, GLUSTERFS_POSIXLK_COUNT,
- GLUSTERFS_PARENT_ENTRYLK, NULL};
+ static char *reqs[] = {GLUSTERFS_ENTRYLK_COUNT,
+ GLUSTERFS_INODELK_COUNT,
+ GLUSTERFS_INODELK_DOM_COUNT,
+ GLUSTERFS_POSIXLK_COUNT,
+ GLUSTERFS_PARENT_ENTRYLK,
+ GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS,
+ NULL};
+ static int reqs_size[] = {SLEN(GLUSTERFS_ENTRYLK_COUNT),
+ SLEN(GLUSTERFS_INODELK_COUNT),
+ SLEN(GLUSTERFS_INODELK_DOM_COUNT),
+ SLEN(GLUSTERFS_POSIXLK_COUNT),
+ SLEN(GLUSTERFS_PARENT_ENTRYLK),
+ SLEN(GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS),
+ 0};
int i = 0;
if (!xdata)
return _gf_false;
for (i = 0; reqs[i]; i++)
- if (dict_get(xdata, reqs[i]))
+ if (dict_getn(xdata, reqs[i], reqs_size[i]))
return _gf_true;
return _gf_false;
}
+static int
+dict_delete_domain_key(dict_t *dict, char *key, data_t *value, void *data)
+{
+ dict_del(dict, key);
+ return 0;
+}
+
void
pl_get_xdata_requests(pl_local_t *local, dict_t *xdata)
{
if (!local || !xdata)
return;
- if (dict_get(xdata, GLUSTERFS_ENTRYLK_COUNT)) {
+ GF_ASSERT(local->xdata == NULL);
+ local->xdata = dict_copy_with_ref(xdata, NULL);
+
+ if (dict_get_sizen(xdata, GLUSTERFS_ENTRYLK_COUNT)) {
local->entrylk_count_req = 1;
- dict_del(xdata, GLUSTERFS_ENTRYLK_COUNT);
+ dict_del_sizen(xdata, GLUSTERFS_ENTRYLK_COUNT);
}
- if (dict_get(xdata, GLUSTERFS_INODELK_COUNT)) {
+ if (dict_get_sizen(xdata, GLUSTERFS_INODELK_COUNT)) {
local->inodelk_count_req = 1;
- dict_del(xdata, GLUSTERFS_INODELK_COUNT);
+ dict_del_sizen(xdata, GLUSTERFS_INODELK_COUNT);
+ }
+ if (dict_get_sizen(xdata, GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS)) {
+ local->multiple_dom_lk_requests = 1;
+ dict_del_sizen(xdata, GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS);
+ dict_foreach_fnmatch(xdata, GLUSTERFS_INODELK_DOM_PREFIX "*",
+ dict_delete_domain_key, NULL);
}
- local->inodelk_dom_count_req = dict_get(xdata, GLUSTERFS_INODELK_DOM_COUNT);
+ local->inodelk_dom_count_req = dict_get_sizen(xdata,
+ GLUSTERFS_INODELK_DOM_COUNT);
if (local->inodelk_dom_count_req) {
data_ref(local->inodelk_dom_count_req);
- dict_del(xdata, GLUSTERFS_INODELK_DOM_COUNT);
+ dict_del_sizen(xdata, GLUSTERFS_INODELK_DOM_COUNT);
}
- if (dict_get(xdata, GLUSTERFS_POSIXLK_COUNT)) {
+ if (dict_get_sizen(xdata, GLUSTERFS_POSIXLK_COUNT)) {
local->posixlk_count_req = 1;
- dict_del(xdata, GLUSTERFS_POSIXLK_COUNT);
+ dict_del_sizen(xdata, GLUSTERFS_POSIXLK_COUNT);
}
- if (dict_get(xdata, GLUSTERFS_PARENT_ENTRYLK)) {
+ if (dict_get_sizen(xdata, GLUSTERFS_PARENT_ENTRYLK)) {
local->parent_entrylk_req = 1;
- dict_del(xdata, GLUSTERFS_PARENT_ENTRYLK);
+ dict_del_sizen(xdata, GLUSTERFS_PARENT_ENTRYLK);
}
}
@@ -187,20 +255,11 @@ pl_needs_xdata_response(pl_local_t *local)
if (!local)
return _gf_false;
- if (local->parent_entrylk_req)
- return _gf_true;
-
- if (local->entrylk_count_req)
- return _gf_true;
-
- if (local->inodelk_dom_count_req)
- return _gf_true;
-
- if (local->inodelk_count_req)
+ if (local->parent_entrylk_req || local->entrylk_count_req ||
+ local->inodelk_dom_count_req || local->inodelk_count_req ||
+ local->posixlk_count_req || local->multiple_dom_lk_requests)
return _gf_true;
- if (local->posixlk_count_req)
- return _gf_true;
return _gf_false;
}
@@ -221,8 +280,43 @@ pl_get_xdata_rsp_args(pl_local_t *local, char *fop, inode_t **parent,
}
}
-int32_t
-__get_posixlk_count(xlator_t *this, pl_inode_t *pl_inode)
+static inline int
+pl_track_io_fop_count(pl_local_t *local, xlator_t *this, pl_count_op_t op)
+{
+ pl_inode_t *pl_inode = NULL;
+
+ if (!local)
+ return -1;
+
+ pl_inode = pl_inode_get(this, local->inode, NULL);
+ if (!pl_inode)
+ return -1;
+
+ if (pl_inode->mlock_enforced && pl_inode->track_fop_wind_count) {
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ if (op == DECREMENT) {
+ pl_inode->fop_wind_count--;
+ /* fop_wind_count can go negative when lock enforcement is
+ * enabled on unwind path of an IO. Hence the "<" comparision.
+ */
+ if (pl_inode->fop_wind_count <= 0) {
+ pthread_cond_broadcast(&pl_inode->check_fop_wind_count);
+ pl_inode->track_fop_wind_count = _gf_false;
+ pl_inode->fop_wind_count = 0;
+ }
+ } else {
+ pl_inode->fop_wind_count++;
+ }
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+
+ return 0;
+}
+
+static int32_t
+__get_posixlk_count(pl_inode_t *pl_inode)
{
posix_lock_t *lock = NULL;
int32_t count = 0;
@@ -237,10 +331,9 @@ get_posixlk_count(xlator_t *this, inode_t *inode)
{
pl_inode_t *pl_inode = NULL;
uint64_t tmp_pl_inode = 0;
- int ret = 0;
int32_t count = 0;
- ret = inode_ctx_get(inode, this, &tmp_pl_inode);
+ int ret = inode_ctx_get(inode, this, &tmp_pl_inode);
if (ret != 0) {
goto out;
}
@@ -249,7 +342,7 @@ get_posixlk_count(xlator_t *this, inode_t *inode)
pthread_mutex_lock(&pl_inode->mutex);
{
- count = __get_posixlk_count(this, pl_inode);
+ count = __get_posixlk_count(pl_inode);
}
pthread_mutex_unlock(&pl_inode->mutex);
@@ -265,10 +358,10 @@ pl_parent_entrylk_xattr_fill(xlator_t *this, inode_t *parent, char *basename,
int32_t maxcount = -1;
int ret = -1;
- if (!parent || !basename || !strlen(basename))
+ if (!parent || !basename)
goto out;
if (keep_max) {
- ret = dict_get_int32(dict, GLUSTERFS_PARENT_ENTRYLK, &maxcount);
+ ret = dict_get_int32_sizen(dict, GLUSTERFS_PARENT_ENTRYLK, &maxcount);
if (ret < 0)
gf_msg_debug(this->name, 0, " Failed to fetch the value for key %s",
GLUSTERFS_PARENT_ENTRYLK);
@@ -277,7 +370,7 @@ pl_parent_entrylk_xattr_fill(xlator_t *this, inode_t *parent, char *basename,
if (maxcount >= entrylk)
return;
out:
- ret = dict_set_int32(dict, GLUSTERFS_PARENT_ENTRYLK, entrylk);
+ ret = dict_set_int32_sizen(dict, GLUSTERFS_PARENT_ENTRYLK, entrylk);
if (ret < 0) {
gf_msg_debug(this->name, 0, " dict_set failed on key %s",
GLUSTERFS_PARENT_ENTRYLK);
@@ -293,7 +386,7 @@ pl_entrylk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict,
int ret = -1;
if (keep_max) {
- ret = dict_get_int32(dict, GLUSTERFS_ENTRYLK_COUNT, &maxcount);
+ ret = dict_get_int32_sizen(dict, GLUSTERFS_ENTRYLK_COUNT, &maxcount);
if (ret < 0)
gf_msg_debug(this->name, 0, " Failed to fetch the value for key %s",
GLUSTERFS_ENTRYLK_COUNT);
@@ -302,7 +395,7 @@ pl_entrylk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict,
if (maxcount >= count)
return;
- ret = dict_set_int32(dict, GLUSTERFS_ENTRYLK_COUNT, count);
+ ret = dict_set_int32_sizen(dict, GLUSTERFS_ENTRYLK_COUNT, count);
if (ret < 0) {
gf_msg_debug(this->name, 0, " dict_set failed on key %s",
GLUSTERFS_ENTRYLK_COUNT);
@@ -318,7 +411,7 @@ pl_inodelk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict,
int ret = -1;
if (keep_max) {
- ret = dict_get_int32(dict, GLUSTERFS_INODELK_COUNT, &maxcount);
+ ret = dict_get_int32_sizen(dict, GLUSTERFS_INODELK_COUNT, &maxcount);
if (ret < 0)
gf_msg_debug(this->name, 0, " Failed to fetch the value for key %s",
GLUSTERFS_INODELK_COUNT);
@@ -327,7 +420,7 @@ pl_inodelk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict,
if (maxcount >= count)
return;
- ret = dict_set_int32(dict, GLUSTERFS_INODELK_COUNT, count);
+ ret = dict_set_int32_sizen(dict, GLUSTERFS_INODELK_COUNT, count);
if (ret < 0) {
gf_msg_debug(this->name, 0,
"Failed to set count for "
@@ -347,7 +440,7 @@ pl_posixlk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict,
int ret = -1;
if (keep_max) {
- ret = dict_get_int32(dict, GLUSTERFS_POSIXLK_COUNT, &maxcount);
+ ret = dict_get_int32_sizen(dict, GLUSTERFS_POSIXLK_COUNT, &maxcount);
if (ret < 0)
gf_msg_debug(this->name, 0, " Failed to fetch the value for key %s",
GLUSTERFS_POSIXLK_COUNT);
@@ -356,7 +449,7 @@ pl_posixlk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict,
if (maxcount >= count)
return;
- ret = dict_set_int32(dict, GLUSTERFS_POSIXLK_COUNT, count);
+ ret = dict_set_int32_sizen(dict, GLUSTERFS_POSIXLK_COUNT, count);
if (ret < 0) {
gf_msg_debug(this->name, 0, " dict_set failed on key %s",
GLUSTERFS_POSIXLK_COUNT);
@@ -364,6 +457,80 @@ pl_posixlk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict,
}
void
+pl_inodelk_xattr_fill_each(xlator_t *this, inode_t *inode, dict_t *dict,
+ char *domname, gf_boolean_t keep_max, char *key)
+{
+ int32_t count = 0;
+ int32_t maxcount = -1;
+ int ret = -1;
+
+ if (keep_max) {
+ ret = dict_get_int32(dict, key, &maxcount);
+ if (ret < 0)
+ gf_msg_debug(this->name, 0, " Failed to fetch the value for key %s",
+ GLUSTERFS_INODELK_COUNT);
+ }
+ count = get_inodelk_count(this, inode, domname);
+ if (maxcount >= count)
+ return;
+
+ ret = dict_set_int32(dict, key, count);
+ if (ret < 0) {
+ gf_msg_debug(this->name, 0,
+ "Failed to set count for "
+ "key %s",
+ key);
+ }
+
+ return;
+}
+
+static int
+pl_inodelk_xattr_fill_multiple(dict_t *this, char *key, data_t *value,
+ void *data)
+{
+ multi_dom_lk_data *d = data;
+ char *tmp_key = NULL;
+ char *save_ptr = NULL;
+
+ tmp_key = gf_strdup(key);
+ if (!tmp_key)
+ return -1;
+
+ strtok_r(tmp_key, ":", &save_ptr);
+ if (!*save_ptr) {
+ if (tmp_key)
+ GF_FREE(tmp_key);
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, EINVAL,
+ "Could not tokenize domain string from key %s", key);
+ return -1;
+ }
+
+ pl_inodelk_xattr_fill_each(d->this, d->inode, d->xdata_rsp, save_ptr,
+ d->keep_max, key);
+ if (tmp_key)
+ GF_FREE(tmp_key);
+
+ return 0;
+}
+
+void
+pl_fill_multiple_dom_lk_requests(xlator_t *this, pl_local_t *local,
+ inode_t *inode, dict_t *dict,
+ gf_boolean_t keep_max)
+{
+ multi_dom_lk_data data;
+
+ data.this = this;
+ data.inode = inode;
+ data.xdata_rsp = dict;
+ data.keep_max = keep_max;
+
+ dict_foreach_fnmatch(local->xdata, GLUSTERFS_INODELK_DOM_PREFIX "*",
+ pl_inodelk_xattr_fill_multiple, &data);
+}
+
+void
pl_set_xdata_response(xlator_t *this, pl_local_t *local, inode_t *parent,
inode_t *inode, char *name, dict_t *xdata,
gf_boolean_t max_lock)
@@ -371,41 +538,28 @@ pl_set_xdata_response(xlator_t *this, pl_local_t *local, inode_t *parent,
if (!xdata || !local)
return;
- if (local->parent_entrylk_req && parent && name && strlen(name))
+ if (local->parent_entrylk_req && parent && name && name[0] != '\0')
pl_parent_entrylk_xattr_fill(this, parent, name, xdata, max_lock);
- if (local->entrylk_count_req && inode)
+ if (!inode)
+ return;
+
+ if (local->entrylk_count_req)
pl_entrylk_xattr_fill(this, inode, xdata, max_lock);
- if (local->inodelk_dom_count_req && inode)
+ if (local->inodelk_dom_count_req)
pl_inodelk_xattr_fill(this, inode, xdata,
data_to_str(local->inodelk_dom_count_req),
max_lock);
- if (local->inodelk_count_req && inode)
+ if (local->inodelk_count_req)
pl_inodelk_xattr_fill(this, inode, xdata, NULL, max_lock);
- if (local->posixlk_count_req && inode)
+ if (local->posixlk_count_req)
pl_posixlk_xattr_fill(this, inode, xdata, max_lock);
-}
-
-/* Return true in case we need to ensure mandatory-locking
- * semnatics under different modes.
- */
-gf_boolean_t
-pl_is_mandatory_locking_enabled(pl_inode_t *pl_inode)
-{
- posix_locks_private_t *priv = NULL;
-
- priv = THIS->private;
-
- if (priv->mandatory_mode == MLK_FILE_BASED && pl_inode->mandatory)
- return _gf_true;
- else if (priv->mandatory_mode == MLK_FORCED ||
- priv->mandatory_mode == MLK_OPTIMAL)
- return _gf_true;
- return _gf_false;
+ if (local->multiple_dom_lk_requests)
+ pl_fill_multiple_dom_lk_requests(this, local, inode, xdata, max_lock);
}
/* Checks whether the region where fop is acting upon conflicts
@@ -420,15 +574,19 @@ pl_is_fop_allowed(pl_inode_t *pl_inode, posix_lock_t *region, fd_t *fd,
int ret = 0;
if (!__rw_allowable(pl_inode, region, op)) {
- if ((!fd) || (fd && (fd->flags & O_NONBLOCK))) {
+ if (pl_inode->mlock_enforced) {
+ *can_block = _gf_false;
+ } else if ((!fd) || (fd && (fd->flags & O_NONBLOCK))) {
gf_log("locks", GF_LOG_TRACE,
"returning EAGAIN"
" because fd is O_NONBLOCK");
*can_block = _gf_false;
- } else
+ } else {
*can_block = _gf_true;
- } else
+ }
+ } else {
ret = 1;
+ }
return ret;
}
@@ -436,9 +594,7 @@ pl_is_fop_allowed(pl_inode_t *pl_inode, posix_lock_t *region, fd_t *fd,
static pl_fdctx_t *
pl_new_fdctx()
{
- pl_fdctx_t *fdctx = NULL;
-
- fdctx = GF_CALLOC(1, sizeof(*fdctx), gf_locks_mt_pl_fdctx_t);
+ pl_fdctx_t *fdctx = GF_MALLOC(sizeof(*fdctx), gf_locks_mt_pl_fdctx_t);
GF_VALIDATE_OR_GOTO("posix-locks", fdctx, out);
INIT_LIST_HEAD(&fdctx->locks_list);
@@ -471,7 +627,9 @@ pl_check_n_create_fdctx(xlator_t *this, fd_t *fd)
if (ret != 0) {
GF_FREE(fdctx);
fdctx = NULL;
+ UNLOCK(&fd->lock);
gf_log(this->name, GF_LOG_DEBUG, "failed to set fd ctx");
+ goto out;
}
}
unlock:
@@ -486,8 +644,10 @@ pl_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
+ pl_track_io_fop_count(frame->local, this, DECREMENT);
+
+ PL_STACK_UNWIND(discard, xdata, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
return 0;
}
@@ -495,6 +655,8 @@ int
pl_discard_cont(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
size_t len, dict_t *xdata)
{
+ pl_track_io_fop_count(frame->local, this, INCREMENT);
+
STACK_WIND(frame, pl_discard_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
return 0;
@@ -504,6 +666,7 @@ int32_t
pl_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
size_t len, dict_t *xdata)
{
+ pl_local_t *local = NULL;
pl_inode_t *pl_inode = NULL;
pl_rw_req_t *rw = NULL;
posix_lock_t region = {
@@ -520,17 +683,28 @@ pl_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
GF_VALIDATE_OR_GOTO("locks", this, unwind);
- pl_inode = pl_inode_get(this, fd->inode);
- if (!pl_inode) {
+ local = mem_get0(this->local_pool);
+ if (!local) {
op_ret = -1;
op_errno = ENOMEM;
goto unwind;
}
- enabled = pl_is_mandatory_locking_enabled(pl_inode);
+ frame->local = local;
+ local->inode = inode_ref(fd->inode);
+ local->fd = fd_ref(fd);
+
+ pl_inode = pl_inode_get(this, fd->inode, local);
+ if (!pl_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
if (frame->root->pid < 0)
enabled = _gf_false;
+ else
+ enabled = pl_is_mandatory_locking_enabled(pl_inode);
if (enabled) {
region.fl_start = offset;
@@ -544,15 +718,19 @@ pl_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
{
allowed = pl_is_fop_allowed(pl_inode, &region, fd, GF_FOP_DISCARD,
&can_block);
- if (allowed == 1)
+ if (allowed == 1) {
+ if (pl_inode->mlock_enforced &&
+ pl_inode->track_fop_wind_count) {
+ pl_inode->fop_wind_count++;
+ }
goto unlock;
- else if (!can_block) {
+ } else if (!can_block) {
op_errno = EAGAIN;
op_ret = -1;
goto unlock;
}
- rw = GF_CALLOC(1, sizeof(*rw), gf_locks_mt_pl_rw_req_t);
+ rw = GF_MALLOC(sizeof(*rw), gf_locks_mt_pl_rw_req_t);
if (!rw) {
op_errno = ENOMEM;
op_ret = -1;
@@ -581,7 +759,8 @@ pl_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
unwind:
if (op_ret == -1)
- STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, NULL, NULL, NULL);
+ PL_STACK_UNWIND(discard, xdata, frame, op_ret, op_errno, NULL, NULL,
+ NULL);
return 0;
}
@@ -591,8 +770,10 @@ pl_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
+ pl_track_io_fop_count(frame->local, this, DECREMENT);
+
+ PL_STACK_UNWIND(zerofill, xdata, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
return 0;
}
@@ -600,6 +781,8 @@ int
pl_zerofill_cont(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
off_t len, dict_t *xdata)
{
+ pl_track_io_fop_count(frame->local, this, INCREMENT);
+
STACK_WIND(frame, pl_zerofill_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
return 0;
@@ -609,6 +792,7 @@ int32_t
pl_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
off_t len, dict_t *xdata)
{
+ pl_local_t *local = NULL;
pl_inode_t *pl_inode = NULL;
pl_rw_req_t *rw = NULL;
posix_lock_t region = {
@@ -625,17 +809,28 @@ pl_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
GF_VALIDATE_OR_GOTO("locks", this, unwind);
- pl_inode = pl_inode_get(this, fd->inode);
- if (!pl_inode) {
+ local = mem_get0(this->local_pool);
+ if (!local) {
op_ret = -1;
op_errno = ENOMEM;
goto unwind;
}
- enabled = pl_is_mandatory_locking_enabled(pl_inode);
+ frame->local = local;
+ local->inode = inode_ref(fd->inode);
+ local->fd = fd_ref(fd);
+
+ pl_inode = pl_inode_get(this, fd->inode, local);
+ if (!pl_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
if (frame->root->pid < 0)
enabled = _gf_false;
+ else
+ enabled = pl_is_mandatory_locking_enabled(pl_inode);
if (enabled) {
region.fl_start = offset;
@@ -649,15 +844,19 @@ pl_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
{
allowed = pl_is_fop_allowed(pl_inode, &region, fd, GF_FOP_ZEROFILL,
&can_block);
- if (allowed == 1)
+ if (allowed == 1) {
+ if (pl_inode->mlock_enforced &&
+ pl_inode->track_fop_wind_count) {
+ pl_inode->fop_wind_count++;
+ }
goto unlock;
- else if (!can_block) {
+ } else if (!can_block) {
op_errno = EAGAIN;
op_ret = -1;
goto unlock;
}
- rw = GF_CALLOC(1, sizeof(*rw), gf_locks_mt_pl_rw_req_t);
+ rw = GF_MALLOC(sizeof(*rw), gf_locks_mt_pl_rw_req_t);
if (!rw) {
op_errno = ENOMEM;
op_ret = -1;
@@ -686,8 +885,8 @@ pl_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
unwind:
if (op_ret == -1)
- STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, NULL, NULL,
- NULL);
+ PL_STACK_UNWIND(zerofill, xdata, frame, op_ret, op_errno, NULL, NULL,
+ NULL);
return 0;
}
@@ -697,24 +896,16 @@ pl_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata)
{
- pl_local_t *local = NULL;
-
- local = frame->local;
+ pl_local_t *local = frame->local;
- if (local->op == GF_FOP_TRUNCATE)
- loc_wipe(&local->loc[0]);
-
- if (local->xdata)
- dict_unref(local->xdata);
- if (local->fd)
- fd_unref(local->fd);
+ pl_track_io_fop_count(local, this, DECREMENT);
if (local->op == GF_FOP_TRUNCATE)
- STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
+ PL_STACK_UNWIND(truncate, xdata, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
else
- STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
+ PL_STACK_UNWIND(ftruncate, xdata, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
return 0;
}
@@ -722,6 +913,8 @@ int
pl_ftruncate_cont(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
dict_t *xdata)
{
+ pl_track_io_fop_count(frame->local, this, INCREMENT);
+
STACK_WIND(frame, pl_truncate_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
return 0;
@@ -731,6 +924,8 @@ int
pl_truncate_cont(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
dict_t *xdata)
{
+ pl_track_io_fop_count(frame->local, this, INCREMENT);
+
STACK_WIND(frame, pl_truncate_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
return 0;
@@ -741,7 +936,7 @@ truncate_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
dict_t *xdata)
{
- pl_local_t *local = NULL;
+ pl_local_t *local = frame->local;
inode_t *inode = NULL;
pl_inode_t *pl_inode = NULL;
pl_rw_req_t *rw = NULL;
@@ -755,7 +950,6 @@ truncate_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
gf_boolean_t can_block = _gf_true;
int allowed = 1;
- local = frame->local;
GF_VALIDATE_OR_GOTO("locks", this, unwind);
if (op_ret != 0) {
@@ -770,17 +964,19 @@ truncate_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
else
inode = local->fd->inode;
- pl_inode = pl_inode_get(this, inode);
+ local->inode = inode_ref(inode);
+
+ pl_inode = pl_inode_get(this, inode, local);
if (!pl_inode) {
op_ret = -1;
op_errno = ENOMEM;
goto unwind;
}
- enabled = pl_is_mandatory_locking_enabled(pl_inode);
-
if (frame->root->pid < 0)
enabled = _gf_false;
+ else
+ enabled = pl_is_mandatory_locking_enabled(pl_inode);
if (enabled) {
region.fl_start = local->offset;
@@ -794,15 +990,19 @@ truncate_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
allowed = pl_is_fop_allowed(pl_inode, &region, local->fd, local->op,
&can_block);
- if (allowed == 1)
+ if (allowed == 1) {
+ if (pl_inode->mlock_enforced &&
+ pl_inode->track_fop_wind_count) {
+ pl_inode->fop_wind_count++;
+ }
goto unlock;
- else if (!can_block) {
+ } else if (!can_block) {
op_errno = EAGAIN;
op_ret = -1;
goto unlock;
}
- rw = GF_CALLOC(1, sizeof(*rw), gf_locks_mt_pl_rw_req_t);
+ rw = GF_MALLOC(sizeof(*rw), gf_locks_mt_pl_rw_req_t);
if (!rw) {
op_errno = ENOMEM;
op_ret = -1;
@@ -855,21 +1055,14 @@ unwind:
"ret: %d, error: %s",
op_ret, strerror(op_errno));
- if (local->op == GF_FOP_TRUNCATE)
- loc_wipe(&local->loc[0]);
- if (local->xdata)
- dict_unref(local->xdata);
- if (local->fd)
- fd_unref(local->fd);
-
switch (local->op) {
case GF_FOP_TRUNCATE:
- STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, buf,
- NULL, xdata);
+ PL_STACK_UNWIND(truncate, xdata, frame, op_ret, op_errno, buf,
+ NULL, xdata);
break;
case GF_FOP_FTRUNCATE:
- STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, buf,
- NULL, xdata);
+ PL_STACK_UNWIND(ftruncate, xdata, frame, op_ret, op_errno, buf,
+ NULL, xdata);
break;
default:
break;
@@ -901,6 +1094,7 @@ pl_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
STACK_WIND(frame, truncate_stat_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->stat, loc, NULL);
ret = 0;
+
unwind:
if (ret == -1) {
gf_log(this ? this->name : "locks", GF_LOG_ERROR,
@@ -1041,68 +1235,68 @@ pl_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
-int32_t
-pl_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,
- dict_t *xdata)
+static int32_t
+pl_getxattr_clrlk(xlator_t *this, const char *name, inode_t *inode,
+ dict_t **dict, int32_t *op_errno)
{
- int32_t op_errno = EINVAL;
- int op_ret = -1;
int32_t bcount = 0;
int32_t gcount = 0;
- char key[PATH_MAX] = {
- 0,
- };
+ char *key = NULL;
char *lk_summary = NULL;
pl_inode_t *pl_inode = NULL;
- dict_t *dict = NULL;
clrlk_args args = {
0,
};
char *brickname = NULL;
+ int32_t op_ret = -1;
- if (!name)
- goto usual;
-
- if (strncmp(name, GF_XATTR_CLRLK_CMD, SLEN(GF_XATTR_CLRLK_CMD)))
- goto usual;
+ *op_errno = EINVAL;
if (clrlk_parse_args(name, &args)) {
- op_errno = EINVAL;
+ *op_errno = EINVAL;
goto out;
}
- dict = dict_new();
- if (!dict) {
- op_errno = ENOMEM;
+ *dict = dict_new();
+ if (!*dict) {
+ *op_errno = ENOMEM;
goto out;
}
- pl_inode = pl_inode_get(this, loc->inode);
+ pl_inode = pl_inode_get(this, inode, NULL);
if (!pl_inode) {
- op_errno = ENOMEM;
+ *op_errno = ENOMEM;
goto out;
}
switch (args.type) {
case CLRLK_INODE:
case CLRLK_ENTRY:
- op_ret = clrlk_clear_lks_in_all_domains(
- this, pl_inode, &args, &bcount, &gcount, &op_errno);
- if (op_ret)
- goto out;
+ op_ret = clrlk_clear_lks_in_all_domains(this, pl_inode, &args,
+ &bcount, &gcount, op_errno);
break;
case CLRLK_POSIX:
op_ret = clrlk_clear_posixlk(this, pl_inode, &args, &bcount,
- &gcount, &op_errno);
- if (op_ret)
- goto out;
+ &gcount, op_errno);
break;
- case CLRLK_TYPE_MAX:
- op_errno = EINVAL;
- goto out;
+ default:
+ op_ret = -1;
+ *op_errno = EINVAL;
}
+ if (op_ret) {
+ if (args.type >= CLRLK_TYPE_MAX) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "clear locks: invalid lock type %d", args.type);
+ } else {
+ gf_log(this->name, GF_LOG_ERROR,
+ "clear locks of type %s failed: %s",
+ clrlk_type_names[args.type], strerror(*op_errno));
+ }
- op_ret = fetch_pathinfo(this, loc->inode, &op_errno, &brickname);
+ goto out;
+ }
+
+ op_ret = fetch_pathinfo(this, inode, op_errno, &brickname);
if (op_ret) {
gf_log(this->name, GF_LOG_WARNING, "Couldn't get brickname");
} else {
@@ -1117,43 +1311,62 @@ pl_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,
if (!gcount && !bcount) {
if (gf_asprintf(&lk_summary, "No locks cleared.") == -1) {
op_ret = -1;
- op_errno = ENOMEM;
+ *op_errno = ENOMEM;
goto out;
}
- } else if (gf_asprintf(
- &lk_summary,
- "%s: %s blocked locks=%d "
- "granted locks=%d",
- (brickname == NULL) ? this->name : brickname,
- (args.type == CLRLK_INODE)
- ? "inode"
- : (args.type == CLRLK_ENTRY)
- ? "entry"
- : (args.type == CLRLK_POSIX) ? "posix" : " ",
- bcount, gcount) == -1) {
+ } else if (gf_asprintf(&lk_summary,
+ "%s: %s blocked locks=%d "
+ "granted locks=%d",
+ (brickname == NULL) ? this->name : brickname,
+ clrlk_type_names[args.type], bcount, gcount) == -1) {
op_ret = -1;
- op_errno = ENOMEM;
+ *op_errno = ENOMEM;
goto out;
}
+ gf_log(this->name, GF_LOG_DEBUG, "%s", lk_summary);
- if (snprintf(key, sizeof(key), "%s", name) >= sizeof(key)) {
+ key = gf_strdup(name);
+ if (!key) {
op_ret = -1;
goto out;
}
- if (dict_set_dynstr(dict, key, lk_summary)) {
+ if (dict_set_dynstr(*dict, key, lk_summary)) {
op_ret = -1;
- op_errno = ENOMEM;
+ *op_errno = ENOMEM;
goto out;
}
op_ret = 0;
+
out:
GF_FREE(brickname);
- STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict, xdata);
-
GF_FREE(args.opts);
- if (op_ret && lk_summary)
+ GF_FREE(key);
+ if (op_ret) {
GF_FREE(lk_summary);
+ }
+
+ return op_ret;
+}
+
+int32_t
+pl_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,
+ dict_t *xdata)
+{
+ int32_t op_errno = EINVAL;
+ int32_t op_ret = -1;
+ dict_t *dict = NULL;
+
+ if (!name)
+ goto usual;
+
+ if (strncmp(name, GF_XATTR_CLRLK_CMD, SLEN(GF_XATTR_CLRLK_CMD)))
+ goto usual;
+
+ op_ret = pl_getxattr_clrlk(this, name, loc->inode, &dict, &op_errno);
+
+ STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict, xdata);
+
if (dict)
dict_unref(dict);
return 0;
@@ -1219,7 +1432,7 @@ fetch_pathinfo(xlator_t *this, inode_t *inode, int32_t *op_errno,
goto out;
}
- ret = dict_get_str(dict, GF_XATTR_PATHINFO_KEY, brickname);
+ ret = dict_get_str_sizen(dict, GF_XATTR_PATHINFO_KEY, brickname);
if (ret)
goto out;
@@ -1242,15 +1455,12 @@ out:
int
pl_lockinfo_get_brickname(xlator_t *this, inode_t *inode, int32_t *op_errno)
{
- int ret = -1;
- posix_locks_private_t *priv = NULL;
+ posix_locks_private_t *priv = this->private;
char *brickname = NULL;
char *end = NULL;
char *tmp = NULL;
- priv = this->private;
-
- ret = fetch_pathinfo(this, inode, op_errno, &brickname);
+ int ret = fetch_pathinfo(this, inode, op_errno, &brickname);
if (ret)
goto out;
@@ -1278,12 +1488,10 @@ out:
char *
pl_lockinfo_key(xlator_t *this, inode_t *inode, int32_t *op_errno)
{
- posix_locks_private_t *priv = NULL;
+ posix_locks_private_t *priv = this->private;
char *key = NULL;
int ret = 0;
- priv = this->private;
-
if (priv->brickname == NULL) {
ret = pl_lockinfo_get_brickname(this, inode, op_errno);
if (ret < 0) {
@@ -1301,14 +1509,13 @@ int32_t
pl_fgetxattr_handle_lockinfo(xlator_t *this, fd_t *fd, dict_t *dict,
int32_t *op_errno)
{
- pl_inode_t *pl_inode = NULL;
char *key = NULL, *buf = NULL;
int32_t op_ret = 0;
unsigned long fdnum = 0;
int32_t len = 0;
dict_t *tmp = NULL;
- pl_inode = pl_inode_get(this, fd->inode);
+ pl_inode_t *pl_inode = pl_inode_get(this, fd->inode, NULL);
if (!pl_inode) {
gf_log(this->name, GF_LOG_DEBUG, "Could not get inode.");
@@ -1348,8 +1555,9 @@ pl_fgetxattr_handle_lockinfo(xlator_t *this, fd_t *fd, dict_t *dict,
goto out;
}
- len = dict_serialized_length(tmp);
- if (len < 0) {
+ op_ret = dict_allocate_and_serialize(tmp, (char **)&buf,
+ (unsigned int *)&len);
+ if (op_ret != 0) {
*op_errno = -op_ret;
op_ret = -1;
gf_log(this->name, GF_LOG_WARNING,
@@ -1359,24 +1567,6 @@ pl_fgetxattr_handle_lockinfo(xlator_t *this, fd_t *fd, dict_t *dict,
goto out;
}
- buf = GF_CALLOC(1, len, gf_common_mt_char);
- if (buf == NULL) {
- op_ret = -1;
- *op_errno = ENOMEM;
- goto out;
- }
-
- op_ret = dict_serialize(tmp, buf);
- if (op_ret < 0) {
- *op_errno = -op_ret;
- op_ret = -1;
- gf_log(this->name, GF_LOG_WARNING,
- "dict_serialize failed (%s) while handling lockinfo "
- "for fd (ptr: %p inode-gfid:%s)",
- strerror(*op_errno), fd, uuid_utoa(fd->inode->gfid));
- goto out;
- }
-
op_ret = dict_set_dynptr(dict, GF_XATTR_LOCKINFO_KEY, buf, len);
if (op_ret < 0) {
*op_errno = -op_ret;
@@ -1429,6 +1619,11 @@ pl_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
}
goto unwind;
+ } else if (strncmp(name, GF_XATTR_CLRLK_CMD, SLEN(GF_XATTR_CLRLK_CMD)) ==
+ 0) {
+ op_ret = pl_getxattr_clrlk(this, name, fd->inode, &dict, &op_errno);
+
+ goto unwind;
} else {
goto usual;
}
@@ -1451,14 +1646,11 @@ int32_t
pl_migrate_locks(call_frame_t *frame, fd_t *newfd, uint64_t oldfd_num,
int32_t *op_errno)
{
- pl_inode_t *pl_inode = NULL;
- uint64_t newfd_num = 0;
posix_lock_t *l = NULL;
int32_t op_ret = 0;
+ uint64_t newfd_num = fd_to_fdnum(newfd);
- newfd_num = fd_to_fdnum(newfd);
-
- pl_inode = pl_inode_get(frame->this, newfd->inode);
+ pl_inode_t *pl_inode = pl_inode_get(frame->this, newfd->inode, NULL);
if (pl_inode == NULL) {
op_ret = -1;
*op_errno = EBADFD;
@@ -1487,11 +1679,10 @@ pl_fsetxattr_handle_lockinfo(call_frame_t *frame, fd_t *fd, char *lockinfo_buf,
int len, int32_t *op_errno)
{
int32_t op_ret = -1;
- dict_t *lockinfo = NULL;
uint64_t oldfd_num = 0;
char *key = NULL;
- lockinfo = dict_new();
+ dict_t *lockinfo = dict_new();
if (lockinfo == NULL) {
op_ret = -1;
*op_errno = ENOMEM;
@@ -1537,6 +1728,27 @@ int32_t
pl_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ pl_local_t *local = NULL;
+ pl_inode_t *pl_inode = NULL;
+
+ local = frame->local;
+ if (local && local->update_mlock_enforced_flag && op_ret != -1) {
+ pl_inode = pl_inode_get(this, local->inode, NULL);
+ if (!pl_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ pl_inode->mlock_enforced = _gf_true;
+ pl_inode->check_mlock_info = _gf_false;
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+
+unwind:
PL_STACK_UNWIND_FOR_CLIENT(fsetxattr, xdata, frame, op_ret, op_errno,
xdata);
return 0;
@@ -1546,12 +1758,14 @@ int32_t
pl_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
int32_t flags, dict_t *xdata)
{
- int32_t op_ret = 0, op_errno = 0;
+ int32_t op_errno = 0;
void *lockinfo_buf = NULL;
int len = 0;
+ char *name = NULL;
+ posix_locks_private_t *priv = this->private;
- op_ret = dict_get_ptr_and_len(dict, GF_XATTR_LOCKINFO_KEY, &lockinfo_buf,
- &len);
+ int32_t op_ret = dict_get_ptr_and_len(dict, GF_XATTR_LOCKINFO_KEY,
+ &lockinfo_buf, &len);
if (lockinfo_buf == NULL) {
goto usual;
}
@@ -1564,12 +1778,17 @@ pl_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
usual:
PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
+
+ PL_CHECK_LOCK_ENFORCE_KEY(frame, dict, name, this, ((loc_t *)NULL), fd,
+ priv);
+
STACK_WIND(frame, pl_fsetxattr_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
return 0;
unwind:
- STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, NULL);
+ PL_STACK_UNWIND_FOR_CLIENT(fsetxattr, xdata, frame, op_ret, op_errno, NULL);
+
return 0;
}
@@ -1617,10 +1836,7 @@ pl_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int
pl_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- pl_inode_t *pl_inode = NULL;
-
- pl_inode = pl_inode_get(this, fd->inode);
-
+ pl_inode_t *pl_inode = pl_inode_get(this, fd->inode, NULL);
if (!pl_inode) {
gf_log(this->name, GF_LOG_DEBUG, "Could not get inode.");
STACK_UNWIND_STRICT(flush, frame, -1, EBADFD, NULL);
@@ -1696,14 +1912,12 @@ pl_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
int op_errno = EINVAL;
pl_inode_t *pl_inode = NULL;
posix_lock_t *l = NULL;
- posix_locks_private_t *priv = NULL;
-
- priv = this->private;
+ posix_locks_private_t *priv = this->private;
GF_VALIDATE_OR_GOTO("locks", this, unwind);
op_ret = 0, op_errno = 0;
- pl_inode = pl_inode_get(this, fd->inode);
+ pl_inode = pl_inode_get(this, fd->inode, NULL);
if (!pl_inode) {
gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM, "Could not get inode");
op_ret = -1;
@@ -1784,7 +1998,8 @@ int
pl_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
+
STACK_WIND(frame, pl_create_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
xdata);
@@ -1796,6 +2011,8 @@ pl_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t op_errno, struct iovec *vector, int32_t count,
struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
{
+ pl_track_io_fop_count(frame->local, this, DECREMENT);
+
PL_STACK_UNWIND(readv, xdata, frame, op_ret, op_errno, vector, count, stbuf,
iobref, xdata);
@@ -1807,6 +2024,8 @@ pl_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
dict_t *xdata)
{
+ pl_track_io_fop_count(frame->local, this, DECREMENT);
+
PL_STACK_UNWIND(writev, xdata, frame, op_ret, op_errno, prebuf, postbuf,
xdata);
@@ -1829,6 +2048,10 @@ do_blocked_rw(pl_inode_t *pl_inode)
if (__rw_allowable(pl_inode, &rw->region, rw->stub->fop)) {
list_del_init(&rw->list);
list_add_tail(&rw->list, &wind_list);
+ if (pl_inode->mlock_enforced &&
+ pl_inode->track_fop_wind_count) {
+ pl_inode->fop_wind_count++;
+ }
}
}
}
@@ -1844,14 +2067,68 @@ do_blocked_rw(pl_inode_t *pl_inode)
return;
}
+/* when mandatory lock is enforced:
+ If an IO request comes on a region which is out of the boundary of the
+ granted mandatory lock, it will be rejected.
+
+ Note: There is no IO blocking with mandatory lock enforced as it may be
+ a stale data from an old client.
+ */
+gf_boolean_t static within_range(posix_lock_t *existing, posix_lock_t *new)
+{
+ if (existing->fl_start <= new->fl_start && existing->fl_end >= new->fl_end)
+ return _gf_true;
+
+ return _gf_false;
+}
+
static int
__rw_allowable(pl_inode_t *pl_inode, posix_lock_t *region, glusterfs_fop_t op)
{
posix_lock_t *l = NULL;
- posix_locks_private_t *priv = NULL;
+ posix_locks_private_t *priv = THIS->private;
int ret = 1;
- priv = THIS->private;
+ if (pl_inode->mlock_enforced) {
+ list_for_each_entry(l, &pl_inode->ext_list, list)
+ {
+ /*
+ with lock enforced (fencing) there should not be any blocking
+ lock coexisting.
+ */
+ if (same_owner(l, region)) {
+ /* Should range check be strict for same owner with fencing? */
+ if (locks_overlap(l, region)) {
+ if (within_range(l, region)) {
+ return 1;
+ } else {
+ /*
+ Should we allow read fop if it does not fit it in the
+ range?
+ if (op == GF_FOP_READ && l->fl_type != F_WRLCK) {
+ return 1;
+ }
+ */
+ return 0;
+ }
+ }
+ } else {
+ if (locks_overlap(l, region)) {
+ /*
+ with fencing should a read from a different owner be
+ allowed if the mandatory lock taken is F_RDLCK?
+ if (op == GF_FOP_READ && l->fl_type != F_WRLCK) {
+ return 1;
+ }
+ */
+ return 0;
+ }
+ }
+ }
+
+ /* No lock has been taken by this owner */
+ return 0;
+ }
list_for_each_entry(l, &pl_inode->ext_list, list)
{
@@ -1875,6 +2152,8 @@ int
pl_readv_cont(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
off_t offset, uint32_t flags, dict_t *xdata)
{
+ pl_track_io_fop_count(frame->local, this, INCREMENT);
+
STACK_WIND(frame, pl_readv_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
@@ -1885,6 +2164,7 @@ int
pl_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
off_t offset, uint32_t flags, dict_t *xdata)
{
+ pl_local_t *local = NULL;
pl_inode_t *pl_inode = NULL;
pl_rw_req_t *rw = NULL;
posix_lock_t region = {
@@ -1901,18 +2181,26 @@ pl_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
GF_VALIDATE_OR_GOTO("locks", this, unwind);
- pl_inode = pl_inode_get(this, fd->inode);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
+
+ if (!frame->local) {
+ frame->local = mem_get0(this->local_pool);
+ local = frame->local;
+ local->inode = inode_ref(fd->inode);
+ local->fd = fd_ref(fd);
+ }
+
+ pl_inode = pl_inode_get(this, fd->inode, local);
if (!pl_inode) {
op_ret = -1;
op_errno = ENOMEM;
goto unwind;
}
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
- enabled = pl_is_mandatory_locking_enabled(pl_inode);
-
if (frame->root->pid < 0)
enabled = _gf_false;
+ else
+ enabled = pl_is_mandatory_locking_enabled(pl_inode);
if (enabled) {
region.fl_start = offset;
@@ -1926,15 +2214,19 @@ pl_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
{
allowed = pl_is_fop_allowed(pl_inode, &region, fd, GF_FOP_READ,
&can_block);
- if (allowed == 1)
+ if (allowed == 1) {
+ if (pl_inode->mlock_enforced &&
+ pl_inode->track_fop_wind_count) {
+ pl_inode->fop_wind_count++;
+ }
goto unlock;
- else if (!can_block) {
+ } else if (!can_block) {
op_errno = EAGAIN;
op_ret = -1;
goto unlock;
}
- rw = GF_CALLOC(1, sizeof(*rw), gf_locks_mt_pl_rw_req_t);
+ rw = GF_MALLOC(sizeof(*rw), gf_locks_mt_pl_rw_req_t);
if (!rw) {
op_errno = ENOMEM;
op_ret = -1;
@@ -1965,8 +2257,8 @@ pl_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
}
unwind:
if (op_ret == -1)
- STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, NULL, 0, NULL, NULL,
- NULL);
+ PL_STACK_UNWIND(readv, xdata, frame, op_ret, op_errno, NULL, 0, NULL,
+ NULL, NULL);
return 0;
}
@@ -1976,6 +2268,8 @@ pl_writev_cont(call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int count, off_t offset, uint32_t flags,
struct iobref *iobref, dict_t *xdata)
{
+ pl_track_io_fop_count(frame->local, this, INCREMENT);
+
STACK_WIND(frame, pl_writev_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
flags, iobref, xdata);
@@ -1988,6 +2282,7 @@ pl_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
dict_t *xdata)
{
+ pl_local_t *local = NULL;
pl_inode_t *pl_inode = NULL;
pl_rw_req_t *rw = NULL;
posix_lock_t region = {
@@ -2004,18 +2299,26 @@ pl_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
GF_VALIDATE_OR_GOTO("locks", this, unwind);
- pl_inode = pl_inode_get(this, fd->inode);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
+
+ if (!frame->local) {
+ frame->local = mem_get0(this->local_pool);
+ local = frame->local;
+ local->inode = inode_ref(fd->inode);
+ local->fd = fd_ref(fd);
+ }
+
+ pl_inode = pl_inode_get(this, fd->inode, local);
if (!pl_inode) {
op_ret = -1;
op_errno = ENOMEM;
goto unwind;
}
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
- enabled = pl_is_mandatory_locking_enabled(pl_inode);
-
if (frame->root->pid < 0)
enabled = _gf_false;
+ else
+ enabled = pl_is_mandatory_locking_enabled(pl_inode);
if (enabled) {
region.fl_start = offset;
@@ -2029,15 +2332,24 @@ pl_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
{
allowed = pl_is_fop_allowed(pl_inode, &region, fd, GF_FOP_WRITE,
&can_block);
- if (allowed == 1)
+ if (allowed == 1) {
+ if (pl_inode->mlock_enforced &&
+ pl_inode->track_fop_wind_count) {
+ pl_inode->fop_wind_count++;
+ }
goto unlock;
- else if (!can_block) {
- op_errno = EAGAIN;
+ } else if (!can_block) {
+ if (pl_inode->mlock_enforced) {
+ op_errno = EBUSY;
+ } else {
+ op_errno = EAGAIN;
+ }
+
op_ret = -1;
goto unlock;
}
- rw = GF_CALLOC(1, sizeof(*rw), gf_locks_mt_pl_rw_req_t);
+ rw = GF_MALLOC(sizeof(*rw), gf_locks_mt_pl_rw_req_t);
if (!rw) {
op_errno = ENOMEM;
op_ret = -1;
@@ -2068,7 +2380,8 @@ pl_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
}
unwind:
if (op_ret == -1)
- STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, NULL, NULL, NULL);
+ PL_STACK_UNWIND(writev, xdata, frame, op_ret, op_errno, NULL, NULL,
+ NULL);
return 0;
}
@@ -2076,29 +2389,25 @@ unwind:
static int
__fd_has_locks(pl_inode_t *pl_inode, fd_t *fd)
{
- int found = 0;
posix_lock_t *l = NULL;
list_for_each_entry(l, &pl_inode->ext_list, list)
{
if (l->fd_num == fd_to_fdnum(fd)) {
- found = 1;
- break;
+ return 1;
}
}
- return found;
+ return 0;
}
static posix_lock_t *
lock_dup(posix_lock_t *lock)
{
- posix_lock_t *new_lock = NULL;
-
- new_lock = new_posix_lock(&lock->user_flock, lock->client, lock->client_pid,
- &lock->owner, (fd_t *)lock->fd_num,
- lock->lk_flags, lock->blocking);
- return new_lock;
+ int32_t op_errno = 0;
+ return new_posix_lock(&lock->user_flock, lock->client, lock->client_pid,
+ &lock->owner, (fd_t *)lock->fd_num, lock->lk_flags,
+ lock->blocking, &op_errno);
}
static int
@@ -2127,14 +2436,7 @@ __dup_locks_to_fdctx(pl_inode_t *pl_inode, fd_t *fd, pl_fdctx_t *fdctx)
static int
__copy_locks_to_fdctx(pl_inode_t *pl_inode, fd_t *fd, pl_fdctx_t *fdctx)
{
- int ret = 0;
-
- ret = __dup_locks_to_fdctx(pl_inode, fd, fdctx);
- if (ret)
- goto out;
-
-out:
- return ret;
+ return __dup_locks_to_fdctx(pl_inode, fd, fdctx);
}
static void
@@ -2205,9 +2507,10 @@ pl_getlk_fd(xlator_t *this, pl_inode_t *pl_inode, fd_t *fd,
pthread_mutex_lock(&pl_inode->mutex);
{
if (!__fd_has_locks(pl_inode, fd)) {
+ pthread_mutex_unlock(&pl_inode->mutex);
gf_log(this->name, GF_LOG_DEBUG, "fd=%p has no active locks", fd);
ret = 0;
- goto unlock;
+ goto out;
}
gf_log(this->name, GF_LOG_DEBUG, "There are active locks on fd");
@@ -2231,15 +2534,17 @@ pl_getlk_fd(xlator_t *this, pl_inode_t *pl_inode, fd_t *fd,
"fdctx present -> returning the next lock");
ret = __set_next_lock_fd(fdctx, reqlock);
if (ret) {
+ pthread_mutex_unlock(&pl_inode->mutex);
gf_log(this->name, GF_LOG_DEBUG,
"could not get next lock of fd");
- goto unlock;
+ goto out;
}
}
}
unlock:
pthread_mutex_unlock(&pl_inode->mutex);
+out:
return ret;
}
@@ -2252,12 +2557,10 @@ pl_metalock_is_active(pl_inode_t *pl_inode)
return 1;
}
-int
-__pl_queue_lock(pl_inode_t *pl_inode, posix_lock_t *reqlock, int can_block)
+void
+__pl_queue_lock(pl_inode_t *pl_inode, posix_lock_t *reqlock)
{
list_add_tail(&reqlock->list, &pl_inode->queued_locks);
-
- return 0;
}
int
@@ -2270,13 +2573,12 @@ pl_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
int can_block = 0;
posix_lock_t *reqlock = NULL;
posix_lock_t *conf = NULL;
- int ret = 0;
uint32_t lk_flags = 0;
- posix_locks_private_t *priv = NULL;
-
- priv = this->private;
+ posix_locks_private_t *priv = this->private;
+ pl_local_t *local = NULL;
+ short lock_type = 0;
- ret = dict_get_uint32(xdata, GF_LOCK_MODE, &lk_flags);
+ int ret = dict_get_uint32(xdata, GF_LOCK_MODE, &lk_flags);
if (ret == 0) {
if (priv->mandatory_mode == MLK_NONE)
gf_log(this->name, GF_LOG_DEBUG,
@@ -2305,7 +2607,17 @@ pl_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
flock->l_len = labs(flock->l_len);
}
- pl_inode = pl_inode_get(this, fd->inode);
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ } else {
+ frame->local = local;
+ local->fd = fd_ref(fd);
+ }
+
+ pl_inode = pl_inode_get(this, fd->inode, local);
if (!pl_inode) {
op_ret = -1;
op_errno = ENOMEM;
@@ -2313,11 +2625,11 @@ pl_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
}
reqlock = new_posix_lock(flock, frame->root->client, frame->root->pid,
- &frame->root->lk_owner, fd, lk_flags, can_block);
+ &frame->root->lk_owner, fd, lk_flags, can_block,
+ &op_errno);
if (!reqlock) {
op_ret = -1;
- op_errno = ENOMEM;
goto unwind;
}
@@ -2409,6 +2721,7 @@ pl_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
case F_SETLK:
reqlock->frame = frame;
reqlock->this = this;
+ lock_type = flock->l_type;
pthread_mutex_lock(&pl_inode->mutex);
{
@@ -2430,10 +2743,23 @@ pl_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
goto out;
}
+ if (reqlock->fl_type != F_UNLCK && pl_inode->mlock_enforced) {
+ ret = pl_lock_preempt(pl_inode, reqlock);
+ if (ret == -1) {
+ gf_log(this->name, GF_LOG_ERROR, "lock preempt failed");
+ op_ret = -1;
+ op_errno = EAGAIN;
+ __destroy_lock(reqlock);
+ goto out;
+ }
+
+ pl_trace_block(this, frame, fd, NULL, cmd, flock, NULL);
+ goto unwind;
+ }
+
ret = pl_setlk(this, pl_inode, reqlock, can_block);
if (ret == -1) {
- if ((can_block) && (F_UNLCK != flock->l_type)) {
- pl_trace_block(this, frame, fd, NULL, cmd, flock, NULL);
+ if ((can_block) && (F_UNLCK != lock_type)) {
goto out;
}
gf_log(this->name, GF_LOG_DEBUG, "returning EAGAIN");
@@ -2455,7 +2781,7 @@ unwind:
pl_trace_out(this, frame, fd, NULL, cmd, flock, op_ret, op_errno, NULL);
pl_update_refkeeper(this, fd->inode);
- STACK_UNWIND_STRICT(lk, frame, op_ret, op_errno, flock, xdata);
+ PL_STACK_UNWIND(lk, xdata, frame, op_ret, op_errno, flock, xdata);
out:
return 0;
}
@@ -2488,7 +2814,7 @@ pl_forget(xlator_t *this, inode_t *inode)
INIT_LIST_HEAD(&inodelks_released);
INIT_LIST_HEAD(&entrylks_released);
- pl_inode = pl_inode_get(this, inode);
+ pl_inode = pl_inode_get(this, inode, NULL);
if (!pl_inode)
return 0;
@@ -2562,25 +2888,33 @@ pl_forget(xlator_t *this, inode_t *inode)
}
pthread_mutex_unlock(&pl_inode->mutex);
- list_for_each_entry_safe(ext_l, ext_tmp, &posixlks_released, list)
- {
- STACK_UNWIND_STRICT(lk, ext_l->frame, -1, 0, &ext_l->user_flock, NULL);
- __destroy_lock(ext_l);
+ if (!list_empty(&posixlks_released)) {
+ list_for_each_entry_safe(ext_l, ext_tmp, &posixlks_released, list)
+ {
+ STACK_UNWIND_STRICT(lk, ext_l->frame, -1, 0, &ext_l->user_flock,
+ NULL);
+ __destroy_lock(ext_l);
+ }
}
- list_for_each_entry_safe(ino_l, ino_tmp, &inodelks_released, blocked_locks)
- {
- STACK_UNWIND_STRICT(inodelk, ino_l->frame, -1, 0, NULL);
- __pl_inodelk_unref(ino_l);
+ if (!list_empty(&inodelks_released)) {
+ list_for_each_entry_safe(ino_l, ino_tmp, &inodelks_released,
+ blocked_locks)
+ {
+ STACK_UNWIND_STRICT(inodelk, ino_l->frame, -1, 0, NULL);
+ __pl_inodelk_unref(ino_l);
+ }
}
- list_for_each_entry_safe(entry_l, entry_tmp, &entrylks_released,
- blocked_locks)
- {
- STACK_UNWIND_STRICT(entrylk, entry_l->frame, -1, 0, NULL);
- GF_FREE((char *)entry_l->basename);
- GF_FREE(entry_l->connection_id);
- GF_FREE(entry_l);
+ if (!list_empty(&entrylks_released)) {
+ list_for_each_entry_safe(entry_l, entry_tmp, &entrylks_released,
+ blocked_locks)
+ {
+ STACK_UNWIND_STRICT(entrylk, entry_l->frame, -1, 0, NULL);
+ GF_FREE((char *)entry_l->basename);
+ GF_FREE(entry_l->connection_id);
+ GF_FREE(entry_l);
+ }
}
pthread_mutex_destroy(&pl_inode->mutex);
@@ -2654,11 +2988,85 @@ out:
return ret;
}
+static int32_t
+pl_request_link_count(dict_t **pxdata)
+{
+ dict_t *xdata;
+
+ xdata = *pxdata;
+ if (xdata == NULL) {
+ xdata = dict_new();
+ if (xdata == NULL) {
+ return ENOMEM;
+ }
+ } else {
+ dict_ref(xdata);
+ }
+
+ if (dict_set_uint32(xdata, GET_LINK_COUNT, 0) != 0) {
+ dict_unref(xdata);
+ return ENOMEM;
+ }
+
+ *pxdata = xdata;
+
+ return 0;
+}
+
+static int32_t
+pl_check_link_count(dict_t *xdata)
+{
+ int32_t count;
+
+ /* In case we are unable to read the link count from xdata, we take a
+ * conservative approach and return -2, which will prevent the inode from
+ * being considered deleted. In fact it will cause link tracking for this
+ * inode to be disabled completely to avoid races. */
+
+ if (xdata == NULL) {
+ return -2;
+ }
+
+ if (dict_get_int32(xdata, GET_LINK_COUNT, &count) != 0) {
+ return -2;
+ }
+
+ return count;
+}
+
int32_t
pl_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata,
struct iatt *postparent)
{
+ pl_inode_t *pl_inode;
+
+ if (op_ret >= 0) {
+ pl_inode = pl_inode_get(this, inode, NULL);
+ if (pl_inode == NULL) {
+ PL_STACK_UNWIND(lookup, xdata, frame, -1, ENOMEM, NULL, NULL, NULL,
+ NULL);
+ return 0;
+ }
+
+ pthread_mutex_lock(&pl_inode->mutex);
+
+ /* We only update the link count if we previously didn't know it.
+ * Doing it always can lead to races since lookup is not executed
+ * atomically most of the times. */
+ if (pl_inode->links == -2) {
+ pl_inode->links = pl_check_link_count(xdata);
+ if (buf->ia_type == IA_IFDIR) {
+ /* Directories have at least 2 links. To avoid special handling
+ * for directories, we simply decrement the value here to make
+ * them equivalent to regular files. */
+ pl_inode->links--;
+ }
+ }
+
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+
PL_STACK_UNWIND(lookup, xdata, frame, op_ret, op_errno, inode, buf, xdata,
postparent);
return 0;
@@ -2667,9 +3075,17 @@ pl_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t
pl_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
- STACK_WIND(frame, pl_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ int32_t error;
+
+ error = pl_request_link_count(&xdata);
+ if (error == 0) {
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
+ STACK_WIND(frame, pl_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ dict_unref(xdata);
+ } else {
+ STACK_UNWIND_STRICT(lookup, frame, -1, error, NULL, NULL, NULL, NULL);
+ }
return 0;
}
@@ -2730,9 +3146,8 @@ pl_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
lock_migration_info_t *
gf_mig_info_for_lock(posix_lock_t *lock)
{
- lock_migration_info_t *new = NULL;
-
- new = GF_CALLOC(1, sizeof(lock_migration_info_t), gf_common_mt_lock_mig);
+ lock_migration_info_t *new = GF_MALLOC(sizeof(lock_migration_info_t),
+ gf_common_mt_lock_mig);
if (new == NULL) {
goto out;
}
@@ -2760,7 +3175,7 @@ pl_fill_active_locks(pl_inode_t *pl_inode, lock_migration_info_t *lmi)
{
if (list_empty(&pl_inode->ext_list)) {
count = 0;
- goto out;
+ goto unlock;
}
list_for_each_entry(temp, &pl_inode->ext_list, list)
@@ -2770,6 +3185,7 @@ pl_fill_active_locks(pl_inode_t *pl_inode, lock_migration_info_t *lmi)
newlock = gf_mig_info_for_lock(temp);
if (!newlock) {
+ pthread_mutex_unlock(&pl_inode->mutex);
gf_msg(THIS->name, GF_LOG_ERROR, 0, 0, "lock_dup failed");
count = -1;
goto out;
@@ -2780,8 +3196,9 @@ pl_fill_active_locks(pl_inode_t *pl_inode, lock_migration_info_t *lmi)
}
}
-out:
+unlock:
pthread_mutex_unlock(&pl_inode->mutex);
+out:
return count;
}
@@ -2797,7 +3214,7 @@ pl_getactivelk(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
INIT_LIST_HEAD(&locks.list);
- pl_inode = pl_inode_get(this, loc->inode);
+ pl_inode = pl_inode_get(this, loc->inode, NULL);
if (!pl_inode) {
gf_msg(this->name, GF_LOG_ERROR, 0, 0, "pl_inode_get failed");
@@ -2837,9 +3254,8 @@ __pl_metalk_ref(pl_meta_lock_t *lock)
pl_meta_lock_t *
new_meta_lock(call_frame_t *frame, xlator_t *this)
{
- pl_meta_lock_t *lock = NULL;
-
- lock = GF_CALLOC(1, sizeof(*lock), gf_locks_mt_pl_meta_lock_t);
+ pl_meta_lock_t *lock = GF_CALLOC(1, sizeof(*lock),
+ gf_locks_mt_pl_meta_lock_t);
if (!lock) {
gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM,
@@ -2913,7 +3329,7 @@ pl_metalk(call_frame_t *frame, xlator_t *this, inode_t *inode)
pl_meta_lock_t *reqlk = NULL;
pl_ctx_t *ctx = NULL;
- pl_inode = pl_inode_get(this, inode);
+ pl_inode = pl_inode_get(this, inode, NULL);
if (!pl_inode) {
gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM,
"pl_inode mem allocation failedd");
@@ -2987,9 +3403,8 @@ out:
return ret;
}
-void
-__unwind_queued_locks(xlator_t *this, pl_inode_t *pl_inode,
- struct list_head *tmp_list)
+static void
+__unwind_queued_locks(pl_inode_t *pl_inode, struct list_head *tmp_list)
{
if (list_empty(&pl_inode->queued_locks))
return;
@@ -2997,9 +3412,8 @@ __unwind_queued_locks(xlator_t *this, pl_inode_t *pl_inode,
list_splice_init(&pl_inode->queued_locks, tmp_list);
}
-void
-__unwind_blocked_locks(xlator_t *this, pl_inode_t *pl_inode,
- struct list_head *tmp_list)
+static void
+__unwind_blocked_locks(pl_inode_t *pl_inode, struct list_head *tmp_list)
{
posix_lock_t *lock = NULL;
posix_lock_t *tmp = NULL;
@@ -3047,7 +3461,7 @@ pl_metaunlock(call_frame_t *frame, xlator_t *this, inode_t *inode, dict_t *dict)
goto out;
}
- pl_inode = pl_inode_get(this, inode);
+ pl_inode = pl_inode_get(this, inode, NULL);
if (!pl_inode) {
ret = -1;
goto out;
@@ -3058,12 +3472,12 @@ pl_metaunlock(call_frame_t *frame, xlator_t *this, inode_t *inode, dict_t *dict)
pthread_mutex_lock(&pl_inode->mutex);
{
/* Unwind queued locks regardless of migration status */
- __unwind_queued_locks(this, pl_inode, &tmp_posixlk_list);
+ __unwind_queued_locks(pl_inode, &tmp_posixlk_list);
/* Unwind blocked locks only for successful migration */
- if (dict_get(dict, "status")) {
+ if (dict_get_sizen(dict, "status")) {
/* unwind all blocked locks */
- __unwind_blocked_locks(this, pl_inode, &tmp_posixlk_list);
+ __unwind_blocked_locks(pl_inode, &tmp_posixlk_list);
}
/* unlock metalk */
@@ -3090,7 +3504,7 @@ pl_metaunlock(call_frame_t *frame, xlator_t *this, inode_t *inode, dict_t *dict)
inode_unref(pl_inode->inode);
}
- if (dict_get(dict, "status"))
+ if (dict_get_sizen(dict, "status"))
pl_inode->migrated = _gf_true;
else
pl_inode->migrated = _gf_false;
@@ -3119,6 +3533,34 @@ int32_t
pl_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ pl_local_t *local = NULL;
+ pl_inode_t *pl_inode = NULL;
+ local = frame->local;
+ if (local && local->update_mlock_enforced_flag && op_ret != -1) {
+ pl_inode = pl_inode_get(this, local->inode, NULL);
+ if (!pl_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ while (pl_inode->fop_wind_count > 0) {
+ gf_msg(this->name, GF_LOG_INFO, 0, 0,
+ "waiting for existing fops (count %d) to drain for "
+ "gfid %s",
+ pl_inode->fop_wind_count, uuid_utoa(pl_inode->gfid));
+ pthread_cond_wait(&pl_inode->check_fop_wind_count,
+ &pl_inode->mutex);
+ }
+ pl_inode->mlock_enforced = _gf_true;
+ pl_inode->check_mlock_info = _gf_false;
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+
+unwind:
PL_STACK_UNWIND_FOR_CLIENT(setxattr, xdata, frame, op_ret, op_errno, xdata);
return 0;
}
@@ -3130,15 +3572,16 @@ pl_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
int op_ret = 0;
int op_errno = EINVAL;
dict_t *xdata_rsp = NULL;
+ char *name = NULL;
+ posix_locks_private_t *priv = this->private;
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
- if (dict_get(dict, GF_META_LOCK_KEY)) {
+ if (dict_get_sizen(dict, GF_META_LOCK_KEY)) {
op_ret = pl_metalk(frame, this, loc->inode);
- } else if (dict_get(dict, GF_META_UNLOCK_KEY)) {
+ } else if (dict_get_sizen(dict, GF_META_UNLOCK_KEY)) {
op_ret = pl_metaunlock(frame, this, loc->inode, dict);
-
} else {
goto usual;
}
@@ -3148,9 +3591,17 @@ pl_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
return 0;
usual:
+ PL_CHECK_LOCK_ENFORCE_KEY(frame, dict, name, this, loc, ((fd_t *)NULL),
+ priv);
+
STACK_WIND(frame, pl_setxattr_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata);
return 0;
+
+unwind:
+ PL_STACK_UNWIND_FOR_CLIENT(setxattr, xdata, frame, op_ret, op_errno, xdata);
+
+ return 0;
}
void
@@ -3159,10 +3610,10 @@ pl_dump_lock(char *str, int size, struct gf_flock *flock, gf_lkowner_t *owner,
time_t *blkd_time, gf_boolean_t active)
{
char *type_str = NULL;
- char granted[256] = {
+ char granted[GF_TIMESTR_SIZE] = {
0,
};
- char blocked[256] = {
+ char blocked[GF_TIMESTR_SIZE] = {
0,
};
@@ -3213,10 +3664,10 @@ __dump_entrylks(pl_inode_t *pl_inode)
{
pl_dom_list_t *dom = NULL;
pl_entry_lock_t *lock = NULL;
- char blocked[256] = {
+ char blocked[GF_TIMESTR_SIZE] = {
0,
};
- char granted[256] = {
+ char granted[GF_TIMESTR_SIZE] = {
0,
};
int count = 0;
@@ -3236,10 +3687,10 @@ __dump_entrylks(pl_inode_t *pl_inode)
list_for_each_entry(lock, &dom->entrylk_list, domain_list)
{
- gf_time_fmt(granted, sizeof(granted), lock->granted_time.tv_sec,
+ gf_time_fmt(granted, sizeof(granted), lock->granted_time,
gf_timefmt_FT);
gf_proc_dump_build_key(key, k, "entrylk[%d](ACTIVE)", count);
- if (lock->blkd_time.tv_sec == 0) {
+ if (lock->blkd_time == 0) {
snprintf(tmp, sizeof(tmp), ENTRY_GRNTD_FMT,
lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK"
: "ENTRYLK_WRLCK",
@@ -3247,7 +3698,7 @@ __dump_entrylks(pl_inode_t *pl_inode)
lkowner_utoa(&lock->owner), lock->client,
lock->connection_id, granted);
} else {
- gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time.tv_sec,
+ gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time,
gf_timefmt_FT);
snprintf(tmp, sizeof(tmp), ENTRY_BLKD_GRNTD_FMT,
lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK"
@@ -3264,7 +3715,7 @@ __dump_entrylks(pl_inode_t *pl_inode)
list_for_each_entry(lock, &dom->blocked_entrylks, blocked_locks)
{
- gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time.tv_sec,
+ gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time,
gf_timefmt_FT);
gf_proc_dump_build_key(key, k, "entrylk[%d](BLOCKED)", count);
@@ -3316,9 +3767,8 @@ __dump_inodelks(pl_inode_t *pl_inode)
SET_FLOCK_PID(&lock->user_flock, lock);
pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner,
- lock->client, lock->connection_id,
- &lock->granted_time.tv_sec, &lock->blkd_time.tv_sec,
- _gf_true);
+ lock->client, lock->connection_id, &lock->granted_time,
+ &lock->blkd_time, _gf_true);
gf_proc_dump_write(key, "%s", tmp);
count++;
@@ -3330,8 +3780,8 @@ __dump_inodelks(pl_inode_t *pl_inode)
count);
SET_FLOCK_PID(&lock->user_flock, lock);
pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner,
- lock->client, lock->connection_id, 0,
- &lock->blkd_time.tv_sec, _gf_false);
+ lock->client, lock->connection_id, 0, &lock->blkd_time,
+ _gf_false);
gf_proc_dump_write(key, "%s", tmp);
count++;
@@ -3364,9 +3814,8 @@ __dump_posixlks(pl_inode_t *pl_inode)
gf_proc_dump_build_key(key, "posixlk", "posixlk[%d](%s)", count,
lock->blocked ? "BLOCKED" : "ACTIVE");
pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner,
- lock->client, NULL, &lock->granted_time.tv_sec,
- &lock->blkd_time.tv_sec,
- (lock->blocked) ? _gf_false : _gf_true);
+ lock->client, lock->client_uid, &lock->granted_time,
+ &lock->blkd_time, (lock->blocked) ? _gf_false : _gf_true);
gf_proc_dump_write(key, "%s", tmp);
count++;
@@ -3445,11 +3894,15 @@ unlock:
__dump_inodelks(pl_inode);
}
- count = __get_posixlk_count(this, pl_inode);
+ count = __get_posixlk_count(pl_inode);
if (count) {
gf_proc_dump_write("posixlk-count", "%d", count);
__dump_posixlks(pl_inode);
}
+
+ gf_proc_dump_write("links", "%d", pl_inode->links);
+ gf_proc_dump_write("removes_pending", "%u", pl_inode->remove_running);
+ gf_proc_dump_write("removed", "%u", pl_inode->removed);
}
pthread_mutex_unlock(&pl_inode->mutex);
@@ -3558,9 +4011,9 @@ pl_metalk_client_cleanup(xlator_t *this, pl_ctx_t *ctx)
* unwind all queued and blocked locks to check
* migration status and find the correct
* destination */
- __unwind_queued_locks(this, pl_inode, &tmp_posixlk_list);
+ __unwind_queued_locks(pl_inode, &tmp_posixlk_list);
- __unwind_blocked_locks(this, pl_inode, &tmp_posixlk_list);
+ __unwind_blocked_locks(pl_inode, &tmp_posixlk_list);
list_del_init(&meta_lock->list);
@@ -3592,10 +4045,7 @@ unlock:
static int
pl_client_disconnect_cbk(xlator_t *this, client_t *client)
{
- pl_ctx_t *pl_ctx = NULL;
-
- pl_ctx = pl_ctx_get(client, this);
-
+ pl_ctx_t *pl_ctx = pl_ctx_get(client, this);
if (pl_ctx) {
pl_inodelk_client_cleanup(this, pl_ctx);
pl_entrylk_client_cleanup(this, pl_ctx);
@@ -3632,10 +4082,9 @@ pl_client_destroy_cbk(xlator_t *this, client_t *client)
int
reconfigure(xlator_t *this, dict_t *options)
{
- posix_locks_private_t *priv = NULL;
+ posix_locks_private_t *priv = this->private;
int ret = -1;
-
- priv = this->private;
+ char *tmp_str = NULL;
GF_OPTION_RECONF("trace", priv->trace, options, bool, out);
@@ -3657,6 +4106,20 @@ reconfigure(xlator_t *this, dict_t *options)
GF_OPTION_RECONF("notify-contention-delay", priv->notify_contention_delay,
options, uint32, out);
+ GF_OPTION_RECONF("mandatory-locking", tmp_str, options, str, out);
+
+ GF_OPTION_RECONF("enforce-mandatory-lock", priv->mlock_enforced, options,
+ bool, out);
+
+ if (!strcmp(tmp_str, "forced"))
+ priv->mandatory_mode = MLK_FORCED;
+ else if (!strcmp(tmp_str, "file"))
+ priv->mandatory_mode = MLK_FILE_BASED;
+ else if (!strcmp(tmp_str, "optimal"))
+ priv->mandatory_mode = MLK_OPTIMAL;
+ else
+ priv->mandatory_mode = MLK_NONE;
+
ret = 0;
out:
@@ -3704,6 +4167,7 @@ init(xlator_t *this)
priv->mandatory_mode = MLK_OPTIMAL;
else
priv->mandatory_mode = MLK_NONE;
+
tmp_str = NULL;
GF_OPTION_INIT("trace", priv->trace, bool, out);
@@ -3723,6 +4187,8 @@ init(xlator_t *this)
GF_OPTION_INIT("notify-contention-delay", priv->notify_contention_delay,
uint32, out);
+ GF_OPTION_INIT("enforce-mandatory-lock", priv->mlock_enforced, bool, out);
+
this->local_pool = mem_pool_new(pl_local_t, 32);
if (!this->local_pool) {
ret = -1;
@@ -3741,19 +4207,21 @@ out:
return ret;
}
-int
+void
fini(xlator_t *this)
{
- posix_locks_private_t *priv = NULL;
-
- priv = this->private;
+ posix_locks_private_t *priv = this->private;
if (!priv)
- return 0;
+ return;
this->private = NULL;
+ if (this->local_pool) {
+ mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
+ }
GF_FREE(priv->brickname);
GF_FREE(priv);
- return 0;
+ return;
}
int
@@ -3780,8 +4248,11 @@ pl_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
struct iatt *postoldparent, struct iatt *prenewparent,
struct iatt *postnewparent, dict_t *xdata)
{
+ pl_inode_remove_cbk(this, cookie, op_ret < 0 ? op_errno : 0);
+
PL_STACK_UNWIND(rename, xdata, frame, op_ret, op_errno, buf, preoldparent,
postoldparent, prenewparent, postnewparent, xdata);
+
return 0;
}
@@ -3789,19 +4260,23 @@ int32_t
pl_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, oldloc, newloc);
+ int32_t error;
+
+ error = PL_INODE_REMOVE(rename, frame, this, oldloc, newloc, pl_rename,
+ pl_rename_cbk, oldloc, newloc, xdata);
+ if (error > 0) {
+ STACK_UNWIND_STRICT(rename, frame, -1, error, NULL, NULL, NULL, NULL,
+ NULL, NULL);
+ }
- STACK_WIND(frame, pl_rename_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
return 0;
}
posix_lock_t *
gf_lkmig_info_to_posix_lock(call_frame_t *frame, lock_migration_info_t *lmi)
{
- posix_lock_t *lock = NULL;
-
- lock = GF_CALLOC(1, sizeof(posix_lock_t), gf_locks_mt_posix_lock_t);
+ posix_lock_t *lock = GF_CALLOC(1, sizeof(posix_lock_t),
+ gf_locks_mt_posix_lock_t);
if (!lock)
goto out;
@@ -3849,6 +4324,7 @@ pl_write_active_locks(call_frame_t *frame, pl_inode_t *pl_inode,
/* Just making sure the activelk list is empty. Should not
* happen though*/
if (!list_empty(&pl_inode->ext_list)) {
+ pthread_mutex_unlock(&pl_inode->mutex);
gf_msg(THIS->name, GF_LOG_ERROR, 0, 0, "invalid locks found");
ret = -1;
@@ -3857,6 +4333,7 @@ pl_write_active_locks(call_frame_t *frame, pl_inode_t *pl_inode,
/* This list also should not be empty */
if (list_empty(&locklist->list)) {
+ pthread_mutex_unlock(&pl_inode->mutex);
gf_msg(THIS->name, GF_LOG_ERROR, 0, 0, "empty lock list");
ret = -1;
@@ -3867,6 +4344,7 @@ pl_write_active_locks(call_frame_t *frame, pl_inode_t *pl_inode,
{
newlock = gf_lkmig_info_to_posix_lock(frame, temp);
if (!newlock) {
+ pthread_mutex_unlock(&pl_inode->mutex);
gf_msg(THIS->name, GF_LOG_ERROR, 0, 0,
"mem allocation failed for newlock");
@@ -3876,12 +4354,10 @@ pl_write_active_locks(call_frame_t *frame, pl_inode_t *pl_inode,
list_add_tail(&newlock->list, &pl_inode->ext_list);
}
}
-
-out:
/*TODO: What if few lock add failed with ENOMEM. Should the already
* added locks be clearted */
pthread_mutex_unlock(&pl_inode->mutex);
-
+out:
return ret;
}
@@ -3889,12 +4365,11 @@ static int
pl_setactivelk(call_frame_t *frame, xlator_t *this, loc_t *loc,
lock_migration_info_t *locklist, dict_t *xdata)
{
- pl_inode_t *pl_inode = NULL;
int op_ret = 0;
int op_errno = 0;
int ret = 0;
- pl_inode = pl_inode_get(this, loc->inode);
+ pl_inode_t *pl_inode = pl_inode_get(this, loc->inode, NULL);
if (!pl_inode) {
gf_msg(this->name, GF_LOG_ERROR, 0, 0, "pl_inode_get failed");
@@ -3917,8 +4392,11 @@ pl_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t op_errno, struct iatt *preparent, struct iatt *postparent,
dict_t *xdata)
{
+ pl_inode_remove_cbk(this, cookie, op_ret < 0 ? op_errno : 0);
+
PL_STACK_UNWIND(unlink, xdata, frame, op_ret, op_errno, preparent,
postparent, xdata);
+
return 0;
}
@@ -3926,9 +4404,14 @@ int32_t
pl_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
- STACK_WIND(frame, pl_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ int32_t error;
+
+ error = PL_INODE_REMOVE(unlink, frame, this, loc, NULL, pl_unlink,
+ pl_unlink_cbk, loc, xflag, xdata);
+ if (error > 0) {
+ STACK_UNWIND_STRICT(unlink, frame, -1, error, NULL, NULL, NULL);
+ }
+
return 0;
}
@@ -3946,7 +4429,7 @@ int
pl_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
mode_t umask, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
STACK_WIND(frame, pl_mkdir_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
return 0;
@@ -3964,7 +4447,7 @@ pl_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int
pl_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
STACK_WIND(frame, pl_stat_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->stat, loc, xdata);
return 0;
@@ -3984,7 +4467,7 @@ int
pl_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
dev_t rdev, mode_t umask, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
STACK_WIND(frame, pl_mknod_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata);
return 0;
@@ -3995,8 +4478,11 @@ pl_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t op_errno, struct iatt *preparent, struct iatt *postparent,
dict_t *xdata)
{
+ pl_inode_remove_cbk(this, cookie, op_ret < 0 ? op_errno : 0);
+
PL_STACK_UNWIND_FOR_CLIENT(rmdir, xdata, frame, op_ret, op_errno, preparent,
postparent, xdata);
+
return 0;
}
@@ -4004,9 +4490,14 @@ int
pl_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
- STACK_WIND(frame, pl_rmdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir, loc, xflags, xdata);
+ int32_t error;
+
+ error = PL_INODE_REMOVE(rmdir, frame, this, loc, NULL, pl_rmdir,
+ pl_rmdir_cbk, loc, xflags, xdata);
+ if (error > 0) {
+ STACK_UNWIND_STRICT(rmdir, frame, -1, error, NULL, NULL, NULL);
+ }
+
return 0;
}
@@ -4025,7 +4516,7 @@ int
pl_symlink(call_frame_t *frame, xlator_t *this, const char *linkname,
loc_t *loc, mode_t umask, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
STACK_WIND(frame, pl_symlink_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->symlink, linkname, loc, umask, xdata);
return 0;
@@ -4036,6 +4527,19 @@ pl_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t op_errno, inode_t *inode, struct iatt *buf,
struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
+ pl_inode_t *pl_inode = (pl_inode_t *)cookie;
+
+ if (op_ret >= 0) {
+ pthread_mutex_lock(&pl_inode->mutex);
+
+ /* TODO: can happen pl_inode->links == 0 ? */
+ if (pl_inode->links >= 0) {
+ pl_inode->links++;
+ }
+
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+
PL_STACK_UNWIND_FOR_CLIENT(link, xdata, frame, op_ret, op_errno, inode, buf,
preparent, postparent, xdata);
return 0;
@@ -4045,9 +4549,18 @@ int
pl_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, oldloc, newloc);
- STACK_WIND(frame, pl_link_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
+ pl_inode_t *pl_inode;
+
+ pl_inode = pl_inode_get(this, oldloc->inode, NULL);
+ if (pl_inode == NULL) {
+ STACK_UNWIND_STRICT(link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
+ }
+
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), oldloc, newloc);
+ STACK_WIND_COOKIE(frame, pl_link_cbk, pl_inode, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
return 0;
}
@@ -4121,7 +4634,7 @@ pl_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int
pl_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
STACK_WIND(frame, pl_statfs_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->statfs, loc, xdata);
return 0;
@@ -4131,6 +4644,28 @@ int32_t
pl_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ pl_local_t *local = NULL;
+ pl_inode_t *pl_inode = NULL;
+
+ local = frame->local;
+ if (local && local->update_mlock_enforced_flag && op_ret != -1) {
+ pl_inode = pl_inode_get(this, local->inode, NULL);
+ if (!pl_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ pl_inode->mlock_enforced = _gf_false;
+ pl_inode->check_mlock_info = _gf_false;
+ pl_inode->track_fop_wind_count = _gf_true;
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+
+unwind:
PL_STACK_UNWIND_FOR_CLIENT(removexattr, xdata, frame, op_ret, op_errno,
xdata);
return 0;
@@ -4140,16 +4675,51 @@ int
pl_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
const char *name, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ int op_ret = 0;
+ int op_errno = EINVAL;
+ posix_locks_private_t *priv = this->private;
+
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
+
+ PL_CHECK_LOCK_ENFORCE_KEY(frame, ((dict_t *)NULL), name, this, loc,
+ ((fd_t *)NULL), priv);
+
STACK_WIND(frame, pl_removexattr_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
return 0;
+
+unwind:
+ PL_STACK_UNWIND_FOR_CLIENT(removexattr, xdata, frame, op_ret, op_errno,
+ NULL);
+
+ return 0;
}
int32_t
pl_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ pl_local_t *local = NULL;
+ pl_inode_t *pl_inode = NULL;
+
+ local = frame->local;
+ if (local && local->update_mlock_enforced_flag && op_ret != -1) {
+ pl_inode = pl_inode_get(this, local->inode, NULL);
+ if (!pl_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ pl_inode->mlock_enforced = _gf_false;
+ pl_inode->check_mlock_info = _gf_false;
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+
+unwind:
PL_STACK_UNWIND_FOR_CLIENT(fremovexattr, xdata, frame, op_ret, op_errno,
xdata);
return 0;
@@ -4159,10 +4729,23 @@ int
pl_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
dict_t *xdata)
{
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ posix_locks_private_t *priv = this->private;
+
PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
+
+ PL_CHECK_LOCK_ENFORCE_KEY(frame, ((dict_t *)NULL), name, this,
+ ((loc_t *)NULL), fd, priv);
+
STACK_WIND(frame, pl_fremovexattr_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
return 0;
+
+unwind:
+ PL_STACK_UNWIND_FOR_CLIENT(fremovexattr, xdata, frame, op_ret, op_errno,
+ NULL);
+ return 0;
}
int32_t
@@ -4198,7 +4781,7 @@ int
pl_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
STACK_WIND(frame, pl_xattrop_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->xattrop, loc, optype, xattr, xdata);
return 0;
@@ -4237,7 +4820,7 @@ int
pl_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
int32_t valid, dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
STACK_WIND(frame, pl_setattr_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
return 0;
@@ -4298,7 +4881,7 @@ int
pl_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
STACK_WIND(frame, pl_readlink_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readlink, loc, size, xdata);
return 0;
@@ -4316,7 +4899,7 @@ int
pl_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
dict_t *xdata)
{
- PL_LOCAL_GET_REQUESTS(frame, this, xdata, NULL, loc, NULL);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
STACK_WIND(frame, pl_access_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->access, loc, mask, xdata);
return 0;
@@ -4465,7 +5048,7 @@ struct volume_options options[] = {
"be used in conjunction w/ revocation-clear-all."},
{.key = {"notify-contention"},
.type = GF_OPTION_TYPE_BOOL,
- .default_value = "no",
+ .default_value = "yes",
.flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
.op_version = {GD_OP_VERSION_4_0_0},
.tags = {"locks", "contention"},
@@ -4488,5 +5071,25 @@ struct volume_options options[] = {
"on the same inode. If multiple lock requests are "
"received during this period, only one upcall will "
"be sent."},
+ {.key = {"enforce-mandatory-lock"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .flags = OPT_FLAG_SETTABLE,
+ .op_version = {GD_OP_VERSION_6_0},
+ .description = "option to enable lock enforcement"},
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "locks",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/locks/src/reservelk.c b/xlators/features/locks/src/reservelk.c
index 8b080dba030..604691fd887 100644
--- a/xlators/features/locks/src/reservelk.c
+++ b/xlators/features/locks/src/reservelk.c
@@ -7,12 +7,12 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "glusterfs.h"
-#include "compat.h"
-#include "xlator.h"
-#include "logging.h"
-#include "common-utils.h"
-#include "list.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/list.h>
#include "locks.h"
#include "common.h"
@@ -31,12 +31,10 @@ reservelks_equal(posix_lock_t *l1, posix_lock_t *l2)
static posix_lock_t *
__reservelk_grantable(pl_inode_t *pl_inode, posix_lock_t *lock)
{
- xlator_t *this = NULL;
+ xlator_t *this = THIS;
posix_lock_t *l = NULL;
posix_lock_t *ret_lock = NULL;
- this = THIS;
-
if (list_empty(&pl_inode->reservelk_list)) {
gf_log(this->name, GF_LOG_TRACE, "No reservelks in list");
goto out;
@@ -82,10 +80,9 @@ __matching_reservelk(pl_inode_t *pl_inode, posix_lock_t *lock)
static int
__reservelk_conflict(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock)
{
- posix_lock_t *conf = NULL;
int ret = 0;
- conf = __matching_reservelk(pl_inode, lock);
+ posix_lock_t *conf = __matching_reservelk(pl_inode, lock);
if (conf) {
gf_log(this->name, GF_LOG_TRACE, "Matching reservelk found");
if (__same_owner_reservelk(lock, conf)) {
@@ -104,29 +101,28 @@ __reservelk_conflict(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock)
int
pl_verify_reservelk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
- int can_block)
+ const int can_block)
{
int ret = 0;
pthread_mutex_lock(&pl_inode->mutex);
{
if (__reservelk_conflict(this, pl_inode, lock)) {
+ lock->blocked = can_block;
+ list_add_tail(&lock->list, &pl_inode->blocked_calls);
+ pthread_mutex_unlock(&pl_inode->mutex);
gf_log(this->name, GF_LOG_TRACE,
"Found conflicting reservelk. Blocking until reservelk is "
"unlocked.");
- lock->blocked = can_block;
- list_add_tail(&lock->list, &pl_inode->blocked_calls);
ret = -1;
- goto unlock;
+ goto out;
}
-
- gf_log(this->name, GF_LOG_TRACE,
- "no conflicting reservelk found. Call continuing");
- ret = 0;
}
-unlock:
pthread_mutex_unlock(&pl_inode->mutex);
-
+ gf_log(this->name, GF_LOG_TRACE,
+ "no conflicting reservelk found. Call continuing");
+ ret = 0;
+out:
return ret;
}
@@ -135,12 +131,11 @@ unlock:
*/
static int
__lock_reservelk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
- int can_block)
+ const int can_block)
{
- posix_lock_t *conf = NULL;
int ret = -EINVAL;
- conf = __reservelk_grantable(pl_inode, lock);
+ posix_lock_t *conf = __reservelk_grantable(pl_inode, lock);
if (conf) {
ret = -EAGAIN;
if (can_block == 0)
@@ -183,9 +178,7 @@ find_matching_reservelk(posix_lock_t *lock, pl_inode_t *pl_inode)
static posix_lock_t *
__reserve_unlock_lock(xlator_t *this, posix_lock_t *lock, pl_inode_t *pl_inode)
{
- posix_lock_t *conf = NULL;
-
- conf = find_matching_reservelk(lock, pl_inode);
+ posix_lock_t *conf = find_matching_reservelk(lock, pl_inode);
if (!conf) {
gf_log(this->name, GF_LOG_DEBUG, " Matching lock not found for unlock");
goto out;
@@ -319,8 +312,6 @@ grant_blocked_lock_calls(xlator_t *this, pl_inode_t *pl_inode)
ret = pl_setlk(this, pl_inode, lock, can_block);
if (ret == -1) {
if (can_block) {
- pl_trace_block(this, lock->frame, fd, NULL, cmd,
- &lock->user_flock, NULL);
continue;
} else {
gf_log(this->name, GF_LOG_DEBUG, "returning EAGAIN");
@@ -345,6 +336,7 @@ pl_reserve_unlock(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock)
{
retlock = __reserve_unlock_lock(this, lock, pl_inode);
if (!retlock) {
+ pthread_mutex_unlock(&pl_inode->mutex);
gf_log(this->name, GF_LOG_DEBUG, "Bad Unlock issued on Inode lock");
ret = -EINVAL;
goto out;
@@ -354,9 +346,8 @@ pl_reserve_unlock(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock)
__destroy_lock(retlock);
ret = 0;
}
-out:
pthread_mutex_unlock(&pl_inode->mutex);
-
+out:
grant_blocked_reserve_locks(this, pl_inode);
grant_blocked_lock_calls(this, pl_inode);
@@ -372,19 +363,20 @@ pl_reserve_setlk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
pthread_mutex_lock(&pl_inode->mutex);
{
ret = __lock_reservelk(this, pl_inode, lock, can_block);
- if (ret < 0)
- gf_log(this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 " => NOK",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid, lkowner_utoa(&lock->owner),
- lock->user_flock.l_start, lock->user_flock.l_len);
- else
- gf_log(this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 " => OK",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid, lkowner_utoa(&lock->owner), lock->fl_start,
- lock->fl_end);
}
pthread_mutex_unlock(&pl_inode->mutex);
+
+ if (ret < 0)
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 " => NOK",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid,
+ lkowner_utoa(&lock->owner), lock->user_flock.l_start,
+ lock->user_flock.l_len);
+ else
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 " => OK",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid,
+ lkowner_utoa(&lock->owner), lock->fl_start, lock->fl_end);
+
return ret;
}
diff --git a/xlators/features/locks/tests/unit-test.c b/xlators/features/locks/tests/unit-test.c
index c4759bd4a5f..d285b12b5aa 100644
--- a/xlators/features/locks/tests/unit-test.c
+++ b/xlators/features/locks/tests/unit-test.c
@@ -7,12 +7,12 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "glusterfs.h"
-#include "compat.h"
-#include "xlator.h"
-#include "logging.h"
-#include "common-utils.h"
-#include "list.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/list.h>
#include "locks.h"
#include "common.h"
diff --git a/xlators/features/marker/src/marker-common.c b/xlators/features/marker/src/marker-common.c
index 4989efb13d6..9c9047005d6 100644
--- a/xlators/features/marker/src/marker-common.c
+++ b/xlators/features/marker/src/marker-common.c
@@ -55,10 +55,3 @@ unlock:
return ret;
}
-
-int
-marker_filter_quota_xattr(dict_t *dict, char *key, data_t *value, void *data)
-{
- dict_del(dict, key);
- return 0;
-}
diff --git a/xlators/features/marker/src/marker-common.h b/xlators/features/marker/src/marker-common.h
index 449d55b5ef0..7f8cffe7d35 100644
--- a/xlators/features/marker/src/marker-common.h
+++ b/xlators/features/marker/src/marker-common.h
@@ -10,12 +10,10 @@
#ifndef _MARKER_COMMON_H
#define _MARKER_COMMON_H
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#include "marker.h"
int32_t
marker_force_inode_ctx_get(inode_t *, xlator_t *, marker_inode_ctx_t **);
-int
-marker_filter_quota_xattr(dict_t *, char *, data_t *, void *);
#endif
diff --git a/xlators/features/marker/src/marker-mem-types.h b/xlators/features/marker/src/marker-mem-types.h
index 0b50668a023..aedfdb4a1b7 100644
--- a/xlators/features/marker/src/marker-mem-types.h
+++ b/xlators/features/marker/src/marker-mem-types.h
@@ -10,7 +10,7 @@
#ifndef __MARKER_MEM_TYPES_H__
#define __MARKER_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_marker_mem_types_ {
/* Those are used by ALLOCATE_OR_GOTO macro */
diff --git a/xlators/features/marker/src/marker-quota-helper.c b/xlators/features/marker/src/marker-quota-helper.c
index 694493c778c..ecd85d67b2b 100644
--- a/xlators/features/marker/src/marker-quota-helper.c
+++ b/xlators/features/marker/src/marker-quota-helper.c
@@ -7,7 +7,7 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "locking.h"
+#include <glusterfs/locking.h>
#include "marker-quota.h"
#include "marker-common.h"
#include "marker-quota-helper.h"
@@ -378,96 +378,3 @@ mq_inode_ctx_new(inode_t *inode, xlator_t *this)
{
return __mq_inode_ctx_new(inode, this);
}
-
-quota_local_t *
-mq_local_new()
-{
- quota_local_t *local = NULL;
-
- local = mem_get0(THIS->local_pool);
- if (!local)
- goto out;
-
- local->ref = 1;
- LOCK_INIT(&local->lock);
-
- local->ctx = NULL;
- local->contri = NULL;
-
-out:
- return local;
-}
-
-quota_local_t *
-mq_local_ref(quota_local_t *local)
-{
- LOCK(&local->lock);
- {
- local->ref++;
- }
- UNLOCK(&local->lock);
-
- return local;
-}
-
-int32_t
-mq_local_unref(xlator_t *this, quota_local_t *local)
-{
- int32_t ref = 0;
- if (local == NULL)
- goto out;
-
- QUOTA_SAFE_DECREMENT(&local->lock, local->ref, ref);
-
- if (ref != 0)
- goto out;
-
- if (local->fd != NULL)
- fd_unref(local->fd);
-
- if (local->contri)
- GF_REF_PUT(local->contri);
-
- if (local->xdata)
- dict_unref(local->xdata);
-
- loc_wipe(&local->loc);
-
- loc_wipe(&local->parent_loc);
-
- LOCK_DESTROY(&local->lock);
-
- mem_put(local);
-out:
- return 0;
-}
-
-inode_contribution_t *
-mq_get_contribution_from_loc(xlator_t *this, loc_t *loc)
-{
- int32_t ret = 0;
- quota_inode_ctx_t *ctx = NULL;
- inode_contribution_t *contribution = NULL;
-
- ret = mq_inode_ctx_get(loc->inode, this, &ctx);
- if (ret < 0) {
- gf_log_callingfn(this->name, GF_LOG_WARNING,
- "cannot get marker-quota context from inode "
- "(gfid:%s, path:%s)",
- uuid_utoa(loc->inode->gfid), loc->path);
- goto err;
- }
-
- contribution = mq_get_contribution_node(loc->parent, ctx);
- if (contribution == NULL) {
- gf_log_callingfn(this->name, GF_LOG_WARNING,
- "inode (gfid:%s, path:%s) has "
- "no contribution towards parent (gfid:%s)",
- uuid_utoa(loc->inode->gfid), loc->path,
- uuid_utoa(loc->parent->gfid));
- goto err;
- }
-
-err:
- return contribution;
-}
diff --git a/xlators/features/marker/src/marker-quota-helper.h b/xlators/features/marker/src/marker-quota-helper.h
index 99723def1b9..d4091dd2180 100644
--- a/xlators/features/marker/src/marker-quota-helper.h
+++ b/xlators/features/marker/src/marker-quota-helper.h
@@ -57,22 +57,10 @@ mq_delete_contribution_node(dict_t *, char *, inode_contribution_t *);
int32_t
mq_inode_loc_fill(const char *, inode_t *, loc_t *);
-quota_local_t *
-mq_local_new();
-
-quota_local_t *
-mq_local_ref(quota_local_t *);
-
-int32_t
-mq_local_unref(xlator_t *, quota_local_t *);
-
inode_contribution_t *
mq_contri_init(inode_t *inode);
inode_contribution_t *
mq_get_contribution_node(inode_t *, quota_inode_ctx_t *);
-inode_contribution_t *
-mq_get_contribution_from_loc(xlator_t *this, loc_t *loc);
-
#endif
diff --git a/xlators/features/marker/src/marker-quota.c b/xlators/features/marker/src/marker-quota.c
index 0fc7ba66ee0..3de2ea1c92c 100644
--- a/xlators/features/marker/src/marker-quota.c
+++ b/xlators/features/marker/src/marker-quota.c
@@ -7,16 +7,16 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "dict.h"
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "libxlator.h"
-#include "common-utils.h"
-#include "byte-order.h"
+#include <glusterfs/common-utils.h>
+#include <glusterfs/byte-order.h>
#include "marker-quota.h"
#include "marker-quota-helper.h"
-#include "syncop.h"
-#include "quota-common-utils.h"
+#include <glusterfs/syncop.h>
+#include <glusterfs/quota-common-utils.h>
int
mq_loc_copy(loc_t *dst, loc_t *src)
@@ -134,27 +134,14 @@ out:
return -1;
}
-int32_t
+static void
mq_set_ctx_dirty_status(quota_inode_ctx_t *ctx, gf_boolean_t status)
{
GF_VALIDATE_OR_GOTO("marker", ctx, out);
mq_set_ctx_status(ctx, &ctx->dirty_status, status);
- return 0;
-out:
- return -1;
-}
-
-int32_t
-mq_test_and_set_ctx_dirty_status(quota_inode_ctx_t *ctx, gf_boolean_t *status)
-{
- GF_VALIDATE_OR_GOTO("marker", ctx, out);
- GF_VALIDATE_OR_GOTO("marker", status, out);
-
- mq_test_and_set_ctx_status(ctx, &ctx->dirty_status, status);
- return 0;
out:
- return -1;
+ return;
}
int
@@ -866,19 +853,6 @@ out:
}
int32_t
-mq_get_size(xlator_t *this, loc_t *loc, quota_meta_t *size)
-{
- return _mq_get_metadata(this, loc, NULL, size, 0);
-}
-
-int32_t
-mq_get_contri(xlator_t *this, loc_t *loc, quota_meta_t *contri,
- uuid_t contri_gfid)
-{
- return _mq_get_metadata(this, loc, contri, NULL, contri_gfid);
-}
-
-int32_t
mq_get_delta(xlator_t *this, loc_t *loc, quota_meta_t *delta,
quota_inode_ctx_t *ctx, inode_contribution_t *contribution)
{
@@ -1337,19 +1311,6 @@ out:
return ret;
}
-int
-mq_create_xattrs_blocking_txn(xlator_t *this, loc_t *loc, struct iatt *buf)
-{
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO("marker", loc, out);
- GF_VALIDATE_OR_GOTO("marker", loc->inode, out);
-
- ret = _mq_create_xattrs_txn(this, loc, buf, _gf_false);
-out:
- return ret;
-}
-
int32_t
mq_reduce_parent_size_task(void *opaque)
{
@@ -1752,21 +1713,17 @@ mq_initiate_quota_task(void *opaque)
}
out:
- if (dirty) {
- if (ret < 0) {
- /* On failure clear dirty status flag.
- * In the next lookup inspect_directory_xattr
- * can set the status flag and fix the
- * dirty directory.
- * Do the same if the dir was dirty before
- * txn
- */
- ret = mq_inode_ctx_get(parent_loc.inode, this, &parent_ctx);
- if (ret == 0)
- mq_set_ctx_dirty_status(parent_ctx, _gf_false);
- } else {
- ret = mq_mark_dirty(this, &parent_loc, 0);
- }
+ if ((dirty) && (ret < 0)) {
+ /* On failure clear dirty status flag.
+ * In the next lookup inspect_directory_xattr
+ * can set the status flag and fix the
+ * dirty directory.
+ * Do the same if the dir was dirty before
+ * txn
+ */
+ ret = mq_inode_ctx_get(parent_loc.inode, this, &parent_ctx);
+ if (ret == 0)
+ mq_set_ctx_dirty_status(parent_ctx, _gf_false);
}
if (locked)
@@ -1977,7 +1934,7 @@ mq_update_dirty_inode_task(void *opaque)
/* Inculde for self */
contri_sum.dir_count++;
- ret = mq_get_size(this, loc, &size);
+ ret = _mq_get_metadata(this, loc, NULL, &size, 0);
if (ret < 0)
goto out;
@@ -2046,8 +2003,8 @@ mq_update_dirty_inode_txn(xlator_t *this, loc_t *loc, quota_inode_ctx_t *ctx)
GF_VALIDATE_OR_GOTO("marker", loc, out);
GF_VALIDATE_OR_GOTO("marker", loc->inode, out);
- ret = mq_test_and_set_ctx_dirty_status(ctx, &status);
- if (ret < 0 || status == _gf_true)
+ mq_test_and_set_ctx_status(ctx, &ctx->dirty_status, &status);
+ if (status == _gf_true)
goto out;
ret = mq_synctask(this, mq_update_dirty_inode_task, _gf_true, loc);
@@ -2102,6 +2059,9 @@ mq_inspect_directory_xattr(xlator_t *this, quota_inode_ctx_t *ctx,
if (ret < 0)
goto create_xattr;
+ if (!contribution)
+ goto create_xattr;
+
if (!loc_is_root(loc)) {
GET_CONTRI_KEY(this, contri_key, contribution->gfid, keylen);
if (keylen < 0) {
diff --git a/xlators/features/marker/src/marker-quota.h b/xlators/features/marker/src/marker-quota.h
index 7e24f1bc4f7..4bbf6878b22 100644
--- a/xlators/features/marker/src/marker-quota.h
+++ b/xlators/features/marker/src/marker-quota.h
@@ -10,11 +10,11 @@
#ifndef _MARKER_QUOTA_H
#define _MARKER_QUOTA_H
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#include "marker-mem-types.h"
-#include "refcount.h"
-#include "quota-common-utils.h"
-#include "call-stub.h"
+#include <glusterfs/refcount.h>
+#include <glusterfs/quota-common-utils.h>
+#include <glusterfs/call-stub.h>
#define QUOTA_XATTR_PREFIX "trusted.glusterfs"
#define QUOTA_DIRTY_KEY "trusted.glusterfs.quota.dirty"
@@ -23,15 +23,6 @@
#define QUOTA_KEY_MAX 512
#define READDIR_BUF 4096
-#define QUOTA_STACK_DESTROY(_frame, _this) \
- do { \
- quota_local_t *_local = NULL; \
- _local = _frame->local; \
- _frame->local = NULL; \
- STACK_DESTROY(_frame->root); \
- mq_local_unref(_this, _local); \
- } while (0)
-
#define QUOTA_ALLOC(var, type, ret) \
do { \
ret = 0; \
diff --git a/xlators/features/marker/src/marker.c b/xlators/features/marker/src/marker.c
index 2d3177c7ec3..1375ccc498c 100644
--- a/xlators/features/marker/src/marker.c
+++ b/xlators/features/marker/src/marker.c
@@ -7,17 +7,17 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "libxlator.h"
#include "marker.h"
#include "marker-mem-types.h"
#include "marker-quota.h"
#include "marker-quota-helper.h"
#include "marker-common.h"
-#include "byte-order.h"
-#include "syncop.h"
-#include "syscall.h"
+#include <glusterfs/byte-order.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/syscall.h>
#include <fnmatch.h>
@@ -242,24 +242,19 @@ out:
return ret;
}
-int32_t
+void
marker_error_handler(xlator_t *this, marker_local_t *local, int32_t op_errno)
{
- marker_conf_t *priv = NULL;
- const char *path = NULL;
-
- priv = (marker_conf_t *)this->private;
- path = local ? (local->loc.path ? local->loc.path
- : uuid_utoa(local->loc.gfid))
- : "<nul>";
+ marker_conf_t *priv = (marker_conf_t *)this->private;
+ const char *path = local ? ((local->loc.path) ? local->loc.path
+ : uuid_utoa(local->loc.gfid))
+ : "<nul>";
gf_log(this->name, GF_LOG_CRITICAL,
"Indexing gone corrupt at %s (reason: %s)."
" Geo-replication slave content needs to be revalidated",
path, strerror(op_errno));
sys_unlink(priv->timestamp_file);
-
- return 0;
}
int32_t
@@ -567,24 +562,21 @@ marker_specific_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
int32_t ret = 0;
- int32_t done = 0;
+ int32_t done = 1;
marker_local_t *local = NULL;
local = (marker_local_t *)frame->local;
if (op_ret == -1 && op_errno == ENOSPC) {
marker_error_handler(this, local, op_errno);
- done = 1;
goto out;
}
if (local) {
if (local->loc.path && strcmp(local->loc.path, "/") == 0) {
- done = 1;
goto out;
}
if (__is_root_gfid(local->loc.gfid)) {
- done = 1;
goto out;
}
}
@@ -595,14 +587,11 @@ marker_specific_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
gf_log(this->name, GF_LOG_DEBUG,
"Error occurred "
"while traversing to the parent, stopping marker");
-
- done = 1;
-
goto out;
}
marker_start_setxattr(frame, this);
-
+ done = 0;
out:
if (done) {
marker_setxattr_done(frame);
@@ -3564,3 +3553,16 @@ struct volume_options options[] = {
.flags = OPT_FLAG_NONE,
},
{.key = {NULL}}};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "marker",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/marker/src/marker.h b/xlators/features/marker/src/marker.h
index 3b6f4ec8b72..4821094c14b 100644
--- a/xlators/features/marker/src/marker.h
+++ b/xlators/features/marker/src/marker.h
@@ -11,10 +11,10 @@
#define _MARKER_H
#include "marker-quota.h"
-#include "xlator.h"
-#include "defaults.h"
-#include "compat-uuid.h"
-#include "call-stub.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/compat-uuid.h>
+#include <glusterfs/call-stub.h>
#define MARKER_XATTR_PREFIX "trusted.glusterfs"
#define XTIME "xtime"
diff --git a/xlators/experimental/fdl/Makefile.am b/xlators/features/metadisp/Makefile.am
index a985f42a877..a985f42a877 100644
--- a/xlators/experimental/fdl/Makefile.am
+++ b/xlators/features/metadisp/Makefile.am
diff --git a/xlators/features/metadisp/src/Makefile.am b/xlators/features/metadisp/src/Makefile.am
new file mode 100644
index 00000000000..1520ad8c424
--- /dev/null
+++ b/xlators/features/metadisp/src/Makefile.am
@@ -0,0 +1,38 @@
+noinst_PYTHON = gen-fops.py
+
+EXTRA_DIST = fops-tmpl.c
+
+xlator_LTLIBRARIES = metadisp.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+nodist_metadisp_la_SOURCES = fops.c
+
+BUILT_SOURCES = fops.c
+
+metadisp_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+metadisp_la_SOURCES = metadisp.c \
+ metadisp-unlink.c \
+ metadisp-stat.c \
+ metadisp-lookup.c \
+ metadisp-readdir.c \
+ metadisp-create.c \
+ metadisp-open.c \
+ metadisp-fsync.c \
+ metadisp-setattr.c \
+ backend.c
+
+metadisp_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = metadisp.h metadisp-fops.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+fops.c: fops-tmpl.c $(top_srcdir)/libglusterfs/src/generator.py gen-fops.py
+ PYTHONPATH=$(top_srcdir)/libglusterfs/src \
+ $(PYTHON) $(srcdir)/gen-fops.py $(srcdir)/fops-tmpl.c > $@
+
+CLEANFILES = $(nodist_metadisp_la_SOURCES)
diff --git a/xlators/features/metadisp/src/backend.c b/xlators/features/metadisp/src/backend.c
new file mode 100644
index 00000000000..ee2c25bfaa7
--- /dev/null
+++ b/xlators/features/metadisp/src/backend.c
@@ -0,0 +1,45 @@
+#define GFID_STR_LEN 37
+
+#include "metadisp.h"
+
+/*
+ * backend.c
+ *
+ * functions responsible for converting user-facing paths to backend-style
+ * "/$GFID" paths.
+ */
+
+int32_t
+build_backend_loc(uuid_t gfid, loc_t *src_loc, loc_t *dst_loc)
+{
+ static uuid_t root = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+ char gfid_buf[GFID_STR_LEN + 1] = {
+ 0,
+ };
+ char *path = NULL;
+
+ GF_VALIDATE_OR_GOTO("metadisp", src_loc, out);
+ GF_VALIDATE_OR_GOTO("metadisp", dst_loc, out);
+
+ loc_copy(dst_loc, src_loc);
+ memcpy(dst_loc->pargfid, root, sizeof(root));
+ GF_FREE((char *)dst_loc->path); // we are overwriting path so nuke
+ // whatever loc_copy gave us
+
+ uuid_utoa_r(gfid, gfid_buf);
+
+ path = GF_CALLOC(GFID_STR_LEN + 1, sizeof(char),
+ gf_common_mt_char); // freed via loc_wipe
+
+ path[0] = '/';
+ strncpy(path + 1, gfid_buf, GFID_STR_LEN);
+ path[GFID_STR_LEN] = 0;
+ dst_loc->path = path;
+ if (src_loc->name)
+ dst_loc->name = strrchr(dst_loc->path, '/');
+ if (dst_loc->name)
+ dst_loc->name++;
+ return 0;
+out:
+ return -1;
+}
diff --git a/xlators/features/metadisp/src/fops-tmpl.c b/xlators/features/metadisp/src/fops-tmpl.c
new file mode 100644
index 00000000000..4385b7dd5b7
--- /dev/null
+++ b/xlators/features/metadisp/src/fops-tmpl.c
@@ -0,0 +1,10 @@
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <glusterfs/xlator.h>
+#include "metadisp.h"
+#include "metadisp-fops.h"
+
+#pragma generate
diff --git a/xlators/features/metadisp/src/gen-fops.py b/xlators/features/metadisp/src/gen-fops.py
new file mode 100644
index 00000000000..8b5e120fdec
--- /dev/null
+++ b/xlators/features/metadisp/src/gen-fops.py
@@ -0,0 +1,160 @@
+#!/usr/bin/python
+
+import sys
+from generator import fop_subs, generate
+
+FN_METADATA_CHILD_GENERIC = """
+int32_t
+metadisp_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ METADISP_TRACE("@NAME@ metadata");
+ STACK_WIND (frame, default_@NAME@_cbk,
+ METADATA_CHILD(this), METADATA_CHILD(this)->fops->@NAME@,
+ @SHORT_ARGS@);
+ return 0;
+}
+"""
+
+FN_GENERIC_TEMPLATE = """
+int32_t
+metadisp_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ METADISP_TRACE("@NAME@ generic");
+ STACK_WIND (frame, default_@NAME@_cbk,
+ DATA_CHILD(this), DATA_CHILD(this)->fops->@NAME@,
+ @SHORT_ARGS@);
+ return 0;
+}
+"""
+
+FN_DATAFD_TEMPLATE = """
+int32_t
+metadisp_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ METADISP_TRACE("@NAME@ datafd");
+ xlator_t *child = NULL;
+ child = DATA_CHILD(this);
+ STACK_WIND (frame, default_@NAME@_cbk,
+ child, child->fops->@NAME@,
+ @SHORT_ARGS@);
+ return 0;
+}
+"""
+
+FN_DATALOC_TEMPLATE = """
+int32_t
+metadisp_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ METADISP_TRACE("@NAME@ dataloc");
+ loc_t backend_loc = {
+ 0,
+ };
+ if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+ goto unwind;
+ }
+ xlator_t *child = NULL;
+ child = DATA_CHILD(this);
+ STACK_WIND (frame, default_@NAME@_cbk,
+ child, child->fops->@NAME@,
+ @SHORT_ARGS@);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(lookup, frame, -1, EINVAL, NULL, NULL, NULL, NULL);
+ return 0;
+}
+"""
+
+FOPS_LINE_TEMPLATE = "\t.@NAME@ = metadisp_@NAME@,"
+
+skipped = [
+ "readdir",
+ "readdirp",
+ "lookup",
+ "fsync",
+ "stat",
+ "open",
+ "create",
+ "unlink",
+ "setattr",
+ # TODO: implement "inodelk",
+]
+
+
+def gen_fops():
+ done = skipped
+
+ #
+ # these are fops that wind to the DATA_CHILD
+ #
+ # NOTE: re-written in order from google doc:
+ # https://docs.google.com/document/d/1KEwVtSNvDhs4qb63gWx2ulCp5GJjge77NGJk4p_Ms4Q
+ for name in [
+ "writev",
+ "readv",
+ "ftruncate",
+ "zerofill",
+ "discard",
+ "seek",
+ "fstat",
+ ]:
+ done = done + [name]
+ print(generate(FN_DATAFD_TEMPLATE, name, fop_subs))
+
+ for name in ["truncate"]:
+ done = done + [name]
+ print(generate(FN_DATALOC_TEMPLATE, name, fop_subs))
+
+ # these are fops that operate solely on dentries, folders,
+ # or extended attributes. Therefore, they must always
+ # wind to METADATA_CHILD and should never perform
+ # any path rewriting
+ #
+ # NOTE: re-written in order from google doc:
+ # https://docs.google.com/document/d/1KEwVtSNvDhs4qb63gWx2ulCp5GJjge77NGJk4p_Ms4Q
+ for name in [
+ "mkdir",
+ "symlink",
+ "link",
+ "rename",
+ "mknod",
+ "opendir",
+ # "readdir, # special-cased
+ # "readdirp, # special-cased
+ "fsyncdir",
+ # "setattr", # special-cased
+ "readlink",
+ "fentrylk",
+ "access",
+ # TODO: these wind to both,
+ # data for backend-attributes and metadata for the rest
+ "xattrop",
+ "setxattr",
+ "getxattr",
+ "removexattr",
+ "fgetxattr",
+ "fsetxattr",
+ "fremovexattr",
+ ]:
+
+ done = done + [name]
+ print(generate(FN_METADATA_CHILD_GENERIC, name, fop_subs))
+
+ print("struct xlator_fops fops = {")
+ for name in done:
+ print(generate(FOPS_LINE_TEMPLATE, name, fop_subs))
+
+ print("};")
+
+
+for l in open(sys.argv[1], "r").readlines():
+ if l.find("#pragma generate") != -1:
+ print("/* BEGIN GENERATED CODE - DO NOT MODIFY */")
+ gen_fops()
+ print("/* END GENERATED CODE */")
+ else:
+ print(l[:-1])
diff --git a/xlators/features/metadisp/src/metadisp-create.c b/xlators/features/metadisp/src/metadisp-create.c
new file mode 100644
index 00000000000..f8c9798dd59
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-create.c
@@ -0,0 +1,101 @@
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+/**
+ * Create, like stat, is a two-step process. We send a create
+ * to the METADATA_CHILD, then send another create to the DATA_CHILD.
+ *
+ * We do the metadata child first to ensure that the ACLs are enforced.
+ */
+
+int32_t
+metadisp_create_dentry_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_create_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *xdata)
+{
+ // create the backend data inode
+ STACK_WIND(frame, metadisp_create_dentry_cbk, DATA_CHILD(this),
+ DATA_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+ xdata);
+ return 0;
+}
+
+int32_t
+metadisp_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ METADISP_TRACE("%d %d", op_ret, op_errno);
+ call_stub_t *stub = cookie;
+ if (op_ret != 0) {
+ STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+ }
+
+ if (stub == NULL) {
+ goto unwind;
+ }
+
+ if (stub->poison) {
+ call_stub_destroy(stub);
+ return 0;
+ }
+
+ call_resume(stub);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(create, frame, -1, EINVAL, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
+}
+
+int32_t
+metadisp_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ METADISP_TRACE(".");
+
+ loc_t backend_loc = {
+ 0,
+ };
+ call_stub_t *stub = NULL;
+ uuid_t *gfid_req = NULL;
+
+ RESOLVE_GFID_REQ(xdata, gfid_req, out);
+
+ if (build_backend_loc(*gfid_req, loc, &backend_loc)) {
+ goto unwind;
+ }
+
+ frame->local = loc;
+
+ stub = fop_create_stub(frame, metadisp_create_resume, &backend_loc, flags,
+ mode, umask, fd, xdata);
+
+ STACK_WIND_COOKIE(frame, metadisp_create_cbk, stub, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->create, loc, flags, mode,
+ umask, fd, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(create, frame, -1, EINVAL, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
+out:
+ return -1;
+}
diff --git a/xlators/features/metadisp/src/metadisp-fops.h b/xlators/features/metadisp/src/metadisp-fops.h
new file mode 100644
index 00000000000..56dd427cf34
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-fops.h
@@ -0,0 +1,51 @@
+#ifndef GF_METADISP_FOPS_H_
+#define GF_METADISP_FOPS_H_
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/glusterfs.h>
+
+#include <sys/types.h>
+
+/* fops in here are defined in their own file. Every other fop is just defined
+ * inline of fops.c */
+
+int
+metadisp_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata);
+
+int
+metadisp_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *dict);
+
+int
+metadisp_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+
+int
+metadisp_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata);
+
+int
+metadisp_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata);
+
+int
+metadisp_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+
+int
+metadisp_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata);
+
+int
+metadisp_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata);
+
+int
+metadisp_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata);
+
+int
+metadisp_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+
+#endif
diff --git a/xlators/features/metadisp/src/metadisp-fsync.c b/xlators/features/metadisp/src/metadisp-fsync.c
new file mode 100644
index 00000000000..2e46fa84eac
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-fsync.c
@@ -0,0 +1,54 @@
+
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+int32_t
+metadisp_fsync_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t flags, dict_t *xdata)
+{
+ STACK_WIND(frame, default_fsync_cbk, DATA_CHILD(this),
+ DATA_CHILD(this)->fops->fsync, fd, flags, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ if (cookie) {
+ stub = cookie;
+ }
+
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+ if (stub->poison) {
+ call_stub_destroy(stub);
+ stub = NULL;
+ return 0;
+ }
+
+ call_resume(stub);
+ return 0;
+
+unwind:
+ if (stub) {
+ call_stub_destroy(stub);
+ }
+ STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ stub = fop_fsync_stub(frame, metadisp_fsync_resume, fd, flags, xdata);
+ STACK_WIND_COOKIE(frame, metadisp_fsync_cbk, stub, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->fsync, fd, flags, xdata);
+ return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-lookup.c b/xlators/features/metadisp/src/metadisp-lookup.c
new file mode 100644
index 00000000000..27d90c9f746
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-lookup.c
@@ -0,0 +1,90 @@
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+/**
+ * Lookup, like stat, is a two-step process for grabbing the metadata details
+ * as well as the data details.
+ */
+
+int32_t
+metadisp_backend_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ METADISP_TRACE("backend_lookup_cbk");
+ if (op_errno == ENOENT) {
+ op_errno = ENODATA;
+ op_ret = -1;
+ }
+ STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata,
+ postparent);
+ return 0;
+}
+
+int32_t
+metadisp_backend_lookup_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ METADISP_TRACE("backend_lookup_resume");
+ loc_t backend_loc = {
+ 0,
+ };
+ if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+ goto unwind;
+ }
+
+ STACK_WIND(frame, metadisp_backend_lookup_cbk, DATA_CHILD(this),
+ DATA_CHILD(this)->fops->lookup, &backend_loc, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(lookup, frame, -1, EINVAL, NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+metadisp_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+ METADISP_TRACE("%d %d", op_ret, op_errno);
+ call_stub_t *stub = NULL;
+ stub = cookie;
+
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+ if (!IA_ISREG(buf->ia_type)) {
+ goto unwind;
+ } else if (!stub) {
+ op_errno = EINVAL;
+ goto unwind;
+ }
+
+ METADISP_TRACE("resuming stub");
+
+ // memcpy(stub->args.loc.gfid, buf->ia_gfid, sizeof(uuid_t));
+ call_resume(stub);
+ return 0;
+unwind:
+ METADISP_TRACE("unwinding %d %d", op_ret, op_errno);
+ STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata,
+ postparent);
+ if (stub) {
+ call_stub_destroy(stub);
+ }
+ return 0;
+}
+
+int32_t
+metadisp_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ METADISP_TRACE("lookup");
+ call_stub_t *stub = NULL;
+ stub = fop_lookup_stub(frame, metadisp_backend_lookup_resume, loc, xdata);
+ STACK_WIND_COOKIE(frame, metadisp_lookup_cbk, stub, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->lookup, loc, xdata);
+ return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-open.c b/xlators/features/metadisp/src/metadisp-open.c
new file mode 100644
index 00000000000..64814afe636
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-open.c
@@ -0,0 +1,70 @@
+#include <glusterfs/call-stub.h>
+#include "metadisp.h"
+
+int32_t
+metadisp_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ METADISP_TRACE("got open results %d %d", op_ret, op_errno);
+
+ call_stub_t *stub = NULL;
+ if (cookie) {
+ stub = cookie;
+ }
+
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+ if (!stub) {
+ goto unwind;
+ }
+
+ if (stub->poison) {
+ call_stub_destroy(stub);
+ stub = NULL;
+ return 0;
+ }
+
+ call_resume(stub);
+ return 0;
+
+unwind:
+ if (stub) {
+ call_stub_destroy(stub);
+ }
+ STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_open_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata)
+{
+ STACK_WIND_COOKIE(frame, metadisp_open_cbk, NULL, DATA_CHILD(this),
+ DATA_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ loc_t backend_loc = {
+ 0,
+ };
+
+ if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+ goto unwind;
+ }
+
+ stub = fop_open_stub(frame, metadisp_open_resume, &backend_loc, flags, fd,
+ xdata);
+ STACK_WIND_COOKIE(frame, metadisp_open_cbk, stub, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ return 0;
+unwind:
+ STACK_UNWIND_STRICT(open, frame, -1, EINVAL, NULL, NULL);
+ return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-readdir.c b/xlators/features/metadisp/src/metadisp-readdir.c
new file mode 100644
index 00000000000..5f840b1e88f
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-readdir.c
@@ -0,0 +1,65 @@
+#include "metadisp.h"
+
+/**
+ * With a change to the posix xlator, readdir and readdirp are shockingly
+ * simple.
+ *
+ * The issue with separating the backend data of the files
+ * with the metadata is that readdirs must now read from multiple sources
+ * to coalesce the directory entries.
+ *
+ * The way we do this is to tell the METADATA_CHILD that when it's
+ * running readdirp, each file entry should have a stat wound to
+ * 'stat-source-of-truth'.
+ *
+ * see metadisp_stat for how it handles winds _from_posix.
+ */
+
+int32_t
+metadisp_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+{
+ METADISP_TRACE(".");
+ /*
+ * Always use readdirp, even if the original was readdir. Why? Because NFS.
+ * There are multiple translations between Gluster, UNIX, and NFS stat
+ * structures in that path. One of them uses the type etc. from the stat
+ * structure, which is only filled in by readdirp. If we use readdir, the
+ * entries do actually go all the way back to the client and are visible in
+ * getdents, but then the readdir throws them away because of the
+ * uninitialized type.
+ */
+ GF_UNUSED int32_t ret;
+ if (!xdata) {
+ xdata = dict_new();
+ }
+
+ // ret = dict_set_int32 (xdata, "list-xattr", 1);
+
+ // I'm my own source of truth!
+ ret = dict_set_static_ptr(xdata, "stat-source-of-truth", (void *)this);
+
+ STACK_WIND(frame, default_readdirp_cbk, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->readdirp, fd, size, off, xdata);
+
+ return 0;
+}
+
+int32_t
+metadisp_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+{
+ METADISP_TRACE(".");
+ if (!xdata) {
+ xdata = dict_new();
+ }
+ GF_UNUSED int32_t ret;
+ // ret = dict_set_int32 (xdata, "list-xattr", 1);
+
+ // I'm my own source of truth!
+ ret = dict_set_static_ptr(xdata, "stat-source-of-truth", (void *)this);
+
+ STACK_WIND(frame, default_readdirp_cbk, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->readdirp, fd, size, off, xdata);
+ return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-setattr.c b/xlators/features/metadisp/src/metadisp-setattr.c
new file mode 100644
index 00000000000..6991cf644f3
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-setattr.c
@@ -0,0 +1,90 @@
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+int32_t
+metadisp_backend_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *statpre, struct iatt *statpost,
+ dict_t *xdata)
+
+{
+ METADISP_TRACE("backend_setattr_cbk");
+ if (op_errno == ENOENT) {
+ op_errno = ENODATA;
+ op_ret = -1;
+ }
+ STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, statpre, statpost,
+ xdata);
+ return 0;
+}
+
+int32_t
+metadisp_backend_setattr_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid,
+ dict_t *xdata)
+
+{
+ METADISP_TRACE("backend_setattr_resume");
+ loc_t backend_loc = {
+ 0,
+ };
+ if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+ goto unwind;
+ }
+
+ STACK_WIND(frame, metadisp_backend_setattr_cbk, DATA_CHILD(this),
+ DATA_CHILD(this)->fops->setattr, &backend_loc, stbuf, valid,
+ xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(setattr, frame, -1, EINVAL, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+metadisp_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ METADISP_TRACE("%d %d", op_ret, op_errno);
+ call_stub_t *stub = NULL;
+ stub = cookie;
+
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+ if (!IA_ISREG(statpost->ia_type)) {
+ goto unwind;
+ } else if (!stub) {
+ op_errno = EINVAL;
+ goto unwind;
+ }
+
+ METADISP_TRACE("resuming stub");
+ call_resume(stub);
+ return 0;
+unwind:
+ METADISP_TRACE("unwinding %d %d", op_ret, op_errno);
+ STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, statpre, statpost,
+ xdata);
+ if (stub) {
+ call_stub_destroy(stub);
+ }
+ return 0;
+}
+
+int32_t
+metadisp_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ METADISP_TRACE("setattr");
+ call_stub_t *stub = NULL;
+ stub = fop_setattr_stub(frame, metadisp_backend_setattr_resume, loc, stbuf,
+ valid, xdata);
+ STACK_WIND_COOKIE(frame, metadisp_setattr_cbk, stub, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->setattr, loc, stbuf, valid,
+ xdata);
+ return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-stat.c b/xlators/features/metadisp/src/metadisp-stat.c
new file mode 100644
index 00000000000..b06d0dbcddd
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-stat.c
@@ -0,0 +1,124 @@
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+/**
+ * The stat flow in METADISP is complicated because we must
+ * do ensure a few things:
+ * 1. stat, on the path within the metadata layer,
+ * MUST get the backend FD of the data layer.
+ * --- we wind to the metadata layer, then the data layer.
+ *
+ * 2. the metadata layer MUST be able to ask the data
+ * layer for stat information.
+ * --- this is 'syncop-internal-from-posix'
+ *
+ * 3. when the metadata exists BUT the data is missing,
+ * we MUST mark the backend file as bad and heal it.
+ */
+
+int32_t
+metadisp_stat_backend_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ METADISP_TRACE("got backend stat results %d %d", op_ret, op_errno);
+ if (op_errno == ENOENT) {
+ STACK_UNWIND_STRICT(open, frame, -1, ENODATA, NULL, NULL);
+ return 0;
+ }
+ STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_stat_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ METADISP_TRACE("winding stat to path %s", loc->path);
+ if (gf_uuid_is_null(loc->gfid)) {
+ METADISP_TRACE("bad object, sending EUCLEAN");
+ STACK_UNWIND_STRICT(open, frame, -1, EUCLEAN, NULL, NULL);
+ return 0;
+ }
+
+ STACK_WIND(frame, metadisp_stat_backend_cbk, SECOND_CHILD(this),
+ SECOND_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+
+ METADISP_TRACE("got stat results %d %d", op_ret, op_errno);
+
+ if (cookie) {
+ stub = cookie;
+ }
+
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+ // only use the stub for the files
+ if (!IA_ISREG(buf->ia_type)) {
+ goto unwind;
+ }
+
+ if (stub->poison) {
+ call_stub_destroy(stub);
+ stub = NULL;
+ return 0;
+ }
+
+ call_resume(stub);
+ return 0;
+
+unwind:
+ if (stub) {
+ call_stub_destroy(stub);
+ }
+ STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ int32_t ret = 0;
+ loc_t backend_loc = {
+ 0,
+ };
+ METADISP_FILTER_ROOT(stat, loc, xdata);
+
+ if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+ goto unwind;
+ }
+
+ if (dict_get_int32(xdata, "syncop-internal-from-posix", &ret) == 0) {
+ // if we've just been sent a stat from posix, then we know
+ // that we must send down a stat for a file to the second child.
+ //
+ // that means we can skip the stat for the first child and just
+ // send to the data disk.
+ METADISP_TRACE("got syncop-internal-from-posix");
+ STACK_WIND(frame, default_stat_cbk, DATA_CHILD(this),
+ DATA_CHILD(this)->fops->stat, &backend_loc, xdata);
+ return 0;
+ }
+
+ // we do not know if the request is for a file, folder, etc. wind
+ // to first child to find out.
+ stub = fop_stat_stub(frame, metadisp_stat_resume, &backend_loc, xdata);
+ METADISP_TRACE("winding stat to first child %s", loc->path);
+ STACK_WIND_COOKIE(frame, metadisp_stat_cbk, stub, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
+unwind:
+ STACK_UNWIND_STRICT(stat, frame, -1, EINVAL, NULL, NULL);
+ return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-unlink.c b/xlators/features/metadisp/src/metadisp-unlink.c
new file mode 100644
index 00000000000..1f6a8eb35ce
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-unlink.c
@@ -0,0 +1,160 @@
+
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+/**
+ * The unlink flow in metadisp is complicated because we must
+ * do ensure that UNLINK causes both the metadata objects
+ * to get removed and the data objects to get removed.
+ */
+
+int32_t
+metadisp_unlink_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int xflag, dict_t *xdata)
+{
+ METADISP_TRACE("winding backend unlink to path %s", loc->path);
+ STACK_WIND(frame, default_unlink_cbk, DATA_CHILD(this),
+ DATA_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ METADISP_TRACE(". %d %d", op_ret, op_errno);
+
+ int ret = 0;
+ call_stub_t *stub = NULL;
+ int nlink = 0;
+
+ if (cookie) {
+ stub = cookie;
+ }
+
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+ if (stub->poison) {
+ call_stub_destroy(stub);
+ stub = NULL;
+ return 0;
+ }
+
+ ret = dict_get_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA, &nlink);
+ if (ret != 0) {
+ op_errno = EINVAL;
+ op_ret = -1;
+ goto unwind;
+ }
+ METADISP_TRACE("frontend hardlink count %d %d", ret, nlink);
+ if (nlink > 1) {
+ goto unwind;
+ }
+
+ call_resume(stub);
+ return 0;
+
+unwind:
+ if (stub) {
+ call_stub_destroy(stub);
+ }
+ STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent,
+ xdata);
+ return 0;
+}
+
+int32_t
+metadisp_unlink_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ call_stub_t *stub = NULL;
+
+ if (cookie) {
+ stub = cookie;
+ }
+
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+ // fail fast on empty gfid so we don't loop forever
+ if (gf_uuid_is_null(buf->ia_gfid)) {
+ op_ret = -1;
+ op_errno = ENODATA;
+ goto unwind;
+ }
+
+ // fill gfid since the stub is incomplete
+ memcpy(stub->args.loc.gfid, buf->ia_gfid, sizeof(uuid_t));
+ memcpy(stub->args.loc.pargfid, postparent->ia_gfid, sizeof(uuid_t));
+
+ if (stub->poison) {
+ call_stub_destroy(stub);
+ stub = NULL;
+ return 0;
+ }
+
+ call_resume(stub);
+ return 0;
+
+unwind:
+ if (stub) {
+ call_stub_destroy(stub);
+ }
+ STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+metadisp_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ loc_t backend_loc = {
+ 0,
+ };
+
+ if (gf_uuid_is_null(loc->gfid)) {
+ METADISP_TRACE("winding lookup for unlink to path %s", loc->path);
+
+ // loop back to ourselves after a lookup
+ stub = fop_unlink_stub(frame, metadisp_unlink, loc, xflag, xdata);
+ STACK_WIND_COOKIE(frame, metadisp_unlink_lookup_cbk, stub,
+ METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->lookup, loc, xdata);
+ return 0;
+ }
+
+ if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+ goto unwind;
+ }
+
+ //
+ // ensure we get the link count on the unlink response, so we can
+ // account for hardlinks before winding to the backend.
+ // NOTE:
+ // multiple xlators use GF_REQUEST_LINK_COUNT_XDATA. confirmation
+ // is needed to ensure that multiple requests will work in the same
+ // xlator stack.
+ //
+ if (!xdata) {
+ xdata = dict_new();
+ }
+ dict_set_int32(xdata, GF_REQUEST_LINK_COUNT_XDATA, 1);
+
+ METADISP_TRACE("winding frontend unlink to path %s", loc->path);
+ stub = fop_unlink_stub(frame, metadisp_unlink_resume, &backend_loc, xflag,
+ xdata);
+
+ STACK_WIND_COOKIE(frame, metadisp_unlink_cbk, stub, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ return 0;
+unwind:
+ STACK_UNWIND_STRICT(unlink, frame, -1, EINVAL, NULL, NULL, NULL);
+ return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp.c b/xlators/features/metadisp/src/metadisp.c
new file mode 100644
index 00000000000..3c8f150cebc
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp.c
@@ -0,0 +1,46 @@
+#include <glusterfs/call-stub.h>
+
+#include "metadisp.h"
+#include "metadisp-fops.h"
+
+int32_t
+init(xlator_t *this)
+{
+ if (!this->children) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "not configured with children. exiting");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_log(this->name, GF_LOG_WARNING, "dangling volume. check volfile ");
+ }
+
+ return 0;
+}
+
+void
+fini(xlator_t *this)
+{
+ return;
+}
+
+/* defined in fops.c */
+struct xlator_fops fops;
+
+struct xlator_cbks cbks = {};
+
+struct volume_options options[] = {
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .op_version = {1},
+ .identifier = "metadisp",
+ .category = GF_EXPERIMENTAL,
+};
diff --git a/xlators/features/metadisp/src/metadisp.h b/xlators/features/metadisp/src/metadisp.h
new file mode 100644
index 00000000000..c8fd7a13c04
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp.h
@@ -0,0 +1,45 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef GF_METADISP_H_
+#define GF_METADISP_H_
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#define METADATA_CHILD(_this) FIRST_CHILD(_this)
+#define DATA_CHILD(_this) SECOND_CHILD(_this)
+
+int32_t
+build_backend_loc(uuid_t gfid, loc_t *src_loc, loc_t *dst_loc);
+
+#define METADISP_TRACE(_args...) gf_log("metadisp", GF_LOG_INFO, _args)
+
+#define METADISP_FILTER_ROOT(_op, _args...) \
+ if (strcmp(loc->path, "/") == 0) { \
+ STACK_WIND(frame, default_##_op##_cbk, METADATA_CHILD(this), \
+ METADATA_CHILD(this)->fops->_op, _args); \
+ return 0; \
+ }
+
+#define METADISP_FILTER_ROOT_BY_GFID(_op, _gfid, _args...) \
+ if (__is_root_gfid(_gfid)) { \
+ STACK_WIND(frame, default_##_op##_cbk, METADATA_CHILD(this), \
+ METADATA_CHILD(this)->fops->_op, _args); \
+ return 0; \
+ }
+
+#define RESOLVE_GFID_REQ(_dict, _dest, _lbl) \
+ VALIDATE_OR_GOTO(dict_get_ptr(_dict, "gfid-req", (void **)&_dest) == 0, \
+ _lbl)
+
+#endif /* __TEMPLATE_H__ */
diff --git a/xlators/features/namespace/src/namespace.c b/xlators/features/namespace/src/namespace.c
index 79812aa330f..86c5ebee900 100644
--- a/xlators/features/namespace/src/namespace.c
+++ b/xlators/features/namespace/src/namespace.c
@@ -15,10 +15,9 @@
#include <sys/types.h>
-#include "defaults.h"
-#include "glusterfs.h"
-#include "hashfn.h"
-#include "logging.h"
+#include <glusterfs/defaults.h>
+#include <glusterfs/hashfn.h>
+#include <glusterfs/logging.h>
#include "namespace.h"
/* Return codes for common path parsing functions. */
@@ -1330,3 +1329,16 @@ struct volume_options options[] = {
},
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .op_version = {GD_OP_VERSION_3_12_0},
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "namespace",
+ .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/features/namespace/src/namespace.h b/xlators/features/namespace/src/namespace.h
index 4c04cb3f471..3a9b84d6426 100644
--- a/xlators/features/namespace/src/namespace.h
+++ b/xlators/features/namespace/src/namespace.h
@@ -6,8 +6,8 @@
#include "config.h"
#endif
-#include "xlator.h"
-#include "call-stub.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/call-stub.h>
#define GF_NAMESPACE "namespace"
diff --git a/xlators/features/quiesce/src/quiesce-mem-types.h b/xlators/features/quiesce/src/quiesce-mem-types.h
index 914bfb22ed0..416456b13af 100644
--- a/xlators/features/quiesce/src/quiesce-mem-types.h
+++ b/xlators/features/quiesce/src/quiesce-mem-types.h
@@ -11,7 +11,7 @@
#ifndef __QUIESCE_MEM_TYPES_H__
#define __QUIESCE_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_quiesce_mem_types_ {
gf_quiesce_mt_priv_t = gf_common_mt_end + 1,
diff --git a/xlators/features/quiesce/src/quiesce-messages.h b/xlators/features/quiesce/src/quiesce-messages.h
index 864a18147dc..32ffd409807 100644
--- a/xlators/features/quiesce/src/quiesce-messages.h
+++ b/xlators/features/quiesce/src/quiesce-messages.h
@@ -11,7 +11,7 @@
#ifndef __QUIESCE_MESSAGES_H__
#define __QUIESCE_MESSAGES_H__
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
diff --git a/xlators/features/quiesce/src/quiesce.c b/xlators/features/quiesce/src/quiesce.c
index 41912b7e882..0e5eb60a16f 100644
--- a/xlators/features/quiesce/src/quiesce.c
+++ b/xlators/features/quiesce/src/quiesce.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
#include "quiesce.h"
-#include "defaults.h"
-#include "call-stub.h"
+#include <glusterfs/defaults.h>
+#include <glusterfs/call-stub.h>
/* TODO: */
/* Think about 'writev/_*_lk/setattr/xattrop/' fops to do re-transmittion */
@@ -89,13 +89,14 @@ gf_quiesce_populate_failover_hosts(xlator_t *this, quiesce_priv_t *priv,
if (!dup_val)
goto out;
+ addr_tok = strtok_r(dup_val, ",", &save_ptr);
LOCK(&priv->lock);
{
if (!list_empty(&priv->failover_list))
__gf_quiesce_cleanup_failover_hosts(this, priv);
- addr_tok = strtok_r(dup_val, ",", &save_ptr);
+
while (addr_tok) {
- if (!valid_internet_address(addr_tok, _gf_true)) {
+ if (!valid_internet_address(addr_tok, _gf_true, _gf_false)) {
gf_msg(this->name, GF_LOG_INFO, 0, QUIESCE_MSG_INVAL_HOST,
"Specified "
"invalid internet address:%s",
@@ -1192,6 +1193,33 @@ quiesce_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
}
int32_t
+quiesce_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+
+ if (priv->pass_through) {
+ STACK_WIND(frame, default_fremovexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
+ return 0;
+ }
+
+ stub = fop_fremovexattr_stub(frame, default_fremovexattr_resume, fd, name,
+ xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(fremovexattr, frame, -1, ENOMEM, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue(this, stub);
+
+ return 0;
+}
+
+int32_t
quiesce_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
dict_t *xdata)
{
@@ -2364,19 +2392,10 @@ quiesce_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
{
quiesce_priv_t *priv = NULL;
call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
priv = this->private;
if (priv && priv->pass_through) {
- local = mem_get0(priv->local_pool);
- local->fd = fd_ref(fd);
- local->offset = offset;
- local->len = len;
- local->flag = mode;
-
- frame->local = local;
-
STACK_WIND(frame, default_fallocate_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len,
xdata);
@@ -2536,6 +2555,7 @@ fini(xlator_t *this)
this->private = NULL;
mem_pool_destroy(priv->local_pool);
+ priv->local_pool = NULL;
LOCK_DESTROY(&priv->lock);
GF_FREE(priv);
out:
@@ -2592,7 +2612,9 @@ struct xlator_fops fops = {
.truncate = quiesce_truncate,
.ftruncate = quiesce_ftruncate,
.setxattr = quiesce_setxattr,
+ .fsetxattr = quiesce_fsetxattr,
.removexattr = quiesce_removexattr,
+ .fremovexattr = quiesce_fremovexattr,
.symlink = quiesce_symlink,
.unlink = quiesce_unlink,
.link = quiesce_link,
@@ -2625,6 +2647,7 @@ struct xlator_fops fops = {
.access = quiesce_access,
.readlink = quiesce_readlink,
.getxattr = quiesce_getxattr,
+ .fgetxattr = quiesce_fgetxattr,
.open = quiesce_open,
.readv = quiesce_readv,
.flush = quiesce_flush,
@@ -2664,3 +2687,18 @@ struct volume_options options[] = {
"the thin clients can failover to."},
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {GD_OP_VERSION_3_12_0},
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "quiesce",
+ .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/features/quiesce/src/quiesce.h b/xlators/features/quiesce/src/quiesce.h
index ed8f8fa2934..6ab2af40a56 100644
--- a/xlators/features/quiesce/src/quiesce.h
+++ b/xlators/features/quiesce/src/quiesce.h
@@ -13,8 +13,8 @@
#include "quiesce-mem-types.h"
#include "quiesce-messages.h"
-#include "xlator.h"
-#include "timer.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/timer.h>
#define GF_FOPS_EXPECTED_IN_PARALLEL 512
diff --git a/xlators/features/quota/src/Makefile.am b/xlators/features/quota/src/Makefile.am
index 0ae47fc189f..1c2dcef0ca3 100644
--- a/xlators/features/quota/src/Makefile.am
+++ b/xlators/features/quota/src/Makefile.am
@@ -4,7 +4,7 @@ endif
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
quota_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-quotad_la_LDFLAGS = -module -export-symbols $(top_srcdir)/xlators/features/quota/src/quotad.sym $(GF_XLATOR_LDFLAGS)
+quotad_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
quota_la_SOURCES = quota.c quota-enforcer-client.c
quota_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
@@ -27,6 +27,3 @@ AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
-
-EXTRA_DIST = quotad.sym
-
diff --git a/xlators/features/quota/src/quota-enforcer-client.c b/xlators/features/quota/src/quota-enforcer-client.c
index 1a4c2e30dd6..480d64ade27 100644
--- a/xlators/features/quota/src/quota-enforcer-client.c
+++ b/xlators/features/quota/src/quota-enforcer-client.c
@@ -32,12 +32,6 @@
#include <malloc.h>
#endif
-#ifdef HAVE_MALLOC_STATS
-#ifdef DEBUG
-#include <mcheck.h>
-#endif
-#endif
-
#include "quota.h"
#include "quota-messages.h"
@@ -362,16 +356,28 @@ quota_enforcer_notify(struct rpc_clnt *rpc, void *mydata,
{
xlator_t *this = NULL;
int ret = 0;
+ quota_priv_t *priv = NULL;
this = mydata;
-
+ priv = this->private;
switch (event) {
case RPC_CLNT_CONNECT: {
+ pthread_mutex_lock(&priv->conn_mutex);
+ {
+ priv->conn_status = _gf_true;
+ }
+ pthread_mutex_unlock(&priv->conn_mutex);
gf_msg_trace(this->name, 0, "got RPC_CLNT_CONNECT");
break;
}
case RPC_CLNT_DISCONNECT: {
+ pthread_mutex_lock(&priv->conn_mutex);
+ {
+ priv->conn_status = _gf_false;
+ pthread_cond_signal(&priv->conn_cond);
+ }
+ pthread_mutex_unlock(&priv->conn_mutex);
gf_msg_trace(this->name, 0, "got RPC_CLNT_DISCONNECT");
break;
}
diff --git a/xlators/features/quota/src/quota-mem-types.h b/xlators/features/quota/src/quota-mem-types.h
index 24b198eae35..782a7de96bb 100644
--- a/xlators/features/quota/src/quota-mem-types.h
+++ b/xlators/features/quota/src/quota-mem-types.h
@@ -10,7 +10,7 @@
#ifndef __QUOTA_MEM_TYPES_H__
#define __QUOTA_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_quota_mem_types_ {
/* Those are used by QUOTA_ALLOC_OR_GOTO macro */
diff --git a/xlators/features/quota/src/quota-messages.h b/xlators/features/quota/src/quota-messages.h
index 5129d2ee9ad..d434ed75e76 100644
--- a/xlators/features/quota/src/quota-messages.h
+++ b/xlators/features/quota/src/quota-messages.h
@@ -11,7 +11,7 @@
#ifndef _QUOTA_MESSAGES_H_
#define _QUOTA_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
diff --git a/xlators/features/quota/src/quota.c b/xlators/features/quota/src/quota.c
index 9ad4f902d08..18df9ae6d19 100644
--- a/xlators/features/quota/src/quota.c
+++ b/xlators/features/quota/src/quota.c
@@ -7,15 +7,11 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include <fnmatch.h>
#include "quota.h"
-#include "common-utils.h"
-#include "defaults.h"
-#include "statedump.h"
-#include "quota-common-utils.h"
+#include <glusterfs/statedump.h>
#include "quota-messages.h"
-#include "events.h"
+#include <glusterfs/events.h>
struct volume_options options[];
@@ -564,15 +560,14 @@ quota_handle_validate_error(call_frame_t *frame, int32_t op_ret,
if (local == NULL)
goto out;
- LOCK(&local->lock);
- {
- if (op_ret < 0) {
+ if (op_ret < 0) {
+ LOCK(&local->lock);
+ {
local->op_ret = op_ret;
local->op_errno = op_errno;
}
+ UNLOCK(&local->lock);
}
- UNLOCK(&local->lock);
-
/* we abort checking limits on this path to root */
quota_link_count_decrement(frame);
out:
@@ -591,9 +586,6 @@ quota_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
quota_meta_t size = {
0,
};
- struct timeval tv = {
- 0,
- };
local = frame->local;
@@ -631,13 +623,12 @@ quota_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
* loop of validation and checking
* limit when timeout is zero.
*/
- gettimeofday(&tv, NULL);
LOCK(&ctx->lock);
{
ctx->size = size.size;
+ ctx->validate_time = gf_time();
ctx->file_count = size.file_count;
ctx->dir_count = size.dir_count;
- memcpy(&ctx->tv, &tv, sizeof(struct timeval));
}
UNLOCK(&ctx->lock);
@@ -649,64 +640,51 @@ unwind:
return 0;
}
-static uint64_t
-quota_time_elapsed(struct timeval *now, struct timeval *then)
+static inline gf_boolean_t
+quota_timeout(time_t t, uint32_t timeout)
{
- return (now->tv_sec - then->tv_sec);
-}
-
-int32_t
-quota_timeout(struct timeval *tv, int32_t timeout)
-{
- struct timeval now = {
- 0,
- };
- int32_t timed_out = 0;
-
- gettimeofday(&now, NULL);
-
- if (quota_time_elapsed(&now, tv) >= timeout) {
- timed_out = 1;
- }
-
- return timed_out;
+ return (gf_time() - t) >= timeout;
}
/* Return: 1 if new entry added
* 0 no entry added
+ * -1 on errors
*/
static int32_t
quota_add_parent(struct list_head *list, char *name, uuid_t pgfid)
{
quota_dentry_t *entry = NULL;
gf_boolean_t found = _gf_false;
+ int ret = 0;
- if (list == NULL) {
- goto out;
- }
-
- list_for_each_entry(entry, list, next)
- {
- if (gf_uuid_compare(pgfid, entry->par) == 0) {
- found = _gf_true;
- goto out;
+ if (!list_empty(list)) {
+ list_for_each_entry(entry, list, next)
+ {
+ if (gf_uuid_compare(pgfid, entry->par) == 0) {
+ found = _gf_true;
+ goto out;
+ }
}
}
entry = __quota_dentry_new(NULL, name, pgfid);
if (entry)
list_add_tail(&entry->next, list);
+ else
+ ret = -1;
out:
if (found)
return 0;
- else
+ else if (ret == 0)
return 1;
+ else
+ return -1;
}
/* This function iterates the parent list in inode
* context and add unique parent to the list
- * Returns number of dentry added to the list
+ * Returns number of dentry added to the list, or -1 on errors
*/
static int32_t
quota_add_parents_from_ctx(quota_inode_ctx_t *ctx, struct list_head *list)
@@ -723,15 +701,16 @@ quota_add_parents_from_ctx(quota_inode_ctx_t *ctx, struct list_head *list)
list_for_each_entry(dentry, &ctx->parents, next)
{
ret = quota_add_parent(list, dentry->name, dentry->par);
-
if (ret == 1)
count++;
+ else if (ret == -1)
+ break;
}
}
UNLOCK(&ctx->lock);
out:
- return count;
+ return (ret == -1) ? -1 : count;
}
int32_t
@@ -750,10 +729,9 @@ quota_build_ancestry_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
quota_dentry_t *dentry = NULL;
quota_dentry_t *tmp = NULL;
quota_inode_ctx_t *ctx = NULL;
- struct list_head parents = {
- 0,
- };
+ struct list_head parents;
quota_local_t *local = NULL;
+ int ret;
INIT_LIST_HEAD(&parents);
@@ -828,7 +806,11 @@ quota_build_ancestry_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
quota_inode_ctx_get(local->loc.inode, this, &ctx, 0);
- quota_add_parents_from_ctx(ctx, &parents);
+ ret = quota_add_parents_from_ctx(ctx, &parents);
+ if (ret == -1) {
+ op_errno = errno;
+ goto err;
+ }
if (list_empty(&parents)) {
/* we built ancestry for a directory */
@@ -843,7 +825,11 @@ quota_build_ancestry_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
GF_ASSERT (&entry->list != &entries->list);
*/
- quota_add_parent(&parents, entry->d_name, parent->gfid);
+ ret = quota_add_parent(&parents, entry->d_name, parent->gfid);
+ if (ret == -1) {
+ op_errno = errno;
+ goto err;
+ }
}
local->ancestry_cbk(&parents, local->loc.inode, 0, 0, local->ancestry_data);
@@ -861,9 +847,11 @@ cleanup:
parent = NULL;
}
- list_for_each_entry_safe(dentry, tmp, &parents, next)
- {
- __quota_dentry_free(dentry);
+ if (!list_empty(&parents)) {
+ list_for_each_entry_safe(dentry, tmp, &parents, next)
+ {
+ __quota_dentry_free(dentry);
+ }
}
return 0;
@@ -1119,7 +1107,7 @@ quota_check_object_limit(call_frame_t *frame, quota_inode_ctx_t *ctx,
timeout = priv->hard_timeout;
}
- if (!just_validated && quota_timeout(&ctx->tv, timeout)) {
+ if (!just_validated && quota_timeout(ctx->validate_time, timeout)) {
need_validate = 1;
} else if ((object_aggr_count) > ctx->object_hard_lim) {
hard_limit_exceeded = 1;
@@ -1186,7 +1174,7 @@ quota_check_size_limit(call_frame_t *frame, quota_inode_ctx_t *ctx,
timeout = priv->hard_timeout;
}
- if (!just_validated && quota_timeout(&ctx->tv, timeout)) {
+ if (!just_validated && quota_timeout(ctx->validate_time, timeout)) {
need_validate = 1;
} else if (wouldbe_size >= ctx->hard_lim) {
hard_limit_exceeded = 1;
@@ -1755,19 +1743,13 @@ quota_writev_helper(call_frame_t *frame, xlator_t *this, fd_t *fd,
if ((op_errno == EDQUOT) && (local->space_available > 0)) {
new_count = iov_subset(vector, count, 0, local->space_available,
- NULL);
-
- new_vector = GF_CALLOC(new_count, sizeof(struct iovec),
- gf_common_mt_iovec);
- if (new_vector == NULL) {
+ &new_vector, 0);
+ if (new_count < 0) {
local->op_ret = -1;
local->op_errno = ENOMEM;
goto unwind;
}
- new_count = iov_subset(vector, count, 0, local->space_available,
- new_vector);
-
vector = new_vector;
count = new_count;
} else if (op_errno == ENOENT || op_errno == ESTALE) {
@@ -1839,9 +1821,7 @@ quota_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
quota_inode_ctx_t *ctx = NULL;
quota_dentry_t *dentry = NULL, *tmp = NULL;
call_stub_t *stub = NULL;
- struct list_head head = {
- 0,
- };
+ struct list_head head;
inode_t *par_inode = NULL;
priv = this->private;
@@ -1881,9 +1861,13 @@ quota_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
priv = this->private;
GF_VALIDATE_OR_GOTO(this->name, priv, unwind);
- size = iov_length(vector, count);
-
parents = quota_add_parents_from_ctx(ctx, &head);
+ if (parents == -1) {
+ op_errno = errno;
+ goto unwind;
+ }
+
+ size = iov_length(vector, count);
LOCK(&local->lock);
{
@@ -1905,10 +1889,12 @@ quota_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
par_inode = do_quota_check_limit(frame, fd->inode, this, dentry,
_gf_false);
if (par_inode == NULL) {
- /* remove stale entry from inode ctx */
- quota_dentry_del(ctx, dentry->name, dentry->par);
- parents--;
- fail_count++;
+ if (ctx) {
+ /* remove stale entry from inode ctx */
+ quota_dentry_del(ctx, dentry->name, dentry->par);
+ parents--;
+ fail_count++;
+ }
} else {
inode_unref(par_inode);
}
@@ -3276,12 +3262,11 @@ quota_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- LOCK(&ctx->lock);
- {
- if (buf)
- ctx->buf = *buf;
+ if (buf) {
+ LOCK(&ctx->lock);
+ ctx->buf = *buf;
+ UNLOCK(&ctx->lock);
}
- UNLOCK(&ctx->lock);
out:
QUOTA_STACK_UNWIND(stat, frame, op_ret, op_errno, buf, xdata);
@@ -3355,12 +3340,11 @@ quota_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- LOCK(&ctx->lock);
- {
- if (buf)
- ctx->buf = *buf;
+ if (buf) {
+ LOCK(&ctx->lock);
+ ctx->buf = *buf;
+ UNLOCK(&ctx->lock);
}
- UNLOCK(&ctx->lock);
out:
QUOTA_STACK_UNWIND(fstat, frame, op_ret, op_errno, buf, xdata);
@@ -3650,12 +3634,11 @@ quota_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
- LOCK(&ctx->lock);
- {
- if (statpost)
- ctx->buf = *statpost;
+ if (statpost) {
+ LOCK(&ctx->lock);
+ ctx->buf = *statpost;
+ UNLOCK(&ctx->lock);
}
- UNLOCK(&ctx->lock);
out:
QUOTA_STACK_UNWIND(setattr, frame, op_ret, op_errno, statpre, statpost,
@@ -4310,9 +4293,6 @@ quota_statfs_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
quota_meta_t size = {
0,
};
- struct timeval tv = {
- 0,
- };
local = frame->local;
@@ -4344,13 +4324,12 @@ quota_statfs_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
op_errno = EINVAL;
}
- gettimeofday(&tv, NULL);
LOCK(&ctx->lock);
{
ctx->size = size.size;
+ ctx->validate_time = gf_time();
ctx->file_count = size.file_count;
ctx->dir_count = size.dir_count;
- memcpy(&ctx->tv, &tv, sizeof(struct timeval));
}
UNLOCK(&ctx->lock);
@@ -4805,6 +4784,10 @@ quota_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
GF_VALIDATE_OR_GOTO(this->name, priv, unwind);
parents = quota_add_parents_from_ctx(ctx, &head);
+ if (parents == -1) {
+ op_errno = errno;
+ goto unwind;
+ }
/*
* Note that by using len as the delta we're assuming the range from
@@ -4865,7 +4848,7 @@ off:
void
quota_log_helper(char **usage_str, int64_t cur_size, inode_t *inode,
- char **path, struct timeval *cur_time)
+ char **path, time_t *cur_time)
{
xlator_t *this = THIS;
@@ -4884,7 +4867,7 @@ quota_log_helper(char **usage_str, int64_t cur_size, inode_t *inode,
if (!(*path))
*path = uuid_utoa(inode->gfid);
- gettimeofday(cur_time, NULL);
+ *cur_time = gf_time();
}
/* Logs if
@@ -4895,9 +4878,7 @@ void
quota_log_usage(xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode,
int64_t delta)
{
- struct timeval cur_time = {
- 0,
- };
+ time_t cur_time = 0;
char *usage_str = NULL;
char *path = NULL;
int64_t cur_size = 0;
@@ -4923,12 +4904,12 @@ quota_log_usage(xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode,
"path=%s",
usage_str, priv->volume_uuid, path);
- ctx->prev_log = cur_time;
+ ctx->prev_log_time = cur_time;
}
/* Usage is above soft limit */
else if (cur_size > ctx->soft_lim &&
- quota_timeout(&ctx->prev_log, priv->log_timeout)) {
+ quota_timeout(ctx->prev_log_time, priv->log_timeout)) {
quota_log_helper(&usage_str, cur_size, inode, &path, &cur_time);
gf_msg(this->name, GF_LOG_ALERT, 0, Q_MSG_CROSSED_SOFT_LIMIT,
@@ -4939,9 +4920,12 @@ quota_log_usage(xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode,
"path=%s",
usage_str, priv->volume_uuid, path);
- ctx->prev_log = cur_time;
+ ctx->prev_log_time = cur_time;
}
+ if (path)
+ GF_FREE(path);
+
if (usage_str)
GF_FREE(usage_str);
}
@@ -4997,6 +4981,43 @@ quota_forget(xlator_t *this, inode_t *inode)
return 0;
}
+int
+notify(xlator_t *this, int event, void *data, ...)
+{
+ quota_priv_t *priv = NULL;
+ int ret = 0;
+ rpc_clnt_t *rpc = NULL;
+ gf_boolean_t conn_status = _gf_true;
+ xlator_t *victim = data;
+
+ priv = this->private;
+ if (!priv || !priv->is_quota_on)
+ goto out;
+
+ if (event == GF_EVENT_PARENT_DOWN) {
+ rpc = priv->rpc_clnt;
+ if (rpc) {
+ rpc_clnt_disable(rpc);
+ pthread_mutex_lock(&priv->conn_mutex);
+ {
+ conn_status = priv->conn_status;
+ while (conn_status) {
+ (void)pthread_cond_wait(&priv->conn_cond,
+ &priv->conn_mutex);
+ conn_status = priv->conn_status;
+ }
+ }
+ pthread_mutex_unlock(&priv->conn_mutex);
+ gf_log(this->name, GF_LOG_INFO,
+ "Notify GF_EVENT_PARENT_DOWN for brick %s", victim->name);
+ }
+ }
+
+out:
+ ret = default_notify(this, event, data);
+ return ret;
+}
+
int32_t
init(xlator_t *this)
{
@@ -5039,6 +5060,10 @@ init(xlator_t *this)
goto err;
}
+ pthread_mutex_init(&priv->conn_mutex, NULL);
+ pthread_cond_init(&priv->conn_cond, NULL);
+ priv->conn_status = _gf_false;
+
if (priv->is_quota_on) {
rpc = quota_enforcer_init(this, this->options);
if (rpc == NULL) {
@@ -5132,13 +5157,14 @@ quota_priv_dump(xlator_t *this)
if (ret)
goto out;
else {
- gf_proc_dump_write("soft-timeout", "%d", priv->soft_timeout);
- gf_proc_dump_write("hard-timeout", "%d", priv->hard_timeout);
- gf_proc_dump_write("alert-time", "%d", priv->log_timeout);
+ gf_proc_dump_write("soft-timeout", "%u", priv->soft_timeout);
+ gf_proc_dump_write("hard-timeout", "%u", priv->hard_timeout);
+ gf_proc_dump_write("alert-time", "%u", priv->log_timeout);
gf_proc_dump_write("quota-on", "%d", priv->is_quota_on);
gf_proc_dump_write("statfs", "%d", priv->consider_statfs);
gf_proc_dump_write("volume-uuid", "%s", priv->volume_uuid);
- gf_proc_dump_write("validation-count", "%ld", priv->validation_count);
+ gf_proc_dump_write("validation-count", "%" PRIu64,
+ priv->validation_count);
}
UNLOCK(&priv->lock);
@@ -5151,20 +5177,22 @@ fini(xlator_t *this)
{
quota_priv_t *priv = NULL;
rpc_clnt_t *rpc = NULL;
- int i = 0, cnt = 0;
priv = this->private;
if (!priv)
return;
rpc = priv->rpc_clnt;
priv->rpc_clnt = NULL;
- this->private = NULL;
if (rpc) {
- cnt = GF_ATOMIC_GET(rpc->refcount);
- for (i = 0; i < cnt; i++)
- rpc_clnt_unref(rpc);
+ rpc_clnt_connection_cleanup(&rpc->conn);
+ rpc_clnt_unref(rpc);
}
+
+ this->private = NULL;
LOCK_DESTROY(&priv->lock);
+ pthread_mutex_destroy(&priv->conn_mutex);
+ pthread_cond_destroy(&priv->conn_cond);
+
GF_FREE(priv);
if (this->local_pool) {
mem_pool_destroy(this->local_pool);
@@ -5292,3 +5320,17 @@ struct volume_options options[] = {
.tags = {},
},
{.key = {NULL}}};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "quota",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/quota/src/quota.h b/xlators/features/quota/src/quota.h
index 8bcc3ec6176..0395d78c9ef 100644
--- a/xlators/features/quota/src/quota.h
+++ b/xlators/features/quota/src/quota.h
@@ -10,25 +10,22 @@
#ifndef _QUOTA_H
#define _QUOTA_H
-#include "xlator.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
+#include <glusterfs/call-stub.h>
#include "quota-mem-types.h"
-#include "glusterfs.h"
-#include "compat.h"
-#include "logging.h"
-#include "dict.h"
-#include "gf-event.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/gf-event.h>
#include "rpcsvc.h"
#include "rpc-clnt.h"
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
#include "glusterfs3-xdr.h"
#include "glusterfs3.h"
#include "xdr-generic.h"
-#include "compat-errno.h"
+#include <glusterfs/compat-errno.h>
#include "protocol-common.h"
-#include "quota-common-utils.h"
+#include <glusterfs/quota-common-utils.h>
#include "quota-messages.h"
#define DIRTY "dirty"
@@ -156,8 +153,8 @@ struct quota_inode_ctx {
int64_t object_soft_lim;
struct iatt buf;
struct list_head parents;
- struct timeval tv;
- struct timeval prev_log;
+ time_t validate_time;
+ time_t prev_log_time;
gf_boolean_t ancestry_built;
gf_lock_t lock;
};
@@ -202,6 +199,7 @@ struct quota_local {
typedef struct quota_local quota_local_t;
struct quota_priv {
+ /* FIXME: consider time_t for timeouts. */
uint32_t soft_timeout;
uint32_t hard_timeout;
uint32_t log_timeout;
@@ -217,6 +215,9 @@ struct quota_priv {
char *volume_uuid;
uint64_t validation_count;
int32_t quotad_conn_status;
+ pthread_mutex_t conn_mutex;
+ pthread_cond_t conn_cond;
+ gf_boolean_t conn_status;
};
typedef struct quota_priv quota_priv_t;
diff --git a/xlators/features/quota/src/quotad-aggregator.c b/xlators/features/quota/src/quotad-aggregator.c
index e0129e4f63a..75d47867b5b 100644
--- a/xlators/features/quota/src/quotad-aggregator.c
+++ b/xlators/features/quota/src/quotad-aggregator.c
@@ -13,7 +13,14 @@
#include "quotad-helpers.h"
#include "quotad-aggregator.h"
-struct rpcsvc_program quotad_aggregator_prog;
+static char *qd_ext_xattrs[] = {
+ QUOTA_SIZE_KEY,
+ QUOTA_LIMIT_KEY,
+ QUOTA_LIMIT_OBJECTS_KEY,
+ NULL,
+};
+
+static struct rpcsvc_program quotad_aggregator_prog;
struct iobuf *
quotad_serialize_reply(rpcsvc_request_t *req, void *arg, struct iovec *outmsg,
@@ -132,13 +139,16 @@ quotad_aggregator_getlimit_cbk(xlator_t *this, call_frame_t *frame,
int ret = -1;
int type = 0;
+ if (!rsp || (rsp->op_ret == -1))
+ goto reply;
+
GF_PROTOCOL_DICT_UNSERIALIZE(frame->this, xdata, (rsp->xdata.xdata_val),
(rsp->xdata.xdata_len), rsp->op_ret,
rsp->op_errno, out);
if (xdata) {
state = frame->root->state;
- ret = dict_get_int32n(state->xdata, "type", SLEN("type"), &type);
+ ret = dict_get_int32n(state->req_xdata, "type", SLEN("type"), &type);
if (ret < 0)
goto out;
@@ -166,8 +176,9 @@ out:
}
reply:
- quotad_aggregator_submit_reply(frame, frame->local, (void *)&cli_rsp, NULL,
- 0, NULL, (xdrproc_t)xdr_gf_cli_rsp);
+ quotad_aggregator_submit_reply(frame, (frame) ? frame->local : NULL,
+ (void *)&cli_rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_cli_rsp);
dict_unref(xdata);
GF_FREE(cli_rsp.dict.dict_val);
@@ -182,22 +193,20 @@ quotad_aggregator_getlimit(rpcsvc_request_t *req)
{0},
};
gf_cli_rsp cli_rsp = {0};
- gfs3_lookup_req args = {
- {
- 0,
- },
- };
quotad_aggregator_state_t *state = NULL;
xlator_t *this = NULL;
dict_t *dict = NULL;
int ret = -1, op_errno = 0;
char *gfid_str = NULL;
uuid_t gfid = {0};
+ char *volume_uuid = NULL;
GF_VALIDATE_OR_GOTO("quotad-aggregator", req, err);
this = THIS;
+ cli_req.dict.dict_val = alloca(req->msg[0].iov_len);
+
ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
if (ret < 0) {
// failed to decode msg;
@@ -224,6 +233,11 @@ quotad_aggregator_getlimit(rpcsvc_request_t *req)
goto err;
}
+ ret = dict_get_strn(dict, "volume-uuid", SLEN("volume-uuid"), &volume_uuid);
+ if (ret) {
+ goto err;
+ }
+
gf_uuid_parse((const char *)gfid_str, gfid);
frame = quotad_aggregator_get_frame_from_req(req);
@@ -232,7 +246,9 @@ quotad_aggregator_getlimit(rpcsvc_request_t *req)
goto errx;
}
state = frame->root->state;
- state->xdata = dict;
+ state->req_xdata = dict;
+ state->xdata = dict_new();
+ dict = NULL;
ret = dict_set_int32_sizen(state->xdata, QUOTA_LIMIT_KEY, 42);
if (ret)
@@ -253,13 +269,8 @@ quotad_aggregator_getlimit(rpcsvc_request_t *req)
if (ret)
goto err;
- memcpy(&args.gfid, &gfid, 16);
-
- args.bname = alloca(req->msg[0].iov_len);
- args.xdata.xdata_val = alloca(req->msg[0].iov_len);
-
- ret = qd_nameless_lookup(this, frame, &args, state->xdata,
- quotad_aggregator_getlimit_cbk);
+ ret = qd_nameless_lookup(this, frame, (char *)gfid, state->xdata,
+ volume_uuid, quotad_aggregator_getlimit_cbk);
if (ret) {
cli_rsp.op_errno = ret;
goto errx;
@@ -276,7 +287,6 @@ errx:
quotad_aggregator_getlimit_cbk(this, frame, &cli_rsp);
if (dict)
dict_unref(dict);
-
return ret;
}
@@ -299,12 +309,14 @@ quotad_aggregator_lookup(rpcsvc_request_t *req)
0,
},
};
- int ret = -1, op_errno = 0;
+ int i = 0, ret = -1, op_errno = 0;
gfs3_lookup_rsp rsp = {
0,
};
quotad_aggregator_state_t *state = NULL;
xlator_t *this = NULL;
+ dict_t *dict = NULL;
+ char *volume_uuid = NULL;
GF_VALIDATE_OR_GOTO("quotad-aggregator", req, err);
@@ -327,16 +339,34 @@ quotad_aggregator_lookup(rpcsvc_request_t *req)
state = frame->root->state;
- GF_PROTOCOL_DICT_UNSERIALIZE(this, state->xdata, (args.xdata.xdata_val),
+ GF_PROTOCOL_DICT_UNSERIALIZE(this, dict, (args.xdata.xdata_val),
(args.xdata.xdata_len), ret, op_errno, err);
- ret = qd_nameless_lookup(this, frame, &args, state->xdata,
+ ret = dict_get_str(dict, "volume-uuid", &volume_uuid);
+ if (ret) {
+ goto err;
+ }
+
+ state->xdata = dict_new();
+
+ for (i = 0; qd_ext_xattrs[i]; i++) {
+ if (dict_get(dict, qd_ext_xattrs[i])) {
+ ret = dict_set_uint32(state->xdata, qd_ext_xattrs[i], 1);
+ if (ret < 0)
+ goto err;
+ }
+ }
+
+ ret = qd_nameless_lookup(this, frame, args.gfid, state->xdata, volume_uuid,
quotad_aggregator_lookup_cbk);
if (ret) {
rsp.op_errno = ret;
goto err;
}
+ if (dict)
+ dict_unref(dict);
+
return ret;
err:
@@ -344,6 +374,9 @@ err:
rsp.op_errno = op_errno;
quotad_aggregator_lookup_cbk(this, frame, &rsp);
+ if (dict)
+ dict_unref(dict);
+
return ret;
}
@@ -445,15 +478,15 @@ out:
return ret;
}
-rpcsvc_actor_t quotad_aggregator_actors[GF_AGGREGATOR_MAXVALUE] = {
- [GF_AGGREGATOR_NULL] = {"NULL", GF_AGGREGATOR_NULL, NULL, NULL, 0, DRC_NA},
- [GF_AGGREGATOR_LOOKUP] = {"LOOKUP", GF_AGGREGATOR_NULL,
- quotad_aggregator_lookup, NULL, 0, DRC_NA},
- [GF_AGGREGATOR_GETLIMIT] = {"GETLIMIT", GF_AGGREGATOR_GETLIMIT,
- quotad_aggregator_getlimit, NULL, 0, DRC_NA},
+static rpcsvc_actor_t quotad_aggregator_actors[GF_AGGREGATOR_MAXVALUE] = {
+ [GF_AGGREGATOR_NULL] = {"NULL", NULL, NULL, GF_AGGREGATOR_NULL, DRC_NA, 0},
+ [GF_AGGREGATOR_LOOKUP] = {"LOOKUP", quotad_aggregator_lookup, NULL,
+ GF_AGGREGATOR_NULL, DRC_NA, 0},
+ [GF_AGGREGATOR_GETLIMIT] = {"GETLIMIT", quotad_aggregator_getlimit, NULL,
+ GF_AGGREGATOR_GETLIMIT, DRC_NA, 0},
};
-struct rpcsvc_program quotad_aggregator_prog = {
+static struct rpcsvc_program quotad_aggregator_prog = {
.progname = "GlusterFS 3.3",
.prognum = GLUSTER_AGGREGATOR_PROGRAM,
.progver = GLUSTER_AGGREGATOR_VERSION,
diff --git a/xlators/features/quota/src/quotad-aggregator.h b/xlators/features/quota/src/quotad-aggregator.h
index 02a0094102f..706592c7d50 100644
--- a/xlators/features/quota/src/quotad-aggregator.h
+++ b/xlators/features/quota/src/quotad-aggregator.h
@@ -12,9 +12,9 @@
#define _QUOTAD_AGGREGATOR_H
#include "quota.h"
-#include "stack.h"
+#include <glusterfs/stack.h>
#include "glusterfs3-xdr.h"
-#include "inode.h"
+#include <glusterfs/inode.h>
typedef struct {
void *pool;
@@ -23,13 +23,15 @@ typedef struct {
inode_table_t *itable;
loc_t loc;
dict_t *xdata;
+ dict_t *req_xdata;
} quotad_aggregator_state_t;
typedef int (*quotad_aggregator_lookup_cbk_t)(xlator_t *this,
call_frame_t *frame, void *rsp);
int
-qd_nameless_lookup(xlator_t *this, call_frame_t *frame, gfs3_lookup_req *req,
- dict_t *xdata, quotad_aggregator_lookup_cbk_t lookup_cbk);
+qd_nameless_lookup(xlator_t *this, call_frame_t *frame, char *gfid,
+ dict_t *xdata, char *volume_uuid,
+ quotad_aggregator_lookup_cbk_t lookup_cbk);
int
quotad_aggregator_init(xlator_t *this);
diff --git a/xlators/features/quota/src/quotad-helpers.c b/xlators/features/quota/src/quotad-helpers.c
index be8f9080f14..51ff1d7e98d 100644
--- a/xlators/features/quota/src/quotad-helpers.c
+++ b/xlators/features/quota/src/quotad-helpers.c
@@ -47,6 +47,9 @@ quotad_aggregator_free_state(quotad_aggregator_state_t *state)
if (state->xdata)
dict_unref(state->xdata);
+ if (state->req_xdata)
+ dict_unref(state->req_xdata);
+
GF_FREE(state);
}
@@ -73,7 +76,6 @@ quotad_aggregator_alloc_frame(rpcsvc_request_t *req)
goto out;
frame->root->state = state;
- frame->root->unique = 0;
frame->this = this;
out:
@@ -93,8 +95,6 @@ quotad_aggregator_get_frame_from_req(rpcsvc_request_t *req)
frame->root->op = req->procnum;
- frame->root->unique = req->xid;
-
frame->root->uid = req->uid;
frame->root->gid = req->gid;
frame->root->pid = req->pid;
diff --git a/xlators/features/quota/src/quotad.c b/xlators/features/quota/src/quotad.c
index 5b0ab83673b..643f25c9c2a 100644
--- a/xlators/features/quota/src/quotad.c
+++ b/xlators/features/quota/src/quotad.c
@@ -9,7 +9,6 @@
*/
#include "quota.h"
#include "quotad-aggregator.h"
-#include "common-utils.h"
int
qd_notify(xlator_t *this, int32_t event, void *data, ...)
@@ -104,8 +103,9 @@ out:
}
int
-qd_nameless_lookup(xlator_t *this, call_frame_t *frame, gfs3_lookup_req *req,
- dict_t *xdata, quotad_aggregator_lookup_cbk_t lookup_cbk)
+qd_nameless_lookup(xlator_t *this, call_frame_t *frame, char *gfid,
+ dict_t *xdata, char *volume_uuid,
+ quotad_aggregator_lookup_cbk_t lookup_cbk)
{
gfs3_lookup_rsp rsp = {
0,
@@ -116,7 +116,6 @@ qd_nameless_lookup(xlator_t *this, call_frame_t *frame, gfs3_lookup_req *req,
};
quotad_aggregator_state_t *state = NULL;
xlator_t *subvol = NULL;
- char *volume_uuid = NULL;
state = frame->root->state;
@@ -128,14 +127,7 @@ qd_nameless_lookup(xlator_t *this, call_frame_t *frame, gfs3_lookup_req *req,
goto out;
}
- memcpy(loc.gfid, req->gfid, 16);
-
- ret = dict_get_strn(xdata, "volume-uuid", SLEN("volume-uuid"),
- &volume_uuid);
- if (ret < 0) {
- op_errno = EINVAL;
- goto out;
- }
+ memcpy(loc.gfid, gfid, 16);
ret = dict_set_int8(xdata, QUOTA_READ_ONLY_KEY, 1);
if (ret < 0) {
@@ -220,11 +212,6 @@ err:
return ret;
}
-class_methods_t class_methods = {.init = qd_init,
- .fini = qd_fini,
- .reconfigure = qd_reconfigure,
- .notify = qd_notify};
-
struct xlator_fops fops = {};
struct xlator_cbks cbks = {};
@@ -240,4 +227,19 @@ struct volume_options options[] = {
.key = {"transport.*"},
.type = GF_OPTION_TYPE_ANY,
},
- {.key = {NULL}}};
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = qd_init,
+ .fini = qd_fini,
+ .reconfigure = qd_reconfigure,
+ .notify = qd_notify,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1},
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "quotad",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/quota/src/quotad.sym b/xlators/features/quota/src/quotad.sym
deleted file mode 100644
index 0829ffe1584..00000000000
--- a/xlators/features/quota/src/quotad.sym
+++ /dev/null
@@ -1,7 +0,0 @@
-fops
-cbks
-class_methods
-options
-mem_acct_init
-reconfigure
-dumpops
diff --git a/xlators/features/read-only/src/read-only-common.c b/xlators/features/read-only/src/read-only-common.c
index 39985169991..9640e7e3eee 100644
--- a/xlators/features/read-only/src/read-only-common.c
+++ b/xlators/features/read-only/src/read-only-common.c
@@ -9,7 +9,7 @@
*/
#include "read-only.h"
#include "read-only-mem-types.h"
-#include "defaults.h"
+#include <glusterfs/defaults.h>
gf_boolean_t
is_readonly_or_worm_enabled(call_frame_t *frame, xlator_t *this)
diff --git a/xlators/features/read-only/src/read-only-common.h b/xlators/features/read-only/src/read-only-common.h
index 32719da28f1..5561961ffa2 100644
--- a/xlators/features/read-only/src/read-only-common.h
+++ b/xlators/features/read-only/src/read-only-common.h
@@ -7,8 +7,8 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
gf_boolean_t
is_readonly_or_worm_enabled(call_frame_t *frame, xlator_t *this);
diff --git a/xlators/features/read-only/src/read-only-mem-types.h b/xlators/features/read-only/src/read-only-mem-types.h
index 4baaeb41216..c67d6c02cd0 100644
--- a/xlators/features/read-only/src/read-only-mem-types.h
+++ b/xlators/features/read-only/src/read-only-mem-types.h
@@ -11,7 +11,7 @@
#ifndef __READONLY_MEM_TYPES_H__
#define __READONLY_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_read_only_mem_types_ {
gf_read_only_mt_priv_t = gf_common_mt_end + 1,
diff --git a/xlators/features/read-only/src/read-only.c b/xlators/features/read-only/src/read-only.c
index c92a9801196..48654998e63 100644
--- a/xlators/features/read-only/src/read-only.c
+++ b/xlators/features/read-only/src/read-only.c
@@ -7,7 +7,6 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "defaults.h"
#include "read-only-common.h"
#include "read-only-mem-types.h"
#include "read-only.h"
@@ -130,3 +129,16 @@ struct volume_options options[] = {
"\"off\" by default."},
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "read-only",
+ .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/features/read-only/src/read-only.h b/xlators/features/read-only/src/read-only.h
index d74053a2a8f..aced5d3c577 100644
--- a/xlators/features/read-only/src/read-only.h
+++ b/xlators/features/read-only/src/read-only.h
@@ -11,25 +11,26 @@
#ifndef __READONLY_H__
#define __READONLY_H__
-#include "read-only-mem-types.h"
-#include "xlator.h"
+#include <stdint.h> // for uint64_t, uint8_t
+#include <sys/time.h> // for time_t
+#include "glusterfs/glusterfs.h" // for gf_boolean_t
typedef struct {
uint8_t worm : 1;
uint8_t retain : 1;
uint8_t legal_hold : 1;
uint8_t ret_mode : 1;
- uint64_t ret_period;
- uint64_t auto_commit_period;
+ int64_t ret_period;
+ int64_t auto_commit_period;
} worm_reten_state_t;
typedef struct {
gf_boolean_t readonly_or_worm_enabled;
gf_boolean_t worm_file;
gf_boolean_t worm_files_deletable;
- uint64_t reten_period;
- uint64_t com_period;
- char *reten_mode;
+ int64_t reten_period;
+ int64_t com_period;
+ int reten_mode;
time_t start_time;
} read_only_priv_t;
diff --git a/xlators/features/read-only/src/worm-helper.c b/xlators/features/read-only/src/worm-helper.c
index 3f882fe08d6..df45f2a940b 100644
--- a/xlators/features/read-only/src/worm-helper.c
+++ b/xlators/features/read-only/src/worm-helper.c
@@ -9,8 +9,8 @@
*/
#include "read-only-mem-types.h"
#include "read-only.h"
-#include "xlator.h"
-#include "syncop.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/syncop.h>
#include "worm-helper.h"
/*Function to check whether file is read-only.
@@ -41,7 +41,7 @@ worm_init_state(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr)
GF_VALIDATE_OR_GOTO("worm", this, out);
GF_VALIDATE_OR_GOTO(this->name, file_ptr, out);
- start_time = time(NULL);
+ start_time = gf_time();
dict = dict_new();
if (!dict) {
gf_log(this->name, GF_LOG_ERROR, "Error creating the dict");
@@ -84,10 +84,7 @@ worm_set_state(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr,
retention_state->worm = 1;
retention_state->retain = 1;
retention_state->legal_hold = 0;
- if (strcmp(priv->reten_mode, "relax") == 0)
- retention_state->ret_mode = 0;
- else
- retention_state->ret_mode = 1;
+ retention_state->ret_mode = priv->reten_mode;
retention_state->ret_period = priv->reten_period;
retention_state->auto_commit_period = priv->com_period;
if (fop_with_fd)
@@ -97,7 +94,7 @@ worm_set_state(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr,
if (ret)
goto out;
stbuf->ia_mtime = stpre.ia_mtime;
- stbuf->ia_atime = time(NULL) + retention_state->ret_period;
+ stbuf->ia_atime = gf_time() + retention_state->ret_period;
if (fop_with_fd)
ret = syncop_fsetattr(this, (fd_t *)file_ptr, stbuf, GF_SET_ATTR_ATIME,
@@ -289,6 +286,7 @@ gf_worm_state_transition(xlator_t *this, gf_boolean_t fop_with_fd,
{
int op_errno = EROFS;
int ret = -1;
+ time_t now = 0;
uint64_t com_period = 0;
uint64_t start_time = 0;
dict_t *dict = NULL;
@@ -340,8 +338,10 @@ gf_worm_state_transition(xlator_t *this, gf_boolean_t fop_with_fd,
goto out;
}
- if (ret == -1 && (time(NULL) - start_time) >= com_period) {
- if ((time(NULL) - stbuf.ia_mtime) >= com_period) {
+ now = gf_time();
+
+ if (ret == -1 && (now - start_time) >= com_period) {
+ if ((now - stbuf.ia_mtime) >= com_period) {
ret = worm_set_state(this, fop_with_fd, file_ptr, &reten_state,
&stbuf);
if (ret) {
@@ -355,10 +355,10 @@ gf_worm_state_transition(xlator_t *this, gf_boolean_t fop_with_fd,
op_errno = 0;
goto out;
}
- } else if (ret == -1 && (time(NULL) - start_time) < com_period) {
+ } else if (ret == -1 && (now - start_time) < com_period) {
op_errno = 0;
goto out;
- } else if (reten_state.retain && ((time(NULL) >= stbuf.ia_atime))) {
+ } else if (reten_state.retain && ((now >= stbuf.ia_atime))) {
gf_worm_state_lookup(this, fop_with_fd, file_ptr, &reten_state, &stbuf);
}
if (reten_state.worm && !reten_state.retain && priv->worm_files_deletable &&
diff --git a/xlators/features/read-only/src/worm.c b/xlators/features/read-only/src/worm.c
index db128b75196..1cc5526d5cd 100644
--- a/xlators/features/read-only/src/worm.c
+++ b/xlators/features/read-only/src/worm.c
@@ -7,12 +7,12 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "read-only-common.h"
#include "read-only-mem-types.h"
#include "read-only.h"
-#include "syncop.h"
+#include <glusterfs/syncop.h>
#include "worm-helper.h"
int32_t
@@ -292,6 +292,12 @@ worm_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
goto out;
}
}
+ reten_state.ret_period = reten_state.ret_period + stbuf->ia_atime -
+ stpre.ia_atime;
+ ret = gf_worm_set_xattr(this, &reten_state, _gf_false, loc);
+ if (ret) {
+ goto out;
+ }
stbuf->ia_mtime = stpre.ia_mtime;
}
}
@@ -372,6 +378,13 @@ worm_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
goto out;
}
}
+ reten_state.ret_period = reten_state.ret_period + stbuf->ia_atime -
+ stpre.ia_atime;
+ ret = gf_worm_set_xattr(this, &reten_state, _gf_true, fd);
+ if (ret) {
+ goto out;
+ }
+
stbuf->ia_mtime = stpre.ia_mtime;
}
}
@@ -427,29 +440,22 @@ worm_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
{
int ret = 0;
read_only_priv_t *priv = NULL;
- dict_t *dict = NULL;
+ // In case of an error exit because fd can be NULL and this would
+ // cause an segfault when performing fsetxattr . We explicitly
+ // unwind to avoid future problems
+ if (op_ret < 0) {
+ goto out;
+ }
priv = this->private;
GF_ASSERT(priv);
if (priv->worm_file) {
- dict = dict_new();
- if (!dict) {
- gf_log(this->name, GF_LOG_ERROR,
- "Error creating the "
- "dict");
- goto out;
- }
- ret = dict_set_int8(dict, "trusted.worm_file", 1);
+ ret = fd_ctx_set(fd, this, 1);
if (ret) {
gf_log(this->name, GF_LOG_ERROR,
- "Error in setting "
- "the dict");
- goto out;
- }
- ret = syncop_fsetxattr(this, fd, dict, 0, NULL, NULL);
- if (ret) {
- gf_log(this->name, GF_LOG_ERROR, "Error setting xattr");
- goto out;
+ "Failed to set the fd ctx "
+ "for gfid:%s . Worm feature may not work for the gfid",
+ uuid_utoa(inode->gfid));
}
ret = worm_init_state(this, _gf_true, fd);
if (ret) {
@@ -460,8 +466,6 @@ worm_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
out:
STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf,
preparent, postparent, xdata);
- if (dict)
- dict_unref(dict);
return ret;
}
@@ -475,11 +479,21 @@ worm_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
return 0;
}
+static void
+set_reten_mode(read_only_priv_t *priv, char *reten_mode)
+{
+ if (strcmp(reten_mode, "relax") == 0)
+ priv->reten_mode = 0;
+ else
+ priv->reten_mode = 1;
+}
+
int32_t
init(xlator_t *this)
{
int ret = -1;
read_only_priv_t *priv = NULL;
+ char *reten_mode = NULL;
if (!this->children || this->children->next) {
gf_log(this->name, GF_LOG_ERROR,
@@ -509,9 +523,10 @@ init(xlator_t *this)
GF_OPTION_INIT("worm", priv->readonly_or_worm_enabled, bool, out);
GF_OPTION_INIT("worm-file-level", priv->worm_file, bool, out);
- GF_OPTION_INIT("default-retention-period", priv->reten_period, uint64, out);
- GF_OPTION_INIT("auto-commit-period", priv->com_period, uint64, out);
- GF_OPTION_INIT("retention-mode", priv->reten_mode, str, out);
+ GF_OPTION_INIT("default-retention-period", priv->reten_period, int64, out);
+ GF_OPTION_INIT("auto-commit-period", priv->com_period, int64, out);
+ GF_OPTION_INIT("retention-mode", reten_mode, str, out);
+ set_reten_mode(priv, reten_mode);
GF_OPTION_INIT("worm-files-deletable", priv->worm_files_deletable, bool,
out);
@@ -524,6 +539,7 @@ int
reconfigure(xlator_t *this, dict_t *options)
{
read_only_priv_t *priv = NULL;
+ char *reten_mode = NULL;
int ret = -1;
priv = this->private;
@@ -533,9 +549,10 @@ reconfigure(xlator_t *this, dict_t *options)
out);
GF_OPTION_RECONF("worm-file-level", priv->worm_file, options, bool, out);
GF_OPTION_RECONF("default-retention-period", priv->reten_period, options,
- uint64, out);
- GF_OPTION_RECONF("retention-mode", priv->reten_mode, options, str, out);
- GF_OPTION_RECONF("auto-commit-period", priv->com_period, options, uint64,
+ int64, out);
+ GF_OPTION_RECONF("retention-mode", reten_mode, options, str, out);
+ set_reten_mode(priv, reten_mode);
+ GF_OPTION_RECONF("auto-commit-period", priv->com_period, options, int64,
out);
GF_OPTION_RECONF("worm-files-deletable", priv->worm_files_deletable,
options, bool, out);
@@ -556,6 +573,7 @@ fini(xlator_t *this)
mem_put(priv);
this->private = NULL;
mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
out:
return;
}
@@ -583,7 +601,62 @@ struct xlator_fops fops = {
.lk = ro_lk,
};
-struct xlator_cbks cbks;
+int32_t
+worm_release(xlator_t *this, fd_t *fd)
+{
+ dict_t *dict = NULL;
+ int ret = -1;
+ dict = dict_new();
+ uint64_t value = 0;
+ loc_t loc = {
+ 0,
+ };
+ read_only_priv_t *priv = NULL;
+ priv = this->private;
+
+ if (priv->worm_file) {
+ if (!dict) {
+ gf_log(this->name, GF_LOG_ERROR, "Error creating the dict");
+ goto out;
+ }
+
+ ret = fd_ctx_get(fd, this, &value);
+ if (ret) {
+ gf_log(this->name, GF_LOG_DEBUG, "Failed to get the fd ctx");
+ }
+ if (!value) {
+ goto out;
+ }
+
+ ret = dict_set_int8(dict, "trusted.worm_file", 1);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Error in setting "
+ "the dict");
+ goto out;
+ }
+
+ loc.inode = inode_ref(fd->inode);
+ gf_uuid_copy(loc.gfid, fd->inode->gfid);
+ ret = syncop_setxattr(this, &loc, dict, 0, NULL, NULL);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "Error setting xattr");
+ goto out;
+ }
+
+ gf_worm_state_transition(this, _gf_false, &loc, GF_FOP_WRITE);
+ }
+
+out:
+ loc_wipe(&loc);
+ if (dict)
+ dict_unref(dict);
+ return 0;
+}
+
+struct xlator_cbks cbks = {
+ .release = worm_release,
+};
struct volume_options options[] = {
{.key = {"worm"},
@@ -634,3 +707,16 @@ struct volume_options options[] = {
.description = "Auto commit period for the files."},
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "worm",
+ .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/features/sdfs/src/sdfs-messages.h b/xlators/features/sdfs/src/sdfs-messages.h
index cf866c8512a..3053efa8935 100644
--- a/xlators/features/sdfs/src/sdfs-messages.h
+++ b/xlators/features/sdfs/src/sdfs-messages.h
@@ -11,7 +11,7 @@
#ifndef _DFS_MESSAGES_H_
#define _DFS_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* file bit-rot-bitd-messages.h
* brief SDFS log-message IDs and their descriptions
diff --git a/xlators/features/sdfs/src/sdfs.c b/xlators/features/sdfs/src/sdfs.c
index 5dbe0653cbc..aaf13f0852e 100644
--- a/xlators/features/sdfs/src/sdfs.c
+++ b/xlators/features/sdfs/src/sdfs.c
@@ -139,6 +139,8 @@ sdfs_get_new_frame_common(call_frame_t *frame, call_frame_t **new_frame)
}
local->main_frame = frame;
+ /*Set unique lk-owner for the fop*/
+ set_lk_owner_from_ptr(&(*new_frame)->root->lk_owner, (*new_frame)->root);
ret = 0;
err:
@@ -175,9 +177,10 @@ sdfs_get_new_frame(call_frame_t *frame, loc_t *loc, call_frame_t **new_frame)
ret = 0;
err:
- if ((ret < 0) && (*new_frame != NULL)) {
+ if (ret && (*new_frame)) {
SDFS_STACK_DESTROY((*new_frame));
*new_frame = NULL;
+ ret = -1;
}
return ret;
@@ -868,6 +871,8 @@ sdfs_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
op_errno = ENOMEM;
goto err;
}
+ /*Set unique lk-owner for the fop*/
+ set_lk_owner_from_ptr(&new_frame->root->lk_owner, new_frame->root);
gf_client_ref(client);
new_frame->root->client = client;
@@ -1121,6 +1126,8 @@ sdfs_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
op_errno = ENOMEM;
goto err;
}
+ /*Set unique lk-owner for the fop*/
+ set_lk_owner_from_ptr(&new_frame->root->lk_owner, new_frame->root);
gf_client_ref(client);
new_frame->root->client = client;
@@ -1425,12 +1432,12 @@ out:
return ret;
}
-int
+void
fini(xlator_t *this)
{
mem_pool_destroy(this->local_pool);
-
- return 0;
+ this->local_pool = NULL;
+ return;
}
struct xlator_fops fops = {
@@ -1451,10 +1458,22 @@ struct xlator_cbks cbks;
struct volume_options options[] = {
{.key = {"pass-through"},
.type = GF_OPTION_TYPE_BOOL,
- .default_value = "false",
+ .default_value = "true",
.op_version = {GD_OP_VERSION_4_1_0},
.flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
.tags = {"sdfs"},
.description = "Enable/Disable dentry serialize functionality"},
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .op_version = {GD_OP_VERSION_4_0_0},
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "sdfs",
+ .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/features/sdfs/src/sdfs.h b/xlators/features/sdfs/src/sdfs.h
index 986d7c2731c..dded5a2d7fc 100644
--- a/xlators/features/sdfs/src/sdfs.h
+++ b/xlators/features/sdfs/src/sdfs.h
@@ -8,10 +8,10 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "call-stub.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/call-stub.h>
#include "sdfs-messages.h"
-#include "atomic.h"
+#include <glusterfs/atomic.h>
#define SDFS_LOCK_COUNT_MAX 2
diff --git a/xlators/features/selinux/src/selinux-mem-types.h b/xlators/features/selinux/src/selinux-mem-types.h
index a8c544fba52..553e59e5a9d 100644
--- a/xlators/features/selinux/src/selinux-mem-types.h
+++ b/xlators/features/selinux/src/selinux-mem-types.h
@@ -10,7 +10,7 @@
#ifndef __SELINUX_MEM_TYPES_H__
#define __SELINUX_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_selinux_mem_types_ {
gf_selinux_mt_selinux_priv_t = gf_common_mt_end + 1,
diff --git a/xlators/features/selinux/src/selinux-messages.h b/xlators/features/selinux/src/selinux-messages.h
index 1f5739d8dc7..f49a54f956c 100644
--- a/xlators/features/selinux/src/selinux-messages.h
+++ b/xlators/features/selinux/src/selinux-messages.h
@@ -11,7 +11,7 @@
#ifndef _SELINUX_MESSAGES_H__
#define _SELINUX_MESSAGES_H__
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
diff --git a/xlators/features/selinux/src/selinux.c b/xlators/features/selinux/src/selinux.c
index 91e74d1a3fc..9b1b4b55e1a 100644
--- a/xlators/features/selinux/src/selinux.c
+++ b/xlators/features/selinux/src/selinux.c
@@ -8,12 +8,12 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#include "selinux.h"
#include "selinux-messages.h"
#include "selinux-mem-types.h"
-#include "compat-errno.h"
+#include <glusterfs/compat-errno.h>
static int
selinux_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
@@ -234,7 +234,6 @@ init(xlator_t *this)
priv = GF_CALLOC(1, sizeof(*priv), gf_selinux_mt_selinux_priv_t);
if (!priv) {
gf_log(this->name, GF_LOG_ERROR, "out of memory");
- ret = ENOMEM;
goto out;
}
@@ -242,7 +241,6 @@ init(xlator_t *this)
this->local_pool = mem_pool_new(selinux_priv_t, 64);
if (!this->local_pool) {
- ret = -1;
gf_msg(this->name, GF_LOG_ERROR, ENOMEM, SL_MSG_ENOMEM,
"Failed to create local_t's memory pool");
goto out;
@@ -252,10 +250,9 @@ init(xlator_t *this)
ret = 0;
out:
if (ret) {
- if (priv) {
- GF_FREE(priv);
- }
+ GF_FREE(priv);
mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
}
return ret;
}
@@ -284,6 +281,7 @@ fini(xlator_t *this)
GF_FREE(priv);
mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
return;
}
@@ -310,3 +308,16 @@ struct volume_options options[] = {
{
.key = {NULL},
}};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "selinux",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/selinux/src/selinux.h b/xlators/features/selinux/src/selinux.h
index 787bff348f0..1bbdad3bb36 100644
--- a/xlators/features/selinux/src/selinux.h
+++ b/xlators/features/selinux/src/selinux.h
@@ -10,7 +10,7 @@
#ifndef __SELINUX_H__
#define __SELINUX_H__
-#include "common-utils.h"
+#include <glusterfs/common-utils.h>
#define SELINUX_XATTR "security.selinux"
#define SELINUX_GLUSTER_XATTR "trusted.glusterfs.selinux"
diff --git a/xlators/features/shard/src/shard-mem-types.h b/xlators/features/shard/src/shard-mem-types.h
index 39a57ba6fd0..1fe7e2e2798 100644
--- a/xlators/features/shard/src/shard-mem-types.h
+++ b/xlators/features/shard/src/shard-mem-types.h
@@ -10,7 +10,7 @@
#ifndef __SHARD_MEM_TYPES_H__
#define __SHARD_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_shard_mem_types_ {
gf_shard_mt_priv_t = gf_common_mt_end + 1,
diff --git a/xlators/features/shard/src/shard-messages.h b/xlators/features/shard/src/shard-messages.h
index 89a96709219..2d0867eb136 100644
--- a/xlators/features/shard/src/shard-messages.h
+++ b/xlators/features/shard/src/shard-messages.h
@@ -11,7 +11,7 @@
#ifndef _SHARD_MESSAGES_H_
#define _SHARD_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index 5ffeaa63628..e5f93063943 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -12,9 +12,9 @@
#include "shard.h"
#include "shard-mem-types.h"
-#include "byte-order.h"
-#include "defaults.h"
-#include "statedump.h"
+#include <glusterfs/byte-order.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/statedump.h>
static gf_boolean_t
__is_shard_dir(uuid_t gfid)
@@ -80,7 +80,8 @@ __shard_inode_ctx_get(inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx)
INIT_LIST_HEAD(&ctx_p->ilist);
INIT_LIST_HEAD(&ctx_p->to_fsync_list);
- ret = __inode_ctx_set(inode, this, (uint64_t *)&ctx_p);
+ ctx_uint = (uint64_t)(uintptr_t)ctx_p;
+ ret = __inode_ctx_set(inode, this, &ctx_uint);
if (ret < 0) {
GF_FREE(ctx_p);
return ret;
@@ -273,6 +274,7 @@ shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this,
* of the to_fsync_list.
*/
inode_ref(base_inode);
+ inode_ref(shard_inode);
LOCK(&base_inode->lock);
LOCK(&shard_inode->lock);
@@ -286,8 +288,10 @@ shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this,
/* Unref the base inode corresponding to the ref above, if the shard is
* found to be already part of the fsync list.
*/
- if (ret != 0)
+ if (ret != 0) {
inode_unref(base_inode);
+ inode_unref(shard_inode);
+ }
return ret;
}
@@ -509,6 +513,9 @@ shard_local_wipe(shard_local_t *local)
loc_wipe(&local->int_entrylk.loc);
loc_wipe(&local->newloc);
+ if (local->name)
+ GF_FREE(local->name);
+
if (local->int_entrylk.basename)
GF_FREE(local->int_entrylk.basename);
if (local->fd)
@@ -686,8 +693,7 @@ __shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this,
ctx->block_num = block_num;
list_add_tail(&ctx->ilist, &priv->ilist_head);
priv->inode_count++;
- if (base_inode)
- ctx->base_inode = inode_ref(base_inode);
+ ctx->base_inode = inode_ref(base_inode);
} else {
/*If on the other hand there is no available slot for this inode
* in the list, delete the lru inode from the head of the list,
@@ -734,6 +740,10 @@ __shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this,
inode_unlink(lru_inode, priv->dot_shard_inode, block_bname);
inode_forget(lru_inode, 0);
} else {
+ /* The following unref corresponds to the ref
+ * held when the shard was added to fsync list.
+ */
+ inode_unref(lru_inode);
fsync_inode = lru_inode;
if (lru_base_inode)
inode_unref(lru_base_inode);
@@ -758,8 +768,7 @@ __shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this,
else
gf_uuid_copy(ctx->base_gfid, gfid);
ctx->block_num = block_num;
- if (base_inode)
- ctx->base_inode = inode_ref(base_inode);
+ ctx->base_inode = inode_ref(base_inode);
list_add_tail(&ctx->ilist, &priv->ilist_head);
}
} else {
@@ -879,26 +888,34 @@ int
shard_common_inode_write_success_unwind(glusterfs_fop_t fop,
call_frame_t *frame, int32_t op_ret)
{
- shard_local_t *local = NULL;
+ shard_local_t *local = frame->local;
- local = frame->local;
+ /* the below 3 variables are required because, in SHARD_STACK_UNWIND()
+ macro, there is a check for local being null. So many static analyzers
+ backtrace the code with assumption of possible (local == NULL) case,
+ and complains for below lines. By handling it like below, we overcome
+ the warnings */
+
+ struct iatt *prebuf = ((local) ? &local->prebuf : NULL);
+ struct iatt *postbuf = ((local) ? &local->postbuf : NULL);
+ dict_t *xattr_rsp = ((local) ? local->xattr_rsp : NULL);
switch (fop) {
case GF_FOP_WRITE:
- SHARD_STACK_UNWIND(writev, frame, op_ret, 0, &local->prebuf,
- &local->postbuf, local->xattr_rsp);
+ SHARD_STACK_UNWIND(writev, frame, op_ret, 0, prebuf, postbuf,
+ xattr_rsp);
break;
case GF_FOP_FALLOCATE:
- SHARD_STACK_UNWIND(fallocate, frame, op_ret, 0, &local->prebuf,
- &local->postbuf, local->xattr_rsp);
+ SHARD_STACK_UNWIND(fallocate, frame, op_ret, 0, prebuf, postbuf,
+ xattr_rsp);
break;
case GF_FOP_ZEROFILL:
- SHARD_STACK_UNWIND(zerofill, frame, op_ret, 0, &local->prebuf,
- &local->postbuf, local->xattr_rsp);
+ SHARD_STACK_UNWIND(zerofill, frame, op_ret, 0, prebuf, postbuf,
+ xattr_rsp);
break;
case GF_FOP_DISCARD:
- SHARD_STACK_UNWIND(discard, frame, op_ret, 0, &local->prebuf,
- &local->postbuf, local->xattr_rsp);
+ SHARD_STACK_UNWIND(discard, frame, op_ret, 0, prebuf, postbuf,
+ xattr_rsp);
break;
default:
gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
@@ -987,6 +1004,10 @@ shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode)
}
int
+shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame,
+ xlator_t *this);
+
+int
shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
shard_post_resolve_fop_handler_t post_res_handler)
{
@@ -1003,21 +1024,47 @@ shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
inode_t *fsync_inode = NULL;
shard_priv_t *priv = NULL;
shard_local_t *local = NULL;
+ uint64_t resolve_count = 0;
priv = this->private;
local = frame->local;
local->call_count = 0;
shard_idx_iter = local->first_block;
res_inode = local->resolver_base_inode;
+
+ if ((local->op_ret < 0) || (local->resolve_not))
+ goto out;
+
+ /* If this prealloc FOP is for fresh file creation, then the size of the
+ * file will be 0. Then there will be no shards associated with this file.
+ * So we can skip the lookup process for the shards which do not exists
+ * and directly issue mknod to crete shards.
+ *
+ * In case the prealloc fop is to extend the preallocated file to bigger
+ * size then just lookup and populate inodes of existing shards and
+ * update the create count
+ */
+ if (local->fop == GF_FOP_FALLOCATE) {
+ if (!local->prebuf.ia_size) {
+ local->inode_list[0] = inode_ref(res_inode);
+ local->create_count = local->last_block;
+ shard_common_inode_write_post_lookup_shards_handler(frame, this);
+ return 0;
+ }
+ if (local->prebuf.ia_size < local->total_size)
+ local->create_count = local->last_block -
+ ((local->prebuf.ia_size - 1) /
+ local->block_size);
+ }
+
+ resolve_count = local->last_block - local->create_count;
+
if (res_inode)
gf_uuid_copy(gfid, res_inode->gfid);
else
gf_uuid_copy(gfid, local->base_gfid);
- if ((local->op_ret < 0) || (local->resolve_not))
- goto out;
-
- while (shard_idx_iter <= local->last_block) {
+ while (shard_idx_iter <= resolve_count) {
i++;
if (shard_idx_iter == 0) {
local->inode_list[i] = inode_ref(res_inode);
@@ -1130,6 +1177,7 @@ shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd,
{
int ret = -1;
int64_t *size_attr = NULL;
+ int64_t delta_blocks = 0;
inode_t *inode = NULL;
shard_local_t *local = NULL;
dict_t *xattr_req = NULL;
@@ -1151,13 +1199,13 @@ shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd,
/* If both size and block count have not changed, then skip the xattrop.
*/
- if ((local->delta_size + local->hole_size == 0) &&
- (local->delta_blocks == 0)) {
+ delta_blocks = GF_ATOMIC_GET(local->delta_blocks);
+ if ((local->delta_size + local->hole_size == 0) && (delta_blocks == 0)) {
goto out;
}
ret = shard_set_size_attrs(local->delta_size + local->hole_size,
- local->delta_blocks, &size_attr);
+ delta_blocks, &size_attr);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SIZE_SET_FAILED,
"Failed to set size attrs for %s", uuid_utoa(inode->gfid));
@@ -1461,16 +1509,45 @@ int
shard_start_background_deletion(xlator_t *this)
{
int ret = 0;
+ gf_boolean_t i_cleanup = _gf_true;
+ shard_priv_t *priv = NULL;
call_frame_t *cleanup_frame = NULL;
+ priv = this->private;
+
+ LOCK(&priv->lock);
+ {
+ switch (priv->bg_del_state) {
+ case SHARD_BG_DELETION_NONE:
+ i_cleanup = _gf_true;
+ priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING;
+ break;
+ case SHARD_BG_DELETION_LAUNCHING:
+ i_cleanup = _gf_false;
+ break;
+ case SHARD_BG_DELETION_IN_PROGRESS:
+ priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING;
+ i_cleanup = _gf_false;
+ break;
+ default:
+ break;
+ }
+ }
+ UNLOCK(&priv->lock);
+ if (!i_cleanup)
+ return 0;
+
cleanup_frame = create_frame(this, this->ctx->pool);
if (!cleanup_frame) {
gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
"Failed to create "
"new frame to delete shards");
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto err;
}
+ set_lk_owner_from_ptr(&cleanup_frame->root->lk_owner, cleanup_frame->root);
+
ret = synctask_new(this->ctx->env, shard_delete_shards,
shard_delete_shards_cbk, cleanup_frame, cleanup_frame);
if (ret < 0) {
@@ -1479,7 +1556,16 @@ shard_start_background_deletion(xlator_t *this)
"failed to create task to do background "
"cleanup of shards");
STACK_DESTROY(cleanup_frame->root);
+ goto err;
}
+ return 0;
+
+err:
+ LOCK(&priv->lock);
+ {
+ priv->bg_del_state = SHARD_BG_DELETION_NONE;
+ }
+ UNLOCK(&priv->lock);
return ret;
}
@@ -1488,7 +1574,7 @@ shard_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, dict_t *xdata, struct iatt *postparent)
{
- int ret = 0;
+ int ret = -1;
shard_priv_t *priv = NULL;
gf_boolean_t i_start_cleanup = _gf_false;
@@ -1521,23 +1607,25 @@ shard_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
LOCK(&priv->lock);
{
- if (priv->first_lookup == SHARD_FIRST_LOOKUP_PENDING) {
- priv->first_lookup = SHARD_FIRST_LOOKUP_IN_PROGRESS;
+ if (priv->first_lookup_done == _gf_false) {
+ priv->first_lookup_done = _gf_true;
i_start_cleanup = _gf_true;
}
}
UNLOCK(&priv->lock);
- if (i_start_cleanup) {
- ret = shard_start_background_deletion(this);
- if (ret) {
- LOCK(&priv->lock);
- {
- priv->first_lookup = SHARD_FIRST_LOOKUP_PENDING;
- }
- UNLOCK(&priv->lock);
+ if (!i_start_cleanup)
+ goto unwind;
+
+ ret = shard_start_background_deletion(this);
+ if (ret < 0) {
+ LOCK(&priv->lock);
+ {
+ priv->first_lookup_done = _gf_false;
}
+ UNLOCK(&priv->lock);
}
+
unwind:
SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata,
postparent);
@@ -1553,7 +1641,8 @@ shard_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
shard_local_t *local = NULL;
this->itable = loc->inode->table;
- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
+ if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) &&
+ (frame->root->pid != GF_CLIENT_PID_GLFS_HEAL)) {
SHARD_ENTRY_FOP_CHECK(loc, op_errno, err);
}
@@ -1603,26 +1692,24 @@ err:
}
int
-shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xdata,
- struct iatt *postparent)
+shard_set_iattr_invoke_post_handler(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
int ret = -1;
int32_t mask = SHARD_INODE_WRITE_MASK;
- shard_local_t *local = NULL;
+ shard_local_t *local = frame->local;
shard_inode_ctx_t ctx = {
0,
};
- local = frame->local;
-
if (op_ret < 0) {
gf_msg(this->name, GF_LOG_ERROR, op_errno,
SHARD_MSG_BASE_FILE_LOOKUP_FAILED,
"Lookup on base file"
" failed : %s",
- loc_gfid_utoa(&(local->loc)));
+ uuid_utoa(inode->gfid));
local->op_ret = op_ret;
local->op_errno = op_errno;
goto unwind;
@@ -1656,18 +1743,57 @@ unwind:
}
int
-shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc,
- shard_post_fop_handler_t handler)
+shard_fstat_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ shard_local_t *local = frame->local;
+
+ shard_set_iattr_invoke_post_handler(frame, this, local->fd->inode, op_ret,
+ op_errno, buf, xdata);
+ return 0;
+}
+
+int
+shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ /* In case of op_ret < 0, inode passed to this function will be NULL
+ ex: in case of op_errno = ENOENT. So refer prefilled inode data
+ which is part of local.
+ Note: Reassigning/overriding the inode passed to this cbk with inode
+ which is part of *struct shard_local_t* won't cause any issue as
+ both inodes have same reference/address as of the inode passed */
+ inode = ((shard_local_t *)frame->local)->loc.inode;
+
+ shard_set_iattr_invoke_post_handler(frame, this, inode, op_ret, op_errno,
+ buf, xdata);
+ return 0;
+}
+
+/* This function decides whether to make file based lookup or
+ * fd based lookup (fstat) depending on the 3rd and 4th arg.
+ * If fd != NULL and loc == NULL then call is for fstat
+ * If fd == NULL and loc != NULL then call is for file based
+ * lookup. Please pass args based on the requirement.
+ */
+int
+shard_refresh_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ fd_t *fd, shard_post_fop_handler_t handler)
{
int ret = -1;
+ inode_t *inode = NULL;
shard_local_t *local = NULL;
dict_t *xattr_req = NULL;
gf_boolean_t need_refresh = _gf_false;
local = frame->local;
local->handler = handler;
+ inode = fd ? fd->inode : loc->inode;
- ret = shard_inode_ctx_fill_iatt_from_cache(loc->inode, this, &local->prebuf,
+ ret = shard_inode_ctx_fill_iatt_from_cache(inode, this, &local->prebuf,
&need_refresh);
/* By this time, inode ctx should have been created either in create,
* mknod, readdirp or lookup. If not it is a bug!
@@ -1676,7 +1802,7 @@ shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc,
gf_msg_debug(this->name, 0,
"Skipping lookup on base file: %s"
"Serving prebuf off the inode ctx cache",
- uuid_utoa(loc->gfid));
+ uuid_utoa(inode->gfid));
goto out;
}
@@ -1687,10 +1813,14 @@ shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc,
goto out;
}
- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, loc->gfid, local, out);
+ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, inode->gfid, local, out);
- STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
+ if (fd)
+ STACK_WIND(frame, shard_fstat_base_file_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xattr_req);
+ else
+ STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
dict_unref(xattr_req);
return 0;
@@ -1902,6 +2032,7 @@ shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
dict_t *xdata)
{
inode_t *inode = NULL;
+ int64_t delta_blocks = 0;
shard_local_t *local = NULL;
local = frame->local;
@@ -1922,14 +2053,15 @@ shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
}
local->postbuf.ia_size = local->offset;
- local->postbuf.ia_blocks -= (prebuf->ia_blocks - postbuf->ia_blocks);
/* Let the delta be negative. We want xattrop to do subtraction */
local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size;
- local->delta_blocks = postbuf->ia_blocks - prebuf->ia_blocks;
+ delta_blocks = GF_ATOMIC_ADD(local->delta_blocks,
+ postbuf->ia_blocks - prebuf->ia_blocks);
+ GF_ASSERT(delta_blocks <= 0);
+ local->postbuf.ia_blocks += delta_blocks;
local->hole_size = 0;
- shard_inode_ctx_set(inode, this, postbuf, 0, SHARD_MASK_TIMES);
-
+ shard_inode_ctx_set(inode, this, &local->postbuf, 0, SHARD_MASK_TIMES);
shard_update_file_size(frame, this, NULL, &local->loc,
shard_post_update_size_truncate_handler);
return 0;
@@ -1957,10 +2089,9 @@ shard_truncate_last_shard(call_frame_t *frame, xlator_t *this, inode_t *inode)
*/
if (!inode) {
gf_msg_debug(this->name, 0,
- "Last shard to be truncated absent"
- " in backend: %s. Directly proceeding to update "
- "file size",
- uuid_utoa(inode->gfid));
+ "Last shard to be truncated absent in backend: %" PRIu64
+ " of gfid %s. Directly proceeding to update file size",
+ local->first_block, uuid_utoa(local->loc.inode->gfid));
shard_update_file_size(frame, this, NULL, &local->loc,
shard_post_update_size_truncate_handler);
return 0;
@@ -1989,8 +2120,10 @@ shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *preparent, struct iatt *postparent,
dict_t *xdata)
{
+ int ret = 0;
int call_count = 0;
int shard_block_num = (long)cookie;
+ uint64_t block_count = 0;
shard_local_t *local = NULL;
local = frame->local;
@@ -2000,6 +2133,16 @@ shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local->op_errno = op_errno;
goto done;
}
+ ret = dict_get_uint64(xdata, GF_GET_FILE_BLOCK_COUNT, &block_count);
+ if (!ret) {
+ GF_ATOMIC_SUB(local->delta_blocks, block_count);
+ } else {
+ /* dict_get failed possibly due to a heterogeneous cluster? */
+ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+ "Failed to get key %s from dict during truncate of gfid %s",
+ GF_GET_FILE_BLOCK_COUNT,
+ uuid_utoa(local->resolver_base_inode->gfid));
+ }
shard_unlink_block_inode(local, shard_block_num);
done:
@@ -2029,6 +2172,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
gf_boolean_t wind_failed = _gf_false;
shard_local_t *local = NULL;
shard_priv_t *priv = NULL;
+ dict_t *xdata_req = NULL;
local = frame->local;
priv = this->private;
@@ -2056,7 +2200,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
local->postbuf.ia_size = local->offset;
local->postbuf.ia_blocks = local->prebuf.ia_blocks;
local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size;
- local->delta_blocks = 0;
+ GF_ATOMIC_INIT(local->delta_blocks, 0);
local->hole_size = 0;
shard_update_file_size(frame, this, local->fd, &local->loc,
shard_post_update_size_truncate_handler);
@@ -2065,6 +2209,21 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
local->call_count = call_count;
i = 1;
+ xdata_req = dict_new();
+ if (!xdata_req) {
+ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
+ return 0;
+ }
+ ret = dict_set_uint64(xdata_req, GF_GET_FILE_BLOCK_COUNT, 8 * 8);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+ "Failed to set key %s into dict during truncate of %s",
+ GF_GET_FILE_BLOCK_COUNT,
+ uuid_utoa(local->resolver_base_inode->gfid));
+ dict_unref(xdata_req);
+ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
+ return 0;
+ }
SHARD_SET_ROOT_FS_ID(frame, local);
while (cur_block <= last_block) {
@@ -2103,7 +2262,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
STACK_WIND_COOKIE(frame, shard_truncate_htol_cbk,
(void *)(long)cur_block, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, &loc, 0, NULL);
+ FIRST_CHILD(this)->fops->unlink, &loc, 0, xdata_req);
loc_wipe(&loc);
next:
i++;
@@ -2111,6 +2270,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
if (!--call_count)
break;
}
+ dict_unref(xdata_req);
return 0;
}
@@ -2166,13 +2326,19 @@ shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode,
xlator_t *this = NULL;
inode_t *fsync_inode = NULL;
shard_priv_t *priv = NULL;
+ inode_t *base_inode = NULL;
this = THIS;
priv = this->private;
- if (local->loc.inode)
+ if (local->loc.inode) {
gf_uuid_copy(gfid, local->loc.inode->gfid);
- else
+ base_inode = local->loc.inode;
+ } else if (local->resolver_base_inode) {
+ gf_uuid_copy(gfid, local->resolver_base_inode->gfid);
+ base_inode = local->resolver_base_inode;
+ } else {
gf_uuid_copy(gfid, local->base_gfid);
+ }
shard_make_block_bname(block_num, gfid, block_bname, sizeof(block_bname));
@@ -2185,7 +2351,7 @@ shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode,
LOCK(&priv->lock);
{
fsync_inode = __shard_update_shards_inode_list(
- linked_inode, this, local->loc.inode, block_num, gfid);
+ linked_inode, this, base_inode, block_num, gfid);
}
UNLOCK(&priv->lock);
if (fsync_inode)
@@ -2307,7 +2473,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
int count = 0;
int call_count = 0;
int32_t shard_idx_iter = 0;
- int last_block = 0;
+ int lookup_count = 0;
char path[PATH_MAX] = {
0,
};
@@ -2327,7 +2493,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
local = frame->local;
count = call_count = local->call_count;
shard_idx_iter = local->first_block;
- last_block = local->last_block;
+ lookup_count = local->last_block - local->create_count;
local->pls_fop_handler = handler;
if (local->lookup_shards_barriered)
local->barrier.waitfor = local->call_count;
@@ -2337,7 +2503,7 @@ shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
else
gf_uuid_copy(gfid, local->base_gfid);
- while (shard_idx_iter <= last_block) {
+ while (shard_idx_iter <= lookup_count) {
if (local->inode_list[i]) {
i++;
shard_idx_iter++;
@@ -2482,6 +2648,7 @@ shard_truncate_begin(call_frame_t *frame, xlator_t *this)
local->block_size);
local->num_blocks = local->last_block - local->first_block + 1;
+ GF_ASSERT(local->num_blocks > 0);
local->resolver_base_inode = (local->fop == GF_FOP_TRUNCATE)
? local->loc.inode
: local->fd->inode;
@@ -2557,7 +2724,7 @@ shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this)
*/
local->hole_size = local->offset - local->prebuf.ia_size;
local->delta_size = 0;
- local->delta_blocks = 0;
+ GF_ATOMIC_INIT(local->delta_blocks, 0);
local->postbuf.ia_size = local->offset;
tmp_stbuf.ia_size = local->offset;
shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0,
@@ -2573,7 +2740,7 @@ shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this)
*/
local->hole_size = 0;
local->delta_size = (local->offset - local->prebuf.ia_size);
- local->delta_blocks = 0;
+ GF_ATOMIC_INIT(local->delta_blocks, 0);
tmp_stbuf.ia_size = local->offset;
shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0,
SHARD_INODE_WRITE_MASK);
@@ -2629,9 +2796,10 @@ shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
if (!local->xattr_req)
goto err;
local->resolver_base_inode = loc->inode;
+ GF_ATOMIC_INIT(local->delta_blocks, 0);
- shard_lookup_base_file(frame, this, &local->loc,
- shard_post_lookup_truncate_handler);
+ shard_refresh_base_file(frame, this, &local->loc, NULL,
+ shard_post_lookup_truncate_handler);
return 0;
err:
@@ -2684,9 +2852,10 @@ shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
local->loc.inode = inode_ref(fd->inode);
gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
local->resolver_base_inode = fd->inode;
+ GF_ATOMIC_INIT(local->delta_blocks, 0);
- shard_lookup_base_file(frame, this, &local->loc,
- shard_post_lookup_truncate_handler);
+ shard_refresh_base_file(frame, this, NULL, fd,
+ shard_post_lookup_truncate_handler);
return 0;
err:
shard_common_failure_unwind(GF_FOP_FTRUNCATE, frame, -1, ENOMEM);
@@ -2830,8 +2999,8 @@ shard_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
if (!local->xattr_req)
goto err;
- shard_lookup_base_file(frame, this, &local->loc,
- shard_post_lookup_link_handler);
+ shard_refresh_base_file(frame, this, &local->loc, NULL,
+ shard_post_lookup_link_handler);
return 0;
err:
shard_common_failure_unwind(GF_FOP_LINK, frame, -1, ENOMEM);
@@ -2845,13 +3014,20 @@ int
shard_post_lookup_shards_unlink_handler(call_frame_t *frame, xlator_t *this)
{
shard_local_t *local = NULL;
+ uuid_t gfid = {
+ 0,
+ };
local = frame->local;
+ if (local->resolver_base_inode)
+ gf_uuid_copy(gfid, local->resolver_base_inode->gfid);
+ else
+ gf_uuid_copy(gfid, local->base_gfid);
+
if ((local->op_ret < 0) && (local->op_errno != ENOENT)) {
gf_msg(this->name, GF_LOG_ERROR, local->op_errno, SHARD_MSG_FOP_FAILED,
- "failed to delete shards of %s",
- uuid_utoa(local->resolver_base_inode->gfid));
+ "failed to delete shards of %s", uuid_utoa(gfid));
return 0;
}
local->op_ret = 0;
@@ -2892,8 +3068,8 @@ shard_unlink_block_inode(shard_local_t *local, int shard_block_num)
shard_priv_t *priv = NULL;
shard_inode_ctx_t *ctx = NULL;
shard_inode_ctx_t *base_ictx = NULL;
- gf_boolean_t unlink_unref_forget = _gf_false;
int unref_base_inode = 0;
+ int unref_shard_inode = 0;
this = THIS;
priv = this->private;
@@ -2918,26 +3094,27 @@ shard_unlink_block_inode(shard_local_t *local, int shard_block_num)
list_del_init(&ctx->ilist);
priv->inode_count--;
unref_base_inode++;
+ unref_shard_inode++;
GF_ASSERT(priv->inode_count >= 0);
- unlink_unref_forget = _gf_true;
}
if (ctx->fsync_needed) {
unref_base_inode++;
+ unref_shard_inode++;
list_del_init(&ctx->to_fsync_list);
- if (base_inode)
+ if (base_inode) {
__shard_inode_ctx_get(base_inode, this, &base_ictx);
- if (base_ictx)
base_ictx->fsync_count--;
+ }
}
}
UNLOCK(&inode->lock);
if (base_inode)
UNLOCK(&base_inode->lock);
- if (unlink_unref_forget) {
- inode_unlink(inode, priv->dot_shard_inode, block_bname);
- inode_unref(inode);
- inode_forget(inode, 0);
- }
+
+ inode_unlink(inode, priv->dot_shard_inode, block_bname);
+ inode_ref_reduce_by_n(inode, unref_shard_inode);
+ inode_forget(inode, 0);
+
if (base_inode && unref_base_inode)
inode_ref_reduce_by_n(base_inode, unref_base_inode);
UNLOCK(&priv->lock);
@@ -3339,9 +3516,13 @@ shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this,
loc.inode = inode_ref(priv->dot_shard_rm_inode);
ret = syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name,
- ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL, NULL);
- if (ret)
+ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL, NULL);
+ if (ret < 0) {
+ if (ret == -EAGAIN) {
+ ret = 0;
+ }
goto out;
+ }
{
ret = __shard_delete_shards_of_entry(cleanup_frame, this, entry, inode);
}
@@ -3355,20 +3536,6 @@ out:
int
shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data)
{
- xlator_t *this = NULL;
- shard_priv_t *priv = NULL;
-
- this = frame->this;
- priv = this->private;
-
- if (ret < 0) {
- gf_msg(this->name, GF_LOG_WARNING, -ret,
- SHARD_MSG_SHARDS_DELETION_FAILED,
- "Background deletion of shards failed");
- priv->first_lookup = SHARD_FIRST_LOOKUP_PENDING;
- } else {
- priv->first_lookup = SHARD_FIRST_LOOKUP_DONE;
- }
SHARD_STACK_DESTROY(frame);
return 0;
}
@@ -3490,6 +3657,7 @@ shard_delete_shards(void *opaque)
gf_dirent_t entries;
gf_dirent_t *entry = NULL;
call_frame_t *cleanup_frame = NULL;
+ gf_boolean_t done = _gf_false;
this = THIS;
priv = this->private;
@@ -3506,6 +3674,7 @@ shard_delete_shards(void *opaque)
goto err;
}
cleanup_frame->local = local;
+ local->fop = GF_FOP_UNLINK;
local->xattr_req = dict_new();
if (!local->xattr_req) {
@@ -3543,51 +3712,76 @@ shard_delete_shards(void *opaque)
goto err;
}
- while ((ret = syncop_readdirp(FIRST_CHILD(this), local->fd, 131072, offset,
- &entries, local->xattr_req, NULL))) {
- if (ret > 0)
- ret = 0;
- list_for_each_entry(entry, &entries.list, list)
+ for (;;) {
+ offset = 0;
+ LOCK(&priv->lock);
{
- offset = entry->d_off;
-
- if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
- continue;
+ if (priv->bg_del_state == SHARD_BG_DELETION_LAUNCHING) {
+ priv->bg_del_state = SHARD_BG_DELETION_IN_PROGRESS;
+ } else if (priv->bg_del_state == SHARD_BG_DELETION_IN_PROGRESS) {
+ priv->bg_del_state = SHARD_BG_DELETION_NONE;
+ done = _gf_true;
+ }
+ }
+ UNLOCK(&priv->lock);
+ if (done)
+ break;
+ while (
+ (ret = syncop_readdirp(FIRST_CHILD(this), local->fd, 131072, offset,
+ &entries, local->xattr_req, NULL))) {
+ if (ret > 0)
+ ret = 0;
+ list_for_each_entry(entry, &entries.list, list)
+ {
+ offset = entry->d_off;
- if (!entry->inode) {
- ret = shard_lookup_marker_entry(this, local, entry);
- if (ret < 0)
+ if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
continue;
- }
- link_inode = inode_link(entry->inode, local->fd->inode,
- entry->d_name, &entry->d_stat);
- gf_msg_debug(this->name, 0,
- "Initiating deletion of "
- "shards of gfid %s",
- entry->d_name);
- ret = shard_delete_shards_of_entry(cleanup_frame, this, entry,
- link_inode);
- inode_unlink(link_inode, local->fd->inode, entry->d_name);
- inode_unref(link_inode);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, -ret,
- SHARD_MSG_SHARDS_DELETION_FAILED,
- "Failed to clean up shards of gfid %s", entry->d_name);
- continue;
+ if (!entry->inode) {
+ ret = shard_lookup_marker_entry(this, local, entry);
+ if (ret < 0)
+ continue;
+ }
+ link_inode = inode_link(entry->inode, local->fd->inode,
+ entry->d_name, &entry->d_stat);
+
+ gf_msg_debug(this->name, 0,
+ "Initiating deletion of "
+ "shards of gfid %s",
+ entry->d_name);
+ ret = shard_delete_shards_of_entry(cleanup_frame, this, entry,
+ link_inode);
+ inode_unlink(link_inode, local->fd->inode, entry->d_name);
+ inode_unref(link_inode);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret,
+ SHARD_MSG_SHARDS_DELETION_FAILED,
+ "Failed to clean up shards of gfid %s",
+ entry->d_name);
+ continue;
+ }
+ gf_msg(this->name, GF_LOG_INFO, 0,
+ SHARD_MSG_SHARD_DELETION_COMPLETED,
+ "Deleted "
+ "shards of gfid=%s from backend",
+ entry->d_name);
}
- gf_msg(this->name, GF_LOG_INFO, 0,
- SHARD_MSG_SHARD_DELETION_COMPLETED,
- "Deleted "
- "shards of gfid=%s from backend",
- entry->d_name);
+ gf_dirent_free(&entries);
+ if (ret)
+ break;
}
- gf_dirent_free(&entries);
- if (ret)
- break;
}
ret = 0;
+ loc_wipe(&loc);
+ return ret;
+
err:
+ LOCK(&priv->lock);
+ {
+ priv->bg_del_state = SHARD_BG_DELETION_NONE;
+ }
+ UNLOCK(&priv->lock);
loc_wipe(&loc);
return ret;
}
@@ -3925,6 +4119,7 @@ shard_unlink_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local->op_ret = op_ret;
local->op_errno = op_errno;
} else {
+ shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this);
local->preoldparent = *preparent;
local->postoldparent = *postparent;
if (xdata)
@@ -4134,8 +4329,8 @@ shard_post_inodelk_fop_handler(call_frame_t *frame, xlator_t *this)
switch (local->fop) {
case GF_FOP_UNLINK:
case GF_FOP_RENAME:
- shard_lookup_base_file(frame, this, &local->int_inodelk.loc,
- shard_post_lookup_base_shard_rm_handler);
+ shard_refresh_base_file(frame, this, &local->int_inodelk.loc, NULL,
+ shard_post_lookup_base_shard_rm_handler);
break;
default:
gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
@@ -4390,8 +4585,8 @@ shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (local->block_size) {
local->tmp_loc.inode = inode_new(this->itable);
gf_uuid_copy(local->tmp_loc.gfid, (local->loc.inode)->gfid);
- shard_lookup_base_file(frame, this, &local->tmp_loc,
- shard_post_rename_lookup_handler);
+ shard_refresh_base_file(frame, this, &local->tmp_loc, NULL,
+ shard_post_rename_lookup_handler);
} else {
shard_rename_cbk(frame, this);
}
@@ -4648,6 +4843,8 @@ out:
if (xdata)
local->xattr_rsp = dict_ref(xdata);
vec.iov_base = local->iobuf->ptr;
+ if (local->offset + local->req_size > local->prebuf.ia_size)
+ local->total_size = local->prebuf.ia_size - local->offset;
vec.iov_len = local->total_size;
local->op_ret = local->total_size;
SHARD_STACK_UNWIND(readv, frame, local->op_ret, local->op_errno,
@@ -5028,6 +5225,7 @@ shard_post_lookup_readv_handler(call_frame_t *frame, xlator_t *this)
local->block_size);
local->num_blocks = local->last_block - local->first_block + 1;
+ GF_ASSERT(local->num_blocks > 0);
local->resolver_base_inode = local->loc.inode;
local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *),
@@ -5124,8 +5322,8 @@ shard_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
local->loc.inode = inode_ref(fd->inode);
gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
- shard_lookup_base_file(frame, this, &local->loc,
- shard_post_lookup_readv_handler);
+ shard_refresh_base_file(frame, this, NULL, fd,
+ shard_post_lookup_readv_handler);
return 0;
err:
shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM);
@@ -5226,7 +5424,8 @@ shard_common_inode_write_do_cbk(call_frame_t *frame, void *cookie,
local->op_errno = op_errno;
} else {
local->written_size += op_ret;
- local->delta_blocks += (post->ia_blocks - pre->ia_blocks);
+ GF_ATOMIC_ADD(local->delta_blocks,
+ post->ia_blocks - pre->ia_blocks);
local->delta_size += (post->ia_size - pre->ia_size);
shard_inode_ctx_set(local->fd->inode, this, post, 0,
SHARD_MASK_TIMES);
@@ -5365,21 +5564,17 @@ shard_common_inode_write_do(call_frame_t *frame, xlator_t *this)
remaining_size -= shard_write_size;
if (local->fop == GF_FOP_WRITE) {
+ vec = NULL;
count = iov_subset(local->vector, local->count, vec_offset,
- vec_offset + shard_write_size, NULL);
-
- vec = GF_CALLOC(count, sizeof(struct iovec), gf_shard_mt_iovec);
- if (!vec) {
+ shard_write_size, &vec, 0);
+ if (count < 0) {
local->op_ret = -1;
local->op_errno = ENOMEM;
wind_failed = _gf_true;
- GF_FREE(vec);
shard_common_inode_write_do_cbk(frame, (void *)(long)0, this,
-1, ENOMEM, NULL, NULL, NULL);
goto next;
}
- count = iov_subset(local->vector, local->count, vec_offset,
- vec_offset + shard_write_size, vec);
}
if (cur_block == 0) {
@@ -5491,6 +5686,8 @@ shard_common_inode_write_post_resolve_handler(call_frame_t *frame,
shard_common_lookup_shards(
frame, this, local->resolver_base_inode,
shard_common_inode_write_post_lookup_shards_handler);
+ } else if (local->create_count) {
+ shard_common_inode_write_post_lookup_shards_handler(frame, this);
} else {
shard_common_inode_write_do(frame, this);
}
@@ -5521,6 +5718,7 @@ shard_common_inode_write_post_lookup_handler(call_frame_t *frame,
local->last_block = get_highest_block(local->offset, local->total_size,
local->block_size);
local->num_blocks = local->last_block - local->first_block + 1;
+ GF_ASSERT(local->num_blocks > 0);
local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *),
gf_shard_mt_inode_list);
if (!local->inode_list) {
@@ -5529,9 +5727,9 @@ shard_common_inode_write_post_lookup_handler(call_frame_t *frame,
}
gf_msg_trace(this->name, 0,
- "%s: gfid=%s first_block=%" PRIu32
+ "%s: gfid=%s first_block=%" PRIu64
" "
- "last_block=%" PRIu32 " num_blocks=%" PRIu32 " offset=%" PRId64
+ "last_block=%" PRIu64 " num_blocks=%" PRIu64 " offset=%" PRId64
" total_size=%zu flags=%" PRId32 "",
gf_fop_list[local->fop],
uuid_utoa(local->resolver_base_inode->gfid),
@@ -5736,6 +5934,7 @@ shard_fsync_shards_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
shard_inode_ctx_t *ctx = NULL;
shard_inode_ctx_t *base_ictx = NULL;
inode_t *base_inode = NULL;
+ gf_boolean_t unref_shard_inode = _gf_false;
local = frame->local;
base_inode = local->fd->inode;
@@ -5769,11 +5968,16 @@ out:
if (ctx->fsync_needed != 0) {
list_add_tail(&ctx->to_fsync_list, &base_ictx->to_fsync_list);
base_ictx->fsync_count++;
+ } else {
+ unref_shard_inode = _gf_true;
}
}
UNLOCK(&anon_fd->inode->lock);
UNLOCK(&base_inode->lock);
}
+
+ if (unref_shard_inode)
+ inode_unref(anon_fd->inode);
if (anon_fd)
fd_unref(anon_fd);
@@ -5920,8 +6124,8 @@ shard_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
local->loc.inode = inode_ref(fd->inode);
gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
- shard_lookup_base_file(frame, this, &local->loc,
- shard_post_lookup_fsync_handler);
+ shard_refresh_base_file(frame, this, NULL, fd,
+ shard_post_lookup_fsync_handler);
return 0;
err:
shard_common_failure_unwind(GF_FOP_FSYNC, frame, -1, ENOMEM);
@@ -6113,48 +6317,210 @@ shard_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
}
int32_t
-shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
+shard_modify_and_set_iatt_in_dict(dict_t *xdata, shard_local_t *local,
+ char *key)
{
- int op_errno = EINVAL;
+ int ret = 0;
+ struct iatt *tmpbuf = NULL;
+ struct iatt *stbuf = NULL;
+ data_t *data = NULL;
- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
- GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out);
+ if (!xdata)
+ return 0;
+
+ data = dict_get(xdata, key);
+ if (!data)
+ return 0;
+
+ tmpbuf = data_to_iatt(data, key);
+ stbuf = GF_MALLOC(sizeof(struct iatt), gf_common_mt_char);
+ if (stbuf == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto err;
}
+ *stbuf = *tmpbuf;
+ stbuf->ia_size = local->prebuf.ia_size;
+ stbuf->ia_blocks = local->prebuf.ia_blocks;
+ ret = dict_set_iatt(xdata, key, stbuf, false);
+ if (ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto err;
+ }
+ return 0;
- if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
- dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE);
- dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE);
+err:
+ GF_FREE(stbuf);
+ return -1;
+}
+
+int32_t
+shard_common_remove_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ int ret = -1;
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto err;
}
- STACK_WIND_TAIL(frame, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+ ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_PRESTAT);
+ if (ret < 0)
+ goto err;
+
+ ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_POSTSTAT);
+ if (ret < 0)
+ goto err;
+
+ if (local->fd)
+ SHARD_STACK_UNWIND(fremovexattr, frame, local->op_ret, local->op_errno,
+ xdata);
+ else
+ SHARD_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno,
+ xdata);
return 0;
-out:
- shard_common_failure_unwind(GF_FOP_REMOVEXATTR, frame, -1, op_errno);
+
+err:
+ shard_common_failure_unwind(local->fop, frame, local->op_ret,
+ local->op_errno);
return 0;
}
int32_t
-shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
+shard_post_lookup_remove_xattr_handler(call_frame_t *frame, xlator_t *this)
{
- int op_errno = EINVAL;
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local->op_ret < 0) {
+ shard_common_failure_unwind(local->fop, frame, local->op_ret,
+ local->op_errno);
+ return 0;
+ }
+ if (local->fd)
+ STACK_WIND(frame, shard_common_remove_xattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, local->fd,
+ local->name, local->xattr_req);
+ else
+ STACK_WIND(frame, shard_common_remove_xattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, &local->loc,
+ local->name, local->xattr_req);
+ return 0;
+}
+
+int32_t
+shard_common_remove_xattr(call_frame_t *frame, xlator_t *this,
+ glusterfs_fop_t fop, loc_t *loc, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ int ret = -1;
+ int op_errno = ENOMEM;
+ uint64_t block_size = 0;
+ shard_local_t *local = NULL;
+ inode_t *inode = loc ? loc->inode : fd->inode;
+
+ if ((IA_ISDIR(inode->ia_type)) || (IA_ISLNK(inode->ia_type))) {
+ if (loc)
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name,
+ xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, fd, name,
+ xdata);
+ return 0;
+ }
+
+ /* If shard's special xattrs are attempted to be removed,
+ * fail the fop with EPERM (except if the client is gsyncd).
+ */
if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
- GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out);
+ GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, err);
}
+ /* Repeat the same check for bulk-removexattr */
if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE);
dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE);
}
- STACK_WIND_TAIL(frame, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
+ ret = shard_inode_ctx_get_block_size(inode, this, &block_size);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+ "Failed to get block size from inode ctx of %s",
+ uuid_utoa(inode->gfid));
+ goto err;
+ }
+
+ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+ if (loc)
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name,
+ xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, fd, name,
+ xdata);
+ return 0;
+ }
+
+ local = mem_get0(this->local_pool);
+ if (!local)
+ goto err;
+
+ frame->local = local;
+ local->fop = fop;
+ if (loc) {
+ if (loc_copy(&local->loc, loc) != 0)
+ goto err;
+ }
+
+ if (fd) {
+ local->fd = fd_ref(fd);
+ local->loc.inode = inode_ref(fd->inode);
+ gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
+ }
+
+ if (name) {
+ local->name = gf_strdup(name);
+ if (!local->name)
+ goto err;
+ }
+
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ shard_refresh_base_file(frame, this, loc, fd,
+ shard_post_lookup_remove_xattr_handler);
return 0;
-out:
- shard_common_failure_unwind(GF_FOP_FREMOVEXATTR, frame, -1, op_errno);
+err:
+ shard_common_failure_unwind(fop, frame, -1, op_errno);
+ return 0;
+}
+
+int32_t
+shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ shard_common_remove_xattr(frame, this, GF_FOP_REMOVEXATTR, loc, NULL, name,
+ xdata);
+ return 0;
+}
+
+int32_t
+shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ shard_common_remove_xattr(frame, this, GF_FOP_FREMOVEXATTR, NULL, fd, name,
+ xdata);
return 0;
}
@@ -6235,38 +6601,164 @@ out:
}
int32_t
-shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int32_t flags, dict_t *xdata)
+shard_common_set_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- int op_errno = EINVAL;
+ int ret = -1;
+ shard_local_t *local = NULL;
- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
- GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out);
+ local = frame->local;
+
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto err;
}
- STACK_WIND_TAIL(frame, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+ ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_PRESTAT);
+ if (ret < 0)
+ goto err;
+
+ ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_POSTSTAT);
+ if (ret < 0)
+ goto err;
+
+ if (local->fd)
+ SHARD_STACK_UNWIND(fsetxattr, frame, local->op_ret, local->op_errno,
+ xdata);
+ else
+ SHARD_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno,
+ xdata);
return 0;
-out:
- shard_common_failure_unwind(GF_FOP_FSETXATTR, frame, -1, op_errno);
+
+err:
+ shard_common_failure_unwind(local->fop, frame, local->op_ret,
+ local->op_errno);
return 0;
}
int32_t
-shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int32_t flags, dict_t *xdata)
+shard_post_lookup_set_xattr_handler(call_frame_t *frame, xlator_t *this)
{
- int op_errno = EINVAL;
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local->op_ret < 0) {
+ shard_common_failure_unwind(local->fop, frame, local->op_ret,
+ local->op_errno);
+ return 0;
+ }
+
+ if (local->fd)
+ STACK_WIND(frame, shard_common_set_xattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, local->fd,
+ local->xattr_req, local->flags, local->xattr_rsp);
+ else
+ STACK_WIND(frame, shard_common_set_xattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, &local->loc,
+ local->xattr_req, local->flags, local->xattr_rsp);
+ return 0;
+}
+
+int32_t
+shard_common_set_xattr(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop,
+ loc_t *loc, fd_t *fd, dict_t *dict, int32_t flags,
+ dict_t *xdata)
+{
+ int ret = -1;
+ int op_errno = ENOMEM;
+ uint64_t block_size = 0;
+ shard_local_t *local = NULL;
+ inode_t *inode = loc ? loc->inode : fd->inode;
+
+ if ((IA_ISDIR(inode->ia_type)) || (IA_ISLNK(inode->ia_type))) {
+ if (loc)
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags,
+ xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags,
+ xdata);
+ return 0;
+ }
+ /* Sharded or not, if shard's special xattrs are attempted to be set,
+ * fail the fop with EPERM (except if the client is gsyncd.
+ */
if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
- GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out);
+ GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, err);
+ }
+
+ ret = shard_inode_ctx_get_block_size(inode, this, &block_size);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+ "Failed to get block size from inode ctx of %s",
+ uuid_utoa(inode->gfid));
+ goto err;
+ }
+
+ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+ if (loc)
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags,
+ xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags,
+ xdata);
+ return 0;
}
- STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr,
- loc, dict, flags, xdata);
+ local = mem_get0(this->local_pool);
+ if (!local)
+ goto err;
+
+ frame->local = local;
+ local->fop = fop;
+ if (loc) {
+ if (loc_copy(&local->loc, loc) != 0)
+ goto err;
+ }
+
+ if (fd) {
+ local->fd = fd_ref(fd);
+ local->loc.inode = inode_ref(fd->inode);
+ gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
+ }
+ local->flags = flags;
+ /* Reusing local->xattr_req and local->xattr_rsp to store the setxattr dict
+ * and the xdata dict
+ */
+ if (dict)
+ local->xattr_req = dict_ref(dict);
+ if (xdata)
+ local->xattr_rsp = dict_ref(xdata);
+
+ shard_refresh_base_file(frame, this, loc, fd,
+ shard_post_lookup_set_xattr_handler);
return 0;
-out:
- shard_common_failure_unwind(GF_FOP_SETXATTR, frame, -1, op_errno);
+err:
+ shard_common_failure_unwind(fop, frame, -1, op_errno);
+ return 0;
+}
+
+int32_t
+shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ shard_common_set_xattr(frame, this, GF_FOP_FSETXATTR, NULL, fd, dict, flags,
+ xdata);
+ return 0;
+}
+
+int32_t
+shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ shard_common_set_xattr(frame, this, GF_FOP_SETXATTR, loc, NULL, dict, flags,
+ xdata);
return 0;
}
@@ -6524,12 +7016,13 @@ shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this,
local->fd = fd_ref(fd);
local->block_size = block_size;
local->resolver_base_inode = local->fd->inode;
+ GF_ATOMIC_INIT(local->delta_blocks, 0);
local->loc.inode = inode_ref(fd->inode);
gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
- shard_lookup_base_file(frame, this, &local->loc,
- shard_common_inode_write_post_lookup_handler);
+ shard_refresh_base_file(frame, this, NULL, fd,
+ shard_common_inode_write_post_lookup_handler);
return 0;
out:
shard_common_failure_unwind(fop, frame, -1, ENOMEM);
@@ -6674,6 +7167,9 @@ fini(xlator_t *this)
GF_VALIDATE_OR_GOTO("shard", this, out);
+ /*Itable was not created by shard, hence setting to NULL.*/
+ this->itable = NULL;
+
mem_pool_destroy(this->local_pool);
this->local_pool = NULL;
@@ -6821,6 +7317,14 @@ struct xlator_dumpops dumpops = {
struct volume_options options[] = {
{
+ .key = {"shard"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "enable/disable shard",
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE,
+ },
+ {
.key = {"shard-block-size"},
.type = GF_OPTION_TYPE_SIZET,
.op_version = {GD_OP_VERSION_3_7_0},
@@ -6862,3 +7366,17 @@ struct volume_options options[] = {
},
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "shard",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
index f877591faee..4fe181b64d5 100644
--- a/xlators/features/shard/src/shard.h
+++ b/xlators/features/shard/src/shard.h
@@ -11,10 +11,10 @@
#ifndef __SHARD_H__
#define __SHARD_H__
-#include "xlator.h"
-#include "compat-errno.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/compat-errno.h>
#include "shard-messages.h"
-#include "syncop.h"
+#include <glusterfs/syncop.h>
#define GF_SHARD_DIR ".shard"
#define GF_SHARD_REMOVE_ME_DIR ".remove_me"
@@ -200,10 +200,10 @@ shard_unlock_entrylk(call_frame_t *frame, xlator_t *this);
} while (0)
typedef enum {
- SHARD_FIRST_LOOKUP_PENDING = 0,
- SHARD_FIRST_LOOKUP_IN_PROGRESS,
- SHARD_FIRST_LOOKUP_DONE,
-} shard_first_lookup_state_t;
+ SHARD_BG_DELETION_NONE = 0,
+ SHARD_BG_DELETION_LAUNCHING,
+ SHARD_BG_DELETION_IN_PROGRESS,
+} shard_bg_deletion_state_t;
/* rm = "remove me" */
@@ -217,7 +217,8 @@ typedef struct shard_priv {
int inode_count;
struct list_head ilist_head;
uint32_t deletion_rate;
- shard_first_lookup_state_t first_lookup;
+ shard_bg_deletion_state_t bg_del_state;
+ gf_boolean_t first_lookup_done;
uint64_t lru_limit;
} shard_priv_t;
@@ -253,9 +254,9 @@ typedef int32_t (*shard_post_update_size_fop_handler_t)(call_frame_t *frame,
typedef struct shard_local {
int op_ret;
int op_errno;
- int first_block;
- int last_block;
- int num_blocks;
+ uint64_t first_block;
+ uint64_t last_block;
+ uint64_t num_blocks;
int call_count;
int eexist_count;
int create_count;
@@ -274,7 +275,7 @@ typedef struct shard_local {
size_t req_size;
size_t readdir_size;
int64_t delta_size;
- int delta_blocks;
+ gf_atomic_t delta_blocks;
loc_t loc;
loc_t dot_shard_loc;
loc_t dot_shard_rm_loc;
@@ -317,6 +318,7 @@ typedef struct shard_local {
uint32_t deletion_rate;
gf_boolean_t cleanup_required;
uuid_t base_gfid;
+ char *name;
} shard_local_t;
typedef struct shard_inode_ctx {
diff --git a/xlators/features/snapview-client/src/snapview-client-mem-types.h b/xlators/features/snapview-client/src/snapview-client-mem-types.h
index aac0d571c41..3c3ab555a55 100644
--- a/xlators/features/snapview-client/src/snapview-client-mem-types.h
+++ b/xlators/features/snapview-client/src/snapview-client-mem-types.h
@@ -11,7 +11,7 @@
#ifndef _SVC_MEM_TYPES_H
#define _SVC_MEM_TYPES_H
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum svc_mem_types {
gf_svc_mt_svc_private_t = gf_common_mt_end + 1,
diff --git a/xlators/features/snapview-client/src/snapview-client-messages.h b/xlators/features/snapview-client/src/snapview-client-messages.h
index b99578a6063..c02fb154930 100644
--- a/xlators/features/snapview-client/src/snapview-client-messages.h
+++ b/xlators/features/snapview-client/src/snapview-client-messages.h
@@ -11,7 +11,7 @@
#ifndef _SNAPVIEW_CLIENT_MESSAGES_H_
#define _SNAPVIEW_CLIENT_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
@@ -31,6 +31,41 @@ GLFS_MSGID(SNAPVIEW_CLIENT, SVC_MSG_NO_MEMORY, SVC_MSG_MEM_ACNT_FAILED,
SVC_MSG_XLATOR_CHILDREN_WRONG, SVC_MSG_NORMAL_GRAPH_LOOKUP_FAIL,
SVC_MSG_SNAPVIEW_GRAPH_LOOKUP_FAIL, SVC_MSG_OPENDIR_SPECIAL_DIR,
SVC_MSG_RENAME_SNAPSHOT_ENTRY, SVC_MSG_LINK_SNAPSHOT_ENTRY,
- SVC_MSG_ENTRY_POINT_SPECIAL_DIR);
+ SVC_MSG_COPY_ENTRY_POINT_FAILED, SVC_MSG_ENTRY_POINT_SPECIAL_DIR,
+ SVC_MSG_STR_LEN, SVC_MSG_INVALID_ENTRY_POINT, SVC_MSG_NULL_PRIV,
+ SVC_MSG_PRIV_DESTROY_FAILED, SVC_MSG_ALLOC_FD_FAILED,
+ SVC_MSG_ALLOC_INODE_FAILED, SVC_MSG_NULL_SPECIAL_DIR,
+ SVC_MSG_MEM_POOL_GET_FAILED);
+#define SVC_MSG_ALLOC_FD_FAILED_STR "failed to allocate new fd context"
+#define SVC_MSG_SET_FD_CONTEXT_FAILED_STR "failed to set fd context"
+#define SVC_MSG_STR_LEN_STR \
+ "destination buffer size is less than the length of entry point name"
+#define SVC_MSG_NORMAL_GRAPH_LOOKUP_FAIL_STR "lookup failed on normal graph"
+#define SVC_MSG_SNAPVIEW_GRAPH_LOOKUP_FAIL_STR "lookup failed on snapview graph"
+#define SVC_MSG_SET_INODE_CONTEXT_FAILED_STR "failed to set inode context"
+#define SVC_MSG_NO_MEMORY_STR "failed to allocate memory"
+#define SVC_MSG_COPY_ENTRY_POINT_FAILED_STR \
+ "failed to copy the entry point string"
+#define SVC_MSG_GET_FD_CONTEXT_FAILED_STR "fd context not found"
+#define SVC_MSG_GET_INODE_CONTEXT_FAILED_STR "failed to get inode context"
+#define SVC_MSG_ALLOC_INODE_FAILED_STR "failed to allocate new inode"
+#define SVC_MSG_DICT_SET_FAILED_STR "failed to set dict"
+#define SVC_MSG_RENAME_SNAPSHOT_ENTRY_STR \
+ "rename happening on a entry residing in snapshot"
+#define SVC_MSG_DELETE_INODE_CONTEXT_FAILED_STR "failed to delete inode context"
+#define SVC_MSG_NULL_PRIV_STR "priv NULL"
+#define SVC_MSG_INVALID_ENTRY_POINT_STR "not a valid entry point"
+#define SVC_MSG_MEM_ACNT_FAILED_STR "Memory accouting init failed"
+#define SVC_MSG_NO_CHILD_FOR_XLATOR_STR "configured without any child"
+#define SVC_MSG_XLATOR_CHILDREN_WRONG_STR \
+ "snap-view-client has got wrong subvolumes. It can have only 2"
+#define SVC_MSG_ENTRY_POINT_SPECIAL_DIR_STR \
+ "entry point directory cannot be part of special directory"
+#define SVC_MSG_NULL_SPECIAL_DIR_STR "null special directory"
+#define SVC_MSG_MEM_POOL_GET_FAILED_STR \
+ "could not get mem pool for frame->local"
+#define SVC_MSG_PRIV_DESTROY_FAILED_STR "failed to destroy private"
+#define SVC_MSG_LINK_SNAPSHOT_ENTRY_STR \
+ "link happening on a entry residin gin snapshot"
#endif /* !_SNAPVIEW_CLIENT_MESSAGES_H_ */
diff --git a/xlators/features/snapview-client/src/snapview-client.c b/xlators/features/snapview-client/src/snapview-client.c
index c991f058dbd..486c5179d5b 100644
--- a/xlators/features/snapview-client/src/snapview-client.c
+++ b/xlators/features/snapview-client/src/snapview-client.c
@@ -9,8 +9,8 @@
*/
#include "snapview-client.h"
-#include "inode.h"
-#include "byte-order.h"
+#include <glusterfs/inode.h>
+#include <glusterfs/byte-order.h>
static void
svc_local_free(svc_local_t *local)
@@ -198,16 +198,15 @@ __svc_fd_ctx_get_or_new(xlator_t *this, fd_t *fd)
svc_fd = svc_fd_new();
if (!svc_fd) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, SVC_MSG_NO_MEMORY,
- "failed to allocate new fd context for gfid %s",
- uuid_utoa(inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, SVC_MSG_ALLOC_FD_FAILED,
+ "gfid=%s", uuid_utoa(inode->gfid), NULL);
goto out;
}
ret = __svc_fd_ctx_set(this, fd, svc_fd);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_FD_CONTEXT_FAILED,
- "failed to set fd context for gfid %s", uuid_utoa(inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_FD_CONTEXT_FAILED,
+ "gfid=%s", uuid_utoa(inode->gfid), NULL);
ret = -1;
}
@@ -238,6 +237,50 @@ out:
return svc_fd;
}
+/**
+ * @this: xlator
+ * @entry_point: pointer to the buffer provided by consumer
+ *
+ * This function is mainly for copying the entry point name
+ * (stored as string in priv->path) to a buffer point to by
+ * @entry_point within the lock. It is for the consumer to
+ * allocate the memory for the buffer.
+ *
+ * This function is called by all the functions (or fops)
+ * who need to use priv->path for avoiding the race.
+ * For example, either in lookup or in any other fop,
+ * while priv->path is being accessed, a reconfigure can
+ * happen to change priv->path. This ensures that, a lock
+ * is taken before accessing priv->path.
+ **/
+int
+gf_svc_get_entry_point(xlator_t *this, char *entry_point, size_t dest_size)
+{
+ int ret = -1;
+ svc_private_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO("snapview-client", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, entry_point, out);
+
+ priv = this->private;
+
+ LOCK(&priv->lock);
+ {
+ if (dest_size <= strlen(priv->path)) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_STR_LEN,
+ "dest-size=%zu", dest_size, "priv-path-len=%zu",
+ strlen(priv->path), "path=%s", priv->path, NULL);
+ } else {
+ snprintf(entry_point, dest_size, "%s", priv->path);
+ ret = 0;
+ }
+ }
+ UNLOCK(&priv->lock);
+
+out:
+ return ret;
+}
+
static int32_t
gf_svc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
@@ -275,19 +318,17 @@ gf_svc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
*/
if (op_ret) {
if (subvolume == FIRST_CHILD(this)) {
- gf_msg(this->name,
- (op_errno == ENOENT || op_errno == ESTALE) ? GF_LOG_DEBUG
- : GF_LOG_ERROR,
- op_errno, SVC_MSG_NORMAL_GRAPH_LOOKUP_FAIL,
- "lookup failed on normal graph with error %s",
- strerror(op_errno));
+ gf_smsg(this->name,
+ (op_errno == ENOENT || op_errno == ESTALE) ? GF_LOG_DEBUG
+ : GF_LOG_ERROR,
+ op_errno, SVC_MSG_NORMAL_GRAPH_LOOKUP_FAIL, "error=%s",
+ strerror(op_errno), NULL);
} else {
- gf_msg(this->name,
- (op_errno == ENOENT || op_errno == ESTALE) ? GF_LOG_DEBUG
- : GF_LOG_ERROR,
- op_errno, SVC_MSG_SNAPVIEW_GRAPH_LOOKUP_FAIL,
- "lookup failed on snapview graph with error %s",
- strerror(op_errno));
+ gf_smsg(this->name,
+ (op_errno == ENOENT || op_errno == ESTALE) ? GF_LOG_DEBUG
+ : GF_LOG_ERROR,
+ op_errno, SVC_MSG_SNAPVIEW_GRAPH_LOOKUP_FAIL, "error=%s",
+ strerror(op_errno), NULL);
goto out;
}
@@ -318,10 +359,8 @@ gf_svc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
ret = svc_inode_ctx_set(this, inode, inode_type);
if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED,
- "failed to set inode type in the inode context "
- "(gfid: %s)",
- uuid_utoa(inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED,
+ "gfid=%s", uuid_utoa(inode->gfid), NULL);
out:
if (do_unwind) {
@@ -341,20 +380,19 @@ gf_svc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
int op_ret = -1;
int op_errno = EINVAL;
inode_t *parent = NULL;
- svc_private_t *priv = NULL;
dict_t *new_xdata = NULL;
int inode_type = -1;
int parent_type = -1;
gf_boolean_t wind = _gf_false;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
GF_VALIDATE_OR_GOTO("svc", this, out);
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, loc, out);
GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
- priv = this->private;
-
ret = svc_inode_ctx_get(this, loc->inode, &inode_type);
if (!__is_root_gfid(loc->gfid)) {
if (loc->parent) {
@@ -371,8 +409,7 @@ gf_svc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
if (!local) {
op_ret = -1;
op_errno = ENOMEM;
- gf_msg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY,
- "failed to allocate local");
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY, NULL);
goto out;
}
@@ -411,7 +448,13 @@ gf_svc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
}
}
- if (strcmp(loc->name, priv->path)) {
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL);
+ goto out;
+ }
+
+ if (strcmp(loc->name, entry_point)) {
if (parent_type == VIRTUAL_INODE) {
subvolume = SECOND_CHILD(this);
} else {
@@ -429,8 +472,8 @@ gf_svc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
/* Indication of whether the lookup is happening on the
entry point or not, to the snapview-server.
*/
- SVC_ENTRY_POINT_SET(this, xdata, op_ret, op_errno, new_xdata, priv,
- ret, out);
+ SVC_ENTRY_POINT_SET(this, xdata, op_ret, op_errno, new_xdata, ret,
+ out);
}
}
@@ -469,6 +512,9 @@ gf_svc_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
0,
};
loc_t *temp_loc = NULL;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
GF_VALIDATE_OR_GOTO("svc", this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
@@ -484,7 +530,13 @@ gf_svc_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
if (path_len >= snap_len && inode_type == VIRTUAL_INODE) {
path = &loc->path[path_len - snap_len];
- if (!strcmp(path, priv->path)) {
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL);
+ goto out;
+ }
+
+ if (!strcmp(path, entry_point)) {
/*
* statfs call for virtual snap directory.
* Sent the fops to parent volume by removing
@@ -515,20 +567,24 @@ gf_svc_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *buf,
dict_t *xdata)
{
- /* Consider a testcase:
+ /* TODO: FIX ME
+ * Consider a testcase:
* #mount -t nfs host1:/vol1 /mnt
* #ls /mnt
* #ls /mnt/.snaps (As expected this fails)
* #gluster volume set vol1 features.uss enable
- * Now `ls /mnt/.snaps` should work,
- * but fails with No such file or directory.
- * This is because NFS client caches the list of files in
- * a directory. This cache is updated if there are any changes
- * in the directory attributes. To solve this problem change
- * a attribute 'ctime' when USS is enabled
+ * Now `ls /mnt/.snaps` should work, but fails with No such file or
+ * directory. This is because NFS client (gNFS) caches the list of files
+ * in a directory. This cache is updated if there are any changes in the
+ * directory attributes. So, one way to solve this problem is to change
+ * 'ctime' attribute when USS is enabled as below.
+ *
+ * if (op_ret == 0 && IA_ISDIR(buf->ia_type))
+ * buf->ia_ctime_nsec++;
+ *
+ * But this is not the ideal solution as applications see the unexpected
+ * ctime change causing failures.
*/
- if (op_ret == 0 && IA_ISDIR(buf->ia_type))
- buf->ia_ctime_nsec++;
SVC_STACK_UNWIND(stat, frame, op_ret, op_errno, buf, xdata);
return 0;
@@ -637,8 +693,8 @@ gf_svc_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (special_dir) {
svc_fd = svc_fd_ctx_get_or_new(this, fd);
if (!svc_fd) {
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED,
- "fd context not found for %s", uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
goto out;
}
@@ -683,19 +739,17 @@ gf_svc_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
local = mem_get0(this->local_pool);
if (!local) {
op_errno = ENOMEM;
- gf_msg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY,
- "failed to allocate memory for local "
- "(path: %s, gfid: %s)",
- loc->path, uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY,
+ "path=%s", loc->path, "gfid=%s", uuid_utoa(fd->inode->gfid),
+ NULL);
goto out;
}
+ loc_copy(&local->loc, loc);
+ frame->local = local;
SVC_GET_SUBVOL_FROM_CTX(this, op_ret, op_errno, inode_type, ret, loc->inode,
subvolume, out);
-
- loc_copy(&local->loc, loc);
local->subvolume = subvolume;
- frame->local = local;
STACK_WIND(frame, gf_svc_opendir_cbk, subvolume, subvolume->fops->opendir,
loc, fd, xdata);
@@ -728,11 +782,9 @@ gf_svc_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
if (ret < 0) {
op_ret = -1;
op_errno = EINVAL;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- SVC_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to get the inode context for %s "
- "(gfid: %s)",
- loc->path, uuid_utoa(loc->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "path=%s", loc->path,
+ "gfid= %s", uuid_utoa(loc->inode->gfid), NULL);
goto out;
}
@@ -816,6 +868,9 @@ gf_svc_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
char attrname[PATH_MAX] = "";
char attrval[64] = "";
dict_t *dict = NULL;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
GF_VALIDATE_OR_GOTO("svc", this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
@@ -839,14 +894,20 @@ gf_svc_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
strcat(attrname, ":");
if (!strcmp(attrname, GF_XATTR_GET_REAL_FILENAME_KEY)) {
- if (!strcasecmp(attrval, priv->path)) {
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL);
+ goto out;
+ }
+
+ if (!strcasecmp(attrval, entry_point)) {
dict = dict_new();
if (NULL == dict) {
op_errno = ENOMEM;
goto out;
}
- ret = dict_set_dynstr_with_alloc(dict, (char *)name, priv->path);
+ ret = dict_set_dynstr_with_alloc(dict, (char *)name, entry_point);
if (ret) {
op_errno = ENOMEM;
@@ -854,7 +915,7 @@ gf_svc_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
}
op_errno = 0;
- op_ret = strlen(priv->path) + 1;
+ op_ret = strlen(entry_point) + 1;
/* We should return from here */
goto out;
}
@@ -931,11 +992,9 @@ gf_svc_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
if (ret < 0) {
op_ret = -1;
op_errno = EINVAL;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- SVC_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to get inode context for %s "
- "(gfid: %s)",
- loc->name, uuid_utoa(loc->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "name=%s", loc->name,
+ "gfid=%s", uuid_utoa(loc->inode->gfid), NULL);
goto out;
}
@@ -977,10 +1036,9 @@ gf_svc_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
if (ret < 0) {
op_ret = -1;
op_errno = EINVAL;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- SVC_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to get inode context for %s",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s",
+ uuid_utoa(fd->inode->gfid), NULL);
goto out;
}
@@ -1022,11 +1080,9 @@ gf_svc_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
if (ret < 0) {
op_ret = -1;
op_errno = EINVAL;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- SVC_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to get the inode context for %s "
- "(gfid: %s)",
- loc->name, uuid_utoa(loc->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "name=%s", loc->name,
+ "gfid=%s", uuid_utoa(loc->inode->gfid), NULL);
goto out;
}
@@ -1062,8 +1118,8 @@ gf_svc_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
inode_type = NORMAL_INODE;
ret = svc_inode_ctx_set(this, inode, inode_type);
if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED,
- "failed to set inode context");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED,
+ NULL);
out:
SVC_STACK_UNWIND(mkdir, frame, op_ret, op_errno, inode, buf, preparent,
@@ -1079,29 +1135,33 @@ gf_svc_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
int ret = -1;
int op_ret = -1;
int op_errno = EINVAL;
- svc_private_t *priv = NULL;
gf_boolean_t wind = _gf_false;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
GF_VALIDATE_OR_GOTO("svc", this, out);
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, loc, out);
GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
- priv = this->private;
-
ret = svc_inode_ctx_get(this, loc->parent, &parent_type);
if (ret < 0) {
op_ret = -1;
op_errno = EINVAL;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- SVC_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to get the inode context for %s",
- uuid_utoa(loc->parent->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s",
+ uuid_utoa(loc->parent->gfid), NULL);
+ goto out;
+ }
+
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL);
goto out;
}
- if (strcmp(loc->name, priv->path) && parent_type == NORMAL_INODE) {
+ if (strcmp(loc->name, entry_point) && parent_type == NORMAL_INODE) {
STACK_WIND(frame, gf_svc_mkdir_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
} else {
@@ -1134,8 +1194,8 @@ gf_svc_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
inode_type = NORMAL_INODE;
ret = svc_inode_ctx_set(this, inode, inode_type);
if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED,
- "failed to set inode context");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED,
+ NULL);
out:
SVC_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent,
@@ -1151,29 +1211,33 @@ gf_svc_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
int ret = -1;
int op_ret = -1;
int op_errno = EINVAL;
- svc_private_t *priv = NULL;
gf_boolean_t wind = _gf_false;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
GF_VALIDATE_OR_GOTO("svc", this, out);
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, loc, out);
GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
- priv = this->private;
-
ret = svc_inode_ctx_get(this, loc->parent, &parent_type);
if (ret < 0) {
op_ret = -1;
op_errno = EINVAL;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- SVC_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to get the inode context for %s",
- uuid_utoa(loc->parent->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s",
+ uuid_utoa(loc->parent->gfid), NULL);
goto out;
}
- if (strcmp(loc->name, priv->path) && parent_type == NORMAL_INODE) {
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL);
+ goto out;
+ }
+
+ if (strcmp(loc->name, entry_point) && parent_type == NORMAL_INODE) {
STACK_WIND(frame, gf_svc_mknod_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask,
xdata);
@@ -1254,8 +1318,8 @@ gf_svc_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
inode_type = NORMAL_INODE;
ret = svc_inode_ctx_set(this, inode, inode_type);
if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED,
- "failed to set inode context");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED,
+ NULL);
out:
SVC_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf,
@@ -1272,30 +1336,34 @@ gf_svc_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
int ret = -1;
int op_ret = -1;
int op_errno = EINVAL;
- svc_private_t *priv = NULL;
gf_boolean_t wind = _gf_false;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
GF_VALIDATE_OR_GOTO("svc", this, out);
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
GF_VALIDATE_OR_GOTO(this->name, loc, out);
GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
GF_VALIDATE_OR_GOTO(this->name, fd, out);
- priv = this->private;
-
ret = svc_inode_ctx_get(this, loc->parent, &parent_type);
if (ret < 0) {
op_ret = -1;
op_errno = EINVAL;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- SVC_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to get the inode context for %s",
- uuid_utoa(loc->parent->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s",
+ uuid_utoa(loc->parent->gfid), NULL);
+ goto out;
+ }
+
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL);
goto out;
}
- if (strcmp(loc->name, priv->path) && parent_type == NORMAL_INODE) {
+ if (strcmp(loc->name, entry_point) && parent_type == NORMAL_INODE) {
STACK_WIND(frame, gf_svc_create_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
xdata);
@@ -1329,8 +1397,8 @@ gf_svc_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
inode_type = NORMAL_INODE;
ret = svc_inode_ctx_set(this, inode, inode_type);
if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED,
- "failed to set inode context");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED,
+ NULL);
out:
SVC_STACK_UNWIND(symlink, frame, op_ret, op_errno, inode, buf, preparent,
@@ -1347,29 +1415,33 @@ gf_svc_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
int op_ret = -1;
int op_errno = EINVAL;
int ret = -1;
- svc_private_t *priv = NULL;
gf_boolean_t wind = _gf_false;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
GF_VALIDATE_OR_GOTO("svc", this, out);
GF_VALIDATE_OR_GOTO(this->name, frame, out);
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
GF_VALIDATE_OR_GOTO(this->name, loc, out);
GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
- priv = this->private;
-
ret = svc_inode_ctx_get(this, loc->parent, &parent_type);
if (ret < 0) {
op_ret = -1;
op_errno = EINVAL;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- SVC_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to get the inode context for %s",
- uuid_utoa(loc->parent->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s",
+ uuid_utoa(loc->parent->gfid), NULL);
goto out;
}
- if (strcmp(loc->name, priv->path) && parent_type == NORMAL_INODE) {
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL);
+ goto out;
+ }
+
+ if (strcmp(loc->name, entry_point) && parent_type == NORMAL_INODE) {
STACK_WIND(frame, gf_svc_symlink_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->symlink, linkpath, loc, umask,
xdata);
@@ -1407,11 +1479,9 @@ gf_svc_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
if (ret < 0) {
op_ret = -1;
op_errno = EINVAL;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- SVC_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to get the inode context "
- "for %s",
- uuid_utoa(loc->parent->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s",
+ uuid_utoa(loc->parent->gfid), NULL);
goto out;
}
@@ -1533,14 +1603,13 @@ gf_svc_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
gf_dirent_t *entry = NULL;
gf_dirent_t *tmpentry = NULL;
svc_local_t *local = NULL;
- svc_private_t *priv = NULL;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
if (op_ret < 0)
goto out;
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
-
- priv = this->private;
local = frame->local;
/* If .snaps pre-exists, then it should not be listed
@@ -1551,9 +1620,23 @@ gf_svc_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
if (local->subvolume != FIRST_CHILD(this))
goto out;
+ /*
+ * Better to goto out if getting the entry point
+ * fails. We might end up sending the directory
+ * entry for the snapview entry point in the readdir
+ * response. But, the intention is to avoid the race
+ * condition where priv->path is being changed in
+ * reconfigure while this is accessing it.
+ */
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL);
+ goto out;
+ }
+
list_for_each_entry_safe(entry, tmpentry, &entries->list, list)
{
- if (strcmp(priv->path, entry->d_name) == 0)
+ if (strcmp(entry_point, entry->d_name) == 0)
gf_dirent_entry_free(entry);
}
@@ -1585,9 +1668,8 @@ gf_svc_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
svc_fd = svc_fd_ctx_get_or_new(this, fd);
if (!svc_fd)
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED,
- "failed to get the fd context for inode %s",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
else {
if (svc_fd->entry_point_handled && off == svc_fd->last_offset) {
op_ret = 0;
@@ -1601,9 +1683,8 @@ gf_svc_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
local = mem_get0(this->local_pool);
if (!local) {
- gf_msg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY,
- "failed to allocate local (inode: %s)",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY,
+ "inode-gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
goto out;
}
local->subvolume = subvolume;
@@ -1655,17 +1736,16 @@ gf_svc_readdirp_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
{
gf_dirent_t entries;
gf_dirent_t *entry = NULL;
- svc_private_t *private = NULL;
svc_fd_t *svc_fd = NULL;
svc_local_t *local = NULL;
int inode_type = -1;
int ret = -1;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
GF_VALIDATE_OR_GOTO("snapview-client", this, out);
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- private
- = this->private;
INIT_LIST_HEAD(&entries.list);
local = frame->local;
@@ -1685,18 +1765,25 @@ gf_svc_readdirp_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
svc_fd = svc_fd_ctx_get(this, local->fd);
if (!svc_fd) {
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED,
- "failed to get the fd context for the inode %s",
- uuid_utoa(local->fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED,
+ "gfid=%s", uuid_utoa(local->fd->inode->gfid), NULL);
+ op_ret = 0;
+ op_errno = ENOENT;
+ goto out;
+ }
+
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_COPY_ENTRY_POINT_FAILED,
+ NULL);
op_ret = 0;
op_errno = ENOENT;
goto out;
}
- entry = gf_dirent_for_name(private->path);
+ entry = gf_dirent_for_name(entry_point);
if (!entry) {
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_MEMORY,
- "failed to allocate memory for the entry %s", private->path);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_MEMORY,
+ "entry-point=%s", entry_point, NULL);
op_ret = 0;
op_errno = ENOMEM;
goto out;
@@ -1710,9 +1797,8 @@ gf_svc_readdirp_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
inode_type = VIRTUAL_INODE;
ret = svc_inode_ctx_set(this, entry->inode, inode_type);
if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED,
- "failed to set the inode context for the entry %s",
- entry->d_name);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED,
+ "entry-name=%s", entry->d_name, NULL);
list_add_tail(&entry->list, &entries.list);
op_ret = 1;
@@ -1732,18 +1818,16 @@ int
gf_svc_special_dir_revalidate_lookup(call_frame_t *frame, xlator_t *this,
dict_t *xdata)
{
- svc_private_t *private = NULL;
svc_local_t *local = NULL;
loc_t *loc = NULL;
dict_t *tmp_xdata = NULL;
char *path = NULL;
int ret = -1;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
GF_VALIDATE_OR_GOTO("snapview-client", this, out);
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
-
- private
- = this->private;
local = frame->local;
loc = &local->loc;
@@ -1759,13 +1843,19 @@ gf_svc_special_dir_revalidate_lookup(call_frame_t *frame, xlator_t *this,
inode_unref(loc->inode);
loc->inode = inode_new(loc->parent->table);
if (!loc->inode) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, SVC_MSG_NO_MEMORY,
- "failed to allocate new inode");
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, SVC_MSG_ALLOC_INODE_FAILED,
+ NULL);
+ goto out;
+ }
+
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_COPY_ENTRY_POINT_FAILED,
+ NULL);
goto out;
}
gf_uuid_copy(local->loc.gfid, loc->inode->gfid);
- ret = inode_path(loc->parent, private->path, &path);
+ ret = inode_path(loc->parent, entry_point, &path);
if (ret < 0)
goto out;
@@ -1790,8 +1880,7 @@ gf_svc_special_dir_revalidate_lookup(call_frame_t *frame, xlator_t *this,
ret = dict_set_str(tmp_xdata, "entry-point", "true");
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_DICT_SET_FAILED,
- "failed to set dict");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_DICT_SET_FAILED, NULL);
goto out;
}
@@ -1820,6 +1909,9 @@ gf_svc_readdir_on_special_dir(call_frame_t *frame, void *cookie, xlator_t *this,
int ret = -1;
gf_boolean_t unwind = _gf_true;
svc_fd_t *svc_fd = NULL;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
GF_VALIDATE_OR_GOTO("snapview-client", this, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
@@ -1832,9 +1924,8 @@ gf_svc_readdir_on_special_dir(call_frame_t *frame, void *cookie, xlator_t *this,
fd = local->fd;
svc_fd = svc_fd_ctx_get(this, fd);
if (!svc_fd) {
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED,
- "failed to get the fd context for inode %s",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
goto out;
}
@@ -1850,12 +1941,18 @@ gf_svc_readdir_on_special_dir(call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret == 0 && op_errno == ENOENT && private->special_dir &&
strcmp(private->special_dir, "") && svc_fd->special_dir &&
local->subvolume == FIRST_CHILD(this)) {
- inode = inode_grep(fd->inode->table, fd->inode, private->path);
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ SVC_MSG_GET_FD_CONTEXT_FAILED, NULL);
+ goto out;
+ }
+
+ inode = inode_grep(fd->inode->table, fd->inode, entry_point);
if (!inode) {
inode = inode_new(fd->inode->table);
if (!inode) {
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_MEMORY,
- "failed to allocate new inode");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_ALLOC_INODE_FAILED,
+ NULL);
goto out;
}
}
@@ -1863,7 +1960,7 @@ gf_svc_readdir_on_special_dir(call_frame_t *frame, void *cookie, xlator_t *this,
gf_uuid_copy(local->loc.pargfid, fd->inode->gfid);
gf_uuid_copy(local->loc.gfid, inode->gfid);
if (gf_uuid_is_null(inode->gfid))
- ret = inode_path(fd->inode, private->path, &path);
+ ret = inode_path(fd->inode, entry_point, &path);
else
ret = inode_path(inode, NULL, &path);
@@ -1885,8 +1982,7 @@ gf_svc_readdir_on_special_dir(call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
ret = dict_set_str(tmp_xdata, "entry-point", "true");
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, LG_MSG_DICT_SET_FAILED,
- "failed to set dict");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_DICT_SET_FAILED, NULL);
goto out;
}
@@ -1923,21 +2019,21 @@ gf_svc_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int ret = -1;
svc_fd_t *svc_fd = NULL;
gf_boolean_t unwind = _gf_true;
- svc_private_t *priv = NULL;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
if (op_ret < 0)
goto out;
GF_VALIDATE_OR_GOTO("snapview-client", this, out);
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- priv = this->private;
+
local = frame->local;
svc_fd = svc_fd_ctx_get(this, local->fd);
if (!svc_fd) {
- gf_msg(this->name, GF_LOG_WARNING, 0, SVC_MSG_GET_FD_CONTEXT_FAILED,
- "failed to get the fd context for gfid %s",
- uuid_utoa(local->fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_GET_FD_CONTEXT_FAILED,
+ "gfid=%s", uuid_utoa(local->fd->inode->gfid), NULL);
}
if (local->subvolume == FIRST_CHILD(this))
@@ -1945,6 +2041,19 @@ gf_svc_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
else
inode_type = VIRTUAL_INODE;
+ /*
+ * Better to goto out and return whatever is there in the
+ * readdirp response (even if the readdir response contains
+ * a directory entry for the snapshot entry point). Otherwise
+ * if we ignore the error, then there is a chance of race
+ * condition where, priv->path is changed in reconfigure
+ */
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_COPY_ENTRY_POINT_FAILED,
+ NULL);
+ goto out;
+ }
+
list_for_each_entry_safe(entry, tmpentry, &entries->list, list)
{
/* If .snaps pre-exists, then it should not be listed
@@ -1952,7 +2061,7 @@ gf_svc_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
* so filter the .snaps entry if exists.
* However it is OK to list .snaps in VIRTUAL world
*/
- if (inode_type == NORMAL_INODE && !strcmp(priv->path, entry->d_name)) {
+ if (inode_type == NORMAL_INODE && !strcmp(entry_point, entry->d_name)) {
gf_dirent_entry_free(entry);
continue;
}
@@ -1962,9 +2071,8 @@ gf_svc_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
ret = svc_inode_ctx_set(this, entry->inode, inode_type);
if (ret)
- gf_msg(this->name, GF_LOG_ERROR, 0,
- SVC_MSG_SET_INODE_CONTEXT_FAILED,
- "failed to set inode context");
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ SVC_MSG_SET_INODE_CONTEXT_FAILED, NULL);
if (svc_fd)
svc_fd->last_offset = entry->d_off;
}
@@ -2003,8 +2111,7 @@ gf_svc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
local = mem_get0(this->local_pool);
if (!local) {
op_errno = ENOMEM;
- gf_msg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY,
- "failed to allocate local");
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY, NULL);
goto out;
}
@@ -2019,9 +2126,8 @@ gf_svc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
*/
svc_fd = svc_fd_ctx_get_or_new(this, fd);
if (!svc_fd)
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED,
- "failed to get the fd context for the inode %s",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
else {
if (svc_fd->entry_point_handled && off == svc_fd->last_offset) {
op_ret = 0;
@@ -2076,22 +2182,17 @@ gf_svc_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
if (ret < 0) {
op_ret = -1;
op_errno = EINVAL;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- SVC_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to get the context for the inode "
- "%s",
- uuid_utoa(oldloc->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s",
+ uuid_utoa(oldloc->inode->gfid), NULL);
goto out;
}
if (src_inode_type == VIRTUAL_INODE) {
op_ret = -1;
op_errno = EROFS;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- SVC_MSG_RENAME_SNAPSHOT_ENTRY,
- "rename happening on a entry %s "
- "residing in snapshot",
- oldloc->name);
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_RENAME_SNAPSHOT_ENTRY, "name=%s", oldloc->name, NULL);
goto out;
}
@@ -2100,11 +2201,9 @@ gf_svc_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
if (!ret && dst_inode_type == VIRTUAL_INODE) {
op_ret = -1;
op_errno = EROFS;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- SVC_MSG_RENAME_SNAPSHOT_ENTRY,
- "rename of %s happening to a entry "
- "%s residing in snapshot",
- oldloc->name, newloc->name);
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_RENAME_SNAPSHOT_ENTRY, "oldloc-name=%s",
+ oldloc->name, "newloc-name=%s", newloc->name, NULL);
goto out;
}
}
@@ -2114,11 +2213,9 @@ gf_svc_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
if (!ret && dst_parent_type == VIRTUAL_INODE) {
op_ret = -1;
op_errno = EROFS;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- SVC_MSG_RENAME_SNAPSHOT_ENTRY,
- "rename of %s happening to a entry %s "
- "residing in snapshot",
- oldloc->name, newloc->name);
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_RENAME_SNAPSHOT_ENTRY, "oldloc-name=%s",
+ oldloc->name, "newloc-name=%s", newloc->name, NULL);
goto out;
}
}
@@ -2160,9 +2257,8 @@ gf_svc_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
if (!ret && src_inode_type == VIRTUAL_INODE) {
op_ret = -1;
op_errno = EROFS;
- gf_msg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_LINK_SNAPSHOT_ENTRY,
- "link happening on a entry %s residing in snapshot",
- oldloc->name);
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_LINK_SNAPSHOT_ENTRY,
+ "oldloc-name=%s", oldloc->name, NULL);
goto out;
}
@@ -2170,10 +2266,9 @@ gf_svc_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
if (!ret && dst_parent_type == VIRTUAL_INODE) {
op_ret = -1;
op_errno = EROFS;
- gf_msg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_LINK_SNAPSHOT_ENTRY,
- "link of %s happening to a entry %s "
- "residing in snapshot",
- oldloc->name, newloc->name);
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_LINK_SNAPSHOT_ENTRY,
+ "oldloc-name=%s", oldloc->name, "newloc-name=%s", newloc->name,
+ NULL);
goto out;
}
@@ -2208,11 +2303,9 @@ gf_svc_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
if (ret < 0) {
op_ret = -1;
op_errno = EINVAL;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- SVC_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to get inode context for %s "
- "(gfid: %s)",
- loc->path, uuid_utoa(loc->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "path=%s", loc->path,
+ "gfid=%s", uuid_utoa(loc->inode->gfid), NULL);
goto out;
}
@@ -2253,10 +2346,9 @@ gf_svc_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
if (ret < 0) {
op_ret = -1;
op_errno = EINVAL;
- gf_msg(this->name, GF_LOG_ERROR, op_errno,
- SVC_MSG_GET_INODE_CONTEXT_FAILED,
- "failed to get inode context for %s",
- uuid_utoa(fd->inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s",
+ uuid_utoa(fd->inode->gfid), NULL);
goto out;
}
@@ -2340,8 +2432,9 @@ gf_svc_forget(xlator_t *this, inode_t *inode)
ret = inode_ctx_del(inode, this, &value);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_DELETE_INODE_CONTEXT_FAILED,
- "failed to delete inode context for %s", uuid_utoa(inode->gfid));
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ SVC_MSG_DELETE_INODE_CONTEXT_FAILED, "gfid=%s",
+ uuid_utoa(inode->gfid), NULL);
goto out;
}
@@ -2349,16 +2442,112 @@ out:
return 0;
}
+static int
+gf_svc_priv_destroy(xlator_t *this, svc_private_t *priv)
+{
+ int ret = -1;
+
+ if (!priv) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_NULL_PRIV, NULL);
+ goto out;
+ }
+
+ GF_FREE(priv->path);
+ GF_FREE(priv->special_dir);
+
+ LOCK_DESTROY(&priv->lock);
+
+ GF_FREE(priv);
+
+ if (this->local_pool) {
+ mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+/**
+ * ** NOTE **:
+ * =============
+ * The option "snapdir-entry-path" is NOT reconfigurable.
+ * That option as of now is only for the consumption of
+ * samba, where, it needs to tell glusterfs about the
+ * directory that is shared with windows client for the
+ * access. Now, in windows-explorer (GUI) interface, for
+ * the directory shared, the entry point to the snapshot
+ * world (snapshot-directory option) should be visible,
+ * atleast as a hidden entry. For that to happen, glusterfs
+ * has to send that entry in the readdir response coming on
+ * the directory used as the smb share. Therefore, samba,
+ * while initializing the gluster volume (via gfapi) sets
+ * the xlator option "snapdir-entry-path" to the directory
+ * which is to be shared with windows (check the file
+ * vfs_glusterfs.c from samba source code). So to avoid
+ * problems with smb access, not allowing snapdir-entry-path
+ * option to be configurable. That option is for those
+ * consumers who know what they are doing.
+ **/
int
reconfigure(xlator_t *this, dict_t *options)
{
svc_private_t *priv = NULL;
+ char *path = NULL;
+ gf_boolean_t show_entry_point = _gf_false;
+ char *tmp = NULL;
priv = this->private;
- GF_OPTION_RECONF("snapshot-directory", priv->path, options, str, out);
- GF_OPTION_RECONF("show-snapshot-directory", priv->show_entry_point, options,
- bool, out);
+ GF_OPTION_RECONF("snapshot-directory", path, options, str, out);
+ if (!path || (strlen(path) > NAME_MAX) || path[0] != '.') {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_INVALID_ENTRY_POINT,
+ "path=%s", path, NULL);
+ goto out;
+ }
+
+ GF_OPTION_RECONF("show-snapshot-directory", show_entry_point, options, bool,
+ out);
+
+ /*
+ * The assumption now is that priv->path is an allocated memory (either
+ * in init or in a previous reconfigure).
+ * So, the intention here is to preserve the older contents of the option
+ * until the new option's value has been completely stored in the priv.
+ * So, do this.
+ * - Store the pointer of priv->path in a temporary pointer.
+ * - Allocate new memory for the new value of the option that is just
+ * obtained from the above call to GF_OPTION_RECONF.
+ * - If the above allocation fails, again set the pointer from priv
+ * to the address stored in tmp. i.e. the previous value.
+ * - If the allocation succeeds, then free the tmp pointer.
+ * WARNING: Before changing the allocation and freeing logic of
+ * priv->path, always check the init function to see how
+ * priv->path is set. Take decisions accordingly. As of now,
+ * the assumption is that, the string elements of private
+ * structure of snapview-client are allocated (either in
+ * init or here in reconfugure).
+ */
+ LOCK(&priv->lock);
+ {
+ tmp = priv->path;
+ priv->path = NULL;
+ priv->path = gf_strdup(path);
+ if (!priv->path) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to reconfigure snapshot-directory option to %s",
+ path);
+ priv->path = tmp;
+ } else {
+ GF_FREE(tmp);
+ tmp = NULL;
+ }
+
+ priv->show_entry_point = show_entry_point;
+ }
+ UNLOCK(&priv->lock);
out:
return 0;
@@ -2375,9 +2564,7 @@ mem_acct_init(xlator_t *this)
ret = xlator_mem_acct_init(this, gf_svc_mt_end + 1);
if (ret != 0) {
- gf_msg(this->name, GF_LOG_WARNING, 0, SVC_MSG_MEM_ACNT_FAILED,
- "Memory accounting"
- " init failed");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_MEM_ACNT_FAILED, NULL);
}
return ret;
@@ -2390,10 +2577,11 @@ init(xlator_t *this)
int ret = -1;
int children = 0;
xlator_list_t *xl = NULL;
+ char *path = NULL;
+ char *special_dir = NULL;
if (!this->children) {
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_CHILD_FOR_XLATOR,
- "configured without any child");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_CHILD_FOR_XLATOR, NULL);
goto out;
}
@@ -2404,11 +2592,8 @@ init(xlator_t *this)
}
if (children != 2) {
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_XLATOR_CHILDREN_WRONG,
- "snap-view-client "
- "has got %d subvolumes. It can have only 2 "
- "subvolumes.",
- children);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_XLATOR_CHILDREN_WRONG,
+ "subvol-num=%d", children, NULL);
goto out;
}
@@ -2424,34 +2609,59 @@ init(xlator_t *this)
if (!private)
goto out;
- GF_OPTION_INIT("snapshot-directory", private->path, str, out);
- GF_OPTION_INIT("snapdir-entry-path", private->special_dir, str, out);
- GF_OPTION_INIT("show-snapshot-directory", private->show_entry_point, bool,
- out);
+ LOCK_INIT(&private->lock);
- if (strstr(private->special_dir, private->path)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_ENTRY_POINT_SPECIAL_DIR,
- "entry point "
- "directory cannot be part of the special directory");
- GF_FREE(private->special_dir);
- private
- ->special_dir = NULL;
+ GF_OPTION_INIT("snapshot-directory", path, str, out);
+ if (!path || (strlen(path) > NAME_MAX) || path[0] != '.') {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_INVALID_ENTRY_POINT,
+ "path=%s", path, NULL);
goto out;
}
- this->private = private;
+ private
+ ->path = gf_strdup(path);
+ if (!private->path) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_MEMORY,
+ "entry-point-path=%s", path, NULL);
+ goto out;
+ }
+
+ GF_OPTION_INIT("snapdir-entry-path", special_dir, str, out);
+ if (!special_dir || strstr(special_dir, path)) {
+ if (special_dir)
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ SVC_MSG_ENTRY_POINT_SPECIAL_DIR, "path=%s", path,
+ "special-dir=%s", special_dir);
+ else
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NULL_SPECIAL_DIR,
+ NULL);
+ goto out;
+ }
+
+ private
+ ->special_dir = gf_strdup(special_dir);
+ if (!private->special_dir) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_MEMORY,
+ "special-directory=%s", special_dir, NULL);
+ goto out;
+ }
+
+ GF_OPTION_INIT("show-snapshot-directory", private->show_entry_point, bool,
+ out);
+
this->local_pool = mem_pool_new(svc_local_t, 128);
if (!this->local_pool) {
- gf_msg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_MEMORY,
- "could not get mem pool for frame->local");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_MEM_POOL_GET_FAILED, NULL);
goto out;
}
+ this->private = private;
+
ret = 0;
out:
if (ret)
- GF_FREE(private);
+ (void)gf_svc_priv_destroy(this, private);
return ret;
}
@@ -2468,9 +2678,15 @@ fini(xlator_t *this)
if (!priv)
return;
- this->private = NULL;
+ /*
+ * Just log the failure and go ahead to
+ * set this->priv to NULL.
+ */
+ if (gf_svc_priv_destroy(this, priv))
+ gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_PRIV_DESTROY_FAILED,
+ NULL);
- GF_FREE(priv);
+ this->private = NULL;
return;
}
@@ -2559,3 +2775,17 @@ struct volume_options options[] = {
},
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1},
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "snapview-client",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/snapview-client/src/snapview-client.h b/xlators/features/snapview-client/src/snapview-client.h
index d19a796f101..166116a439d 100644
--- a/xlators/features/snapview-client/src/snapview-client.h
+++ b/xlators/features/snapview-client/src/snapview-client.h
@@ -10,11 +10,11 @@
#ifndef __SNAP_VIEW_CLIENT_H__
#define __SNAP_VIEW_CLIENT_H__
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "snapview-client-mem-types.h"
#include "snapview-client-messages.h"
@@ -39,8 +39,8 @@ typedef struct __svc_local svc_local_t;
svc_local_free(__local); \
} while (0)
-#define SVC_ENTRY_POINT_SET(this, xdata, op_ret, op_errno, new_xdata, priv, \
- ret, label) \
+#define SVC_ENTRY_POINT_SET(this, xdata, op_ret, op_errno, new_xdata, ret, \
+ label) \
do { \
if (!xdata) { \
xdata = new_xdata = dict_new(); \
@@ -81,6 +81,7 @@ struct svc_private {
char *path;
char *special_dir; /* needed for samba */
gf_boolean_t show_entry_point;
+ gf_lock_t lock; /* mainly to guard private->path */
};
typedef struct svc_private svc_private_t;
diff --git a/xlators/features/snapview-server/src/snapview-server-helpers.c b/xlators/features/snapview-server/src/snapview-server-helpers.c
index 6530901c5b8..62c1ddac49c 100644
--- a/xlators/features/snapview-server/src/snapview-server-helpers.c
+++ b/xlators/features/snapview-server/src/snapview-server-helpers.c
@@ -10,7 +10,7 @@
#include "snapview-server.h"
#include "snapview-server-mem-types.h"
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#include "rpc-clnt.h"
#include "xdr-generic.h"
#include "protocol-common.h"
@@ -476,6 +476,7 @@ __svs_initialise_snapshot_volume(xlator_t *this, const char *name,
char logfile[PATH_MAX] = {
0,
};
+ char *volfile_server = NULL;
GF_VALIDATE_OR_GOTO("snapview-server", this, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
@@ -512,14 +513,50 @@ __svs_initialise_snapshot_volume(xlator_t *this, const char *name,
goto out;
}
- ret = glfs_set_volfile_server(fs, "tcp", "localhost", 24007);
+ /*
+ * Before, localhost was used as the volfile server. But, with that
+ * method, accessing snapshots started giving ENOENT error if a
+ * specific bind address is mentioned in the glusterd volume file.
+ * Check the bug https://bugzilla.redhat.com/show_bug.cgi?id=1725211.
+ * So, the new method is tried below, where, snapview-server first
+ * uses the volfile server used by the snapd (obtained from the
+ * command line arguments saved in the global context of the process).
+ * If the volfile server in global context is NULL, then localhost
+ * is tried (like before).
+ */
+ if (this->ctx->cmd_args.volfile_server) {
+ volfile_server = gf_strdup(this->ctx->cmd_args.volfile_server);
+ if (!volfile_server) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM,
+ SVS_MSG_VOLFILE_SERVER_GET_FAIL,
+ "failed to copy volfile server %s. ",
+ this->ctx->cmd_args.volfile_server);
+ ret = -1;
+ goto out;
+ }
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM,
+ SVS_MSG_VOLFILE_SERVER_GET_FAIL,
+ "volfile server is NULL in cmd args. "
+ "Trying with localhost");
+ volfile_server = gf_strdup("localhost");
+ if (!volfile_server) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM,
+ SVS_MSG_VOLFILE_SERVER_GET_FAIL,
+ "failed to copy volfile server localhost.");
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", volfile_server, 24007);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, local_errno,
SVS_MSG_SET_VOLFILE_SERVR_FAILED,
"setting the "
- "volfile server for snap volume %s "
+ "volfile server %s for snap volume %s "
"failed",
- dirent->name);
+ volfile_server, dirent->name);
goto out;
}
@@ -561,6 +598,7 @@ out:
dirent->fs = fs;
}
+ GF_FREE(volfile_server);
return fs;
}
diff --git a/xlators/features/snapview-server/src/snapview-server-mem-types.h b/xlators/features/snapview-server/src/snapview-server-mem-types.h
index 504c7969bdc..63456b85323 100644
--- a/xlators/features/snapview-server/src/snapview-server-mem-types.h
+++ b/xlators/features/snapview-server/src/snapview-server-mem-types.h
@@ -11,7 +11,7 @@
#ifndef __SNAP_VIEW_MEM_TYPES_H
#define __SNAP_VIEW_MEM_TYPES_H
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum snapview_mem_types {
gf_svs_mt_priv_t = gf_common_mt_end + 1,
diff --git a/xlators/features/snapview-server/src/snapview-server-messages.h b/xlators/features/snapview-server/src/snapview-server-messages.h
index 44cc1575405..f634ab5d2b0 100644
--- a/xlators/features/snapview-server/src/snapview-server-messages.h
+++ b/xlators/features/snapview-server/src/snapview-server-messages.h
@@ -11,7 +11,7 @@
#ifndef _SNAPVIEW_SERVER_MESSAGES_H_
#define _SNAPVIEW_SERVER_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
@@ -49,6 +49,6 @@ GLFS_MSGID(SNAPVIEW_SERVER, SVS_MSG_NO_MEMORY, SVS_MSG_MEM_ACNT_FAILED,
SVS_MSG_CLOSEDIR_FAILED, SVS_MSG_CLOSE_FAILED,
SVS_MSG_GFID_GEN_FAILED, SVS_MSG_GLFS_NEW_FAILED,
SVS_MSG_SET_VOLFILE_SERVR_FAILED, SVS_MSG_SET_LOGGING_FAILED,
- SVS_MSG_GLFS_INIT_FAILED);
+ SVS_MSG_VOLFILE_SERVER_GET_FAIL, SVS_MSG_GLFS_INIT_FAILED);
#endif /* !_SNAPVIEW_CLIENT_MESSAGES_H_ */
diff --git a/xlators/features/snapview-server/src/snapview-server-mgmt.c b/xlators/features/snapview-server/src/snapview-server-mgmt.c
index 8e23b9adb2e..ecf31c3b880 100644
--- a/xlators/features/snapview-server/src/snapview-server-mgmt.c
+++ b/xlators/features/snapview-server/src/snapview-server-mgmt.c
@@ -26,11 +26,11 @@ mgmt_cbk_snap(struct rpc_clnt *rpc, void *mydata, void *data)
return 0;
}
-rpcclnt_cb_actor_t svs_cbk_actors[GF_CBK_MAXVALUE] = {
- [GF_CBK_GET_SNAPS] = {"GETSNAPS", GF_CBK_GET_SNAPS, mgmt_cbk_snap},
+static rpcclnt_cb_actor_t svs_cbk_actors[GF_CBK_MAXVALUE] = {
+ [GF_CBK_GET_SNAPS] = {"GETSNAPS", mgmt_cbk_snap, GF_CBK_GET_SNAPS},
};
-struct rpcclnt_cb_program svs_cbk_prog = {
+static struct rpcclnt_cb_program svs_cbk_prog = {
.progname = "GlusterFS Callback",
.prognum = GLUSTER_CBK_PROGRAM,
.progver = GLUSTER_CBK_VERSION,
@@ -38,12 +38,12 @@ struct rpcclnt_cb_program svs_cbk_prog = {
.numactors = GF_CBK_MAXVALUE,
};
-char *clnt_handshake_procs[GF_HNDSK_MAXVALUE] = {
+static char *clnt_handshake_procs[GF_HNDSK_MAXVALUE] = {
[GF_HNDSK_NULL] = "NULL",
[GF_HNDSK_EVENT_NOTIFY] = "EVENTNOTIFY",
};
-rpc_clnt_prog_t svs_clnt_handshake_prog = {
+static rpc_clnt_prog_t svs_clnt_handshake_prog = {
.progname = "GlusterFS Handshake",
.prognum = GLUSTER_HNDSK_PROGRAM,
.progver = GLUSTER_HNDSK_VERSION,
@@ -86,6 +86,7 @@ svs_mgmt_init(xlator_t *this)
char *host = NULL;
cmd_args_t *cmd_args = NULL;
glusterfs_ctx_t *ctx = NULL;
+ xlator_cmdline_option_t *opt = NULL;
GF_VALIDATE_OR_GOTO("snapview-server", this, out);
GF_VALIDATE_OR_GOTO(this->name, this->private, out);
@@ -100,7 +101,13 @@ svs_mgmt_init(xlator_t *this)
if (cmd_args->volfile_server)
host = cmd_args->volfile_server;
- ret = rpc_transport_inet_options_build(&options, host, port);
+ options = dict_new();
+ if (!options)
+ goto out;
+
+ opt = find_xlator_option_in_cmd_args_t("address-family", cmd_args);
+ ret = rpc_transport_inet_options_build(options, host, port,
+ (opt != NULL ? opt->value : NULL));
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_BUILD_TRNSPRT_OPT_FAILED,
"failed to build the "
@@ -142,6 +149,8 @@ svs_mgmt_init(xlator_t *this)
gf_msg_debug(this->name, 0, "svs mgmt init successful");
out:
+ if (options)
+ dict_unref(options);
if (ret)
if (priv) {
rpc_clnt_connection_cleanup(&priv->rpc->conn);
@@ -228,7 +237,8 @@ mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count,
glusterfs_ctx_t *ctx = NULL;
int ret = -1;
dict_t *dict = NULL;
- char key[1024] = {0};
+ char key[32] = {0};
+ int len;
int snapcount = 0;
svs_private_t *priv = NULL;
xlator_t *this = NULL;
@@ -247,7 +257,6 @@ mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count,
this = frame->this;
ctx = frame->this->ctx;
priv = this->private;
- old_dirents = priv->dirents;
if (!ctx) {
errno = EINVAL;
@@ -322,8 +331,8 @@ mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count,
}
for (i = 0; i < snapcount; i++) {
- snprintf(key, sizeof(key), "snap-volname.%d", i + 1);
- ret = dict_get_str(dict, key, &value);
+ len = snprintf(key, sizeof(key), "snap-volname.%d", i + 1);
+ ret = dict_get_strn(dict, key, len, &value);
if (ret) {
errno = EINVAL;
ret = -1;
@@ -335,8 +344,8 @@ mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count,
strncpy(dirents[i].snap_volname, value,
sizeof(dirents[i].snap_volname));
- snprintf(key, sizeof(key), "snap-id.%d", i + 1);
- ret = dict_get_str(dict, key, &value);
+ len = snprintf(key, sizeof(key), "snap-id.%d", i + 1);
+ ret = dict_get_strn(dict, key, len, &value);
if (ret) {
errno = EINVAL;
ret = -1;
@@ -346,8 +355,8 @@ mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count,
}
strncpy(dirents[i].uuid, value, sizeof(dirents[i].uuid));
- snprintf(key, sizeof(key), "snapname.%d", i + 1);
- ret = dict_get_str(dict, key, &value);
+ len = snprintf(key, sizeof(key), "snapname.%d", i + 1);
+ ret = dict_get_strn(dict, key, len, &value);
if (ret) {
errno = EINVAL;
ret = -1;
@@ -379,6 +388,7 @@ mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count,
LOCK(&priv->snaplist_lock);
{
oldcount = priv->num_snaps;
+ old_dirents = priv->dirents;
for (i = 0; i < priv->num_snaps; i++) {
for (j = 0; j < snapcount; j++) {
if ((!strcmp(old_dirents[i].name, dirents[j].name)) &&
@@ -398,7 +408,12 @@ mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count,
if (old_dirents) {
for (i = 0; i < oldcount; i++) {
if (old_dirents[i].fs)
- glfs_fini(old_dirents[i].fs);
+ gf_msg_debug(this->name, 0,
+ "calling glfs_fini on "
+ "name: %s, snap_volname: %s, uuid: %s",
+ old_dirents[i].name, old_dirents[i].snap_volname,
+ old_dirents[i].uuid);
+ glfs_fini(old_dirents[i].fs);
}
}
diff --git a/xlators/features/snapview-server/src/snapview-server.c b/xlators/features/snapview-server/src/snapview-server.c
index 7a8e29ecdfc..76cccae5914 100644
--- a/xlators/features/snapview-server/src/snapview-server.c
+++ b/xlators/features/snapview-server/src/snapview-server.c
@@ -9,15 +9,17 @@
*/
#include "snapview-server.h"
#include "snapview-server-mem-types.h"
-#include "compat-errno.h"
+#include <glusterfs/compat-errno.h>
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#include "rpc-clnt.h"
#include "xdr-generic.h"
#include "protocol-common.h"
-#include "syscall.h"
+#include <glusterfs/syscall.h>
#include <pthread.h>
+#include "glfs-internal.h"
+
int
gf_setcredentials(uid_t *uid, gid_t *gid, uint16_t ngrps, uint32_t *groups)
{
@@ -826,7 +828,8 @@ out:
* back into the dict. But to get the values for those xattrs it has to do the
* getxattr operation on each xattr which might turn out to be a costly
* operation. So for each of the xattrs present in the list, a 0 byte value
- * ("") is set into the dict before unwinding. This can be treated as an
+ * ("") is set into the dict before unwinding. Since ("") is also a valid xattr
+ * value(in a file system) we use an extra key in the same dictionary as an
* indicator to other xlators which want to cache the xattrs (as of now,
* md-cache which caches acl and selinux related xattrs) to not to cache the
* values of the xattrs present in the dict.
@@ -869,6 +872,15 @@ svs_add_xattrs_to_dict(xlator_t *this, dict_t *dict, char *list, ssize_t size)
list_offset += strlen(keybuffer) + 1;
} /* while (remaining_size > 0) */
+ /* Add an additional key to indicate that we don't need to cache these
+ * xattrs(with value "") */
+ ret = dict_set_str(dict, "glusterfs.skip-cache", "");
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_DICT_SET_FAILED,
+ "dict set operation for the key glusterfs.skip-cache failed.");
+ goto out;
+ }
+
ret = 0;
out:
@@ -995,8 +1007,8 @@ svs_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,
op_errno = ENOMEM;
gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_NO_MEMORY,
"failed to add xattrs from the list to "
- "dict for %s (gfid: %s, key: %s)",
- loc->path, uuid_utoa(loc->inode->gfid), name);
+ "dict for %s (gfid: %s)",
+ loc->path, uuid_utoa(loc->inode->gfid));
goto out;
}
GF_FREE(value);
@@ -1177,8 +1189,8 @@ svs_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
op_errno = ENOMEM;
gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_NO_MEMORY,
"failed to add xattrs from the list "
- "to dict (gfid: %s, key: %s)",
- uuid_utoa(fd->inode->gfid), name);
+ "to dict (gfid: %s)",
+ uuid_utoa(fd->inode->gfid));
goto out;
}
GF_FREE(value);
@@ -2000,7 +2012,9 @@ svs_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
"failed",
loc->name, uuid_utoa(loc->inode->gfid));
goto out;
- }
+ } else
+ gf_msg_debug(this->name, 0, "stat on %s (%s) successful", loc->path,
+ uuid_utoa(loc->inode->gfid));
iatt_from_stat(&buf, &stat);
gf_uuid_copy(buf.ia_gfid, loc->inode->gfid);
@@ -2256,7 +2270,7 @@ svs_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
};
svs_fd_t *sfd = NULL;
int ret = -1;
- struct stat fstatbuf = {
+ struct glfs_stat fstatbuf = {
0,
};
glfs_fd_t *glfd = NULL;
@@ -2333,7 +2347,7 @@ svs_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
iobref = iobref_new();
iobref_add(iobref, iobuf);
- iatt_from_stat(&stbuf, &fstatbuf);
+ glfs_iatt_from_statx(&stbuf, &fstatbuf);
gf_uuid_copy(stbuf.ia_gfid, fd->inode->gfid);
svs_fill_ino_from_gfid(&stbuf);
@@ -2691,3 +2705,16 @@ struct volume_options options[] = {
},
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1},
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "snapview-server",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/snapview-server/src/snapview-server.h b/xlators/features/snapview-server/src/snapview-server.h
index 6951d31449f..6472422e715 100644
--- a/xlators/features/snapview-server/src/snapview-server.h
+++ b/xlators/features/snapview-server/src/snapview-server.h
@@ -10,26 +10,24 @@
#ifndef __SNAP_VIEW_H__
#define __SNAP_VIEW_H__
-#include "dict.h"
-#include "defaults.h"
-#include "mem-types.h"
-#include "call-stub.h"
-#include "byte-order.h"
-#include "iatt.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/mem-types.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/iatt.h>
#include <ctype.h>
#include <sys/uio.h>
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
#include "glfs.h"
-#include "common-utils.h"
#include "glfs-handles.h"
#include "glfs-internal.h"
#include "glusterfs3-xdr.h"
-#include "glusterfs-acl.h"
-#include "syncop.h"
-#include "list.h"
-#include "timer.h"
+#include <glusterfs/glusterfs-acl.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/list.h>
+#include <glusterfs/timer.h>
#include "rpc-clnt.h"
#include "protocol-common.h"
#include "xdr-generic.h"
@@ -58,9 +56,16 @@
{ \
for (i = 0; i < _private->num_snaps; i++) { \
tmp_fs = _private->dirents[i].fs; \
- gf_log(this->name, GF_LOG_DEBUG, "dirent->fs: %p", tmp_fs); \
+ gf_log(this->name, GF_LOG_DEBUG, \
+ "snap name: %s, snap volume: %s," \
+ "dirent->fs: %p", \
+ _private->dirents[i].name, \
+ _private->dirents[i].snap_volname, tmp_fs); \
if (tmp_fs && fs && (tmp_fs == fs)) { \
found = _gf_true; \
+ gf_msg_debug(this->name, 0, \
+ "found the fs " \
+ "instance"); \
break; \
} \
} \
diff --git a/xlators/features/thin-arbiter/src/Makefile.am b/xlators/features/thin-arbiter/src/Makefile.am
index 7fd31a66caa..a3c133e7798 100644
--- a/xlators/features/thin-arbiter/src/Makefile.am
+++ b/xlators/features/thin-arbiter/src/Makefile.am
@@ -1,6 +1,4 @@
-if WITH_SERVER
xlator_LTLIBRARIES = thin-arbiter.la
-endif
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
diff --git a/xlators/features/thin-arbiter/src/thin-arbiter-mem-types.h b/xlators/features/thin-arbiter/src/thin-arbiter-mem-types.h
index 79b5ce0eee3..69562d2febc 100644
--- a/xlators/features/thin-arbiter/src/thin-arbiter-mem-types.h
+++ b/xlators/features/thin-arbiter/src/thin-arbiter-mem-types.h
@@ -9,7 +9,7 @@
#ifndef __THIN_ARBITER_MEM_TYPES_H__
#define __THIN_ARBITER_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
typedef enum gf_ta_mem_types_ {
gf_ta_mt_local_t = gf_common_mt_end + 1,
diff --git a/xlators/features/thin-arbiter/src/thin-arbiter-messages.h b/xlators/features/thin-arbiter/src/thin-arbiter-messages.h
index f49b3eedadf..81d7491577a 100644
--- a/xlators/features/thin-arbiter/src/thin-arbiter-messages.h
+++ b/xlators/features/thin-arbiter/src/thin-arbiter-messages.h
@@ -11,7 +11,7 @@
#ifndef _TA_MESSAGES_H_
#define _TA_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
diff --git a/xlators/features/thin-arbiter/src/thin-arbiter.c b/xlators/features/thin-arbiter/src/thin-arbiter.c
index 062e04132d6..ce3008636f1 100644
--- a/xlators/features/thin-arbiter/src/thin-arbiter.c
+++ b/xlators/features/thin-arbiter/src/thin-arbiter.c
@@ -11,11 +11,11 @@
#include "thin-arbiter.h"
#include "thin-arbiter-messages.h"
#include "thin-arbiter-mem-types.h"
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "byte-order.h"
-#include "common-utils.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/common-utils.h>
int
ta_set_incoming_values(dict_t *dict, char *key, data_t *value, void *data)
@@ -646,3 +646,16 @@ struct xlator_cbks cbks = {};
struct volume_options options[] = {
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {GD_OP_VERSION_6_0},
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "thin-arbiter",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/thin-arbiter/src/thin-arbiter.h b/xlators/features/thin-arbiter/src/thin-arbiter.h
index af3d4b1af92..e5f914b84bf 100644
--- a/xlators/features/thin-arbiter/src/thin-arbiter.h
+++ b/xlators/features/thin-arbiter/src/thin-arbiter.h
@@ -11,12 +11,12 @@
#ifndef _THIN_ARBITER_H
#define _THIN_ARBITER_H
-#include "locking.h"
-#include "common-utils.h"
-#include "glusterfs.h"
-#include "xlator.h"
-#include "defaults.h"
-#include "list.h"
+#include <glusterfs/locking.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/list.h>
#define THIN_ARBITER_SOURCE_XATTR "trusted.ta.source"
#define THIN_ARBITER_SOURCE_SIZE 2
diff --git a/xlators/features/trash/src/trash-mem-types.h b/xlators/features/trash/src/trash-mem-types.h
index 133f2edf99b..43353c8f095 100644
--- a/xlators/features/trash/src/trash-mem-types.h
+++ b/xlators/features/trash/src/trash-mem-types.h
@@ -10,7 +10,7 @@
#ifndef __TRASH_MEM_TYPES_H__
#define __TRASH_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_trash_mem_types_ {
gf_trash_mt_trash_private_t = gf_common_mt_end + 1,
diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c
index aa546d3394d..7d09cba3e9c 100644
--- a/xlators/features/trash/src/trash.c
+++ b/xlators/features/trash/src/trash.c
@@ -9,7 +9,7 @@
*/
#include "trash.h"
#include "trash-mem-types.h"
-#include "syscall.h"
+#include <glusterfs/syscall.h>
#define root_gfid \
(uuid_t) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }
@@ -170,7 +170,7 @@ store_eliminate_path(char *str, trash_elim_path **eliminate)
int ret = 0;
char *strtokptr = NULL;
- if (eliminate == NULL) {
+ if ((str == NULL) || (eliminate == NULL)) {
ret = EINVAL;
goto out;
}
@@ -212,11 +212,11 @@ void
append_time_stamp(char *name, size_t name_size)
{
int i;
- char timestr[64] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
- gf_time_fmt(timestr, sizeof(timestr), time(NULL), gf_timefmt_F_HMS);
+ gf_time_fmt(timestr, sizeof(timestr), gf_time(), gf_timefmt_F_HMS);
/* removing white spaces in timestamp */
for (i = 0; i < strlen(timestr); i++) {
@@ -2523,6 +2523,7 @@ out:
GF_FREE(priv);
}
mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
}
return ret;
}
@@ -2636,3 +2637,17 @@ struct volume_options options[] = {
.default_value = "{{ brick.path }}"},
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "trash",
+ .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/features/trash/src/trash.h b/xlators/features/trash/src/trash.h
index 675f9f97350..6671617c2c6 100644
--- a/xlators/features/trash/src/trash.h
+++ b/xlators/features/trash/src/trash.h
@@ -10,11 +10,11 @@
#ifndef __TRASH_H__
#define __TRASH_H__
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "inode.c"
#include "fnmatch.h"
diff --git a/xlators/features/upcall/src/upcall-cache-invalidation.h b/xlators/features/upcall/src/upcall-cache-invalidation.h
index e509a89acd5..db649b2c9a6 100644
--- a/xlators/features/upcall/src/upcall-cache-invalidation.h
+++ b/xlators/features/upcall/src/upcall-cache-invalidation.h
@@ -15,10 +15,4 @@
* events post its last access */
#define CACHE_INVALIDATION_TIMEOUT "60"
-/* xlator options */
-gf_boolean_t
-is_cache_invalidation_enabled(xlator_t *this);
-int32_t
-get_cache_invalidation_timeout(xlator_t *this);
-
#endif /* __UPCALL_CACHE_INVALIDATION_H__ */
diff --git a/xlators/features/upcall/src/upcall-internal.c b/xlators/features/upcall/src/upcall-internal.c
index 9d16e5f0ef8..c641bd6f432 100644
--- a/xlators/features/upcall/src/upcall-internal.c
+++ b/xlators/features/upcall/src/upcall-internal.c
@@ -12,20 +12,20 @@
#include <fcntl.h>
#include <limits.h>
-#include "glusterfs.h"
-#include "compat.h"
-#include "xlator.h"
-#include "logging.h"
-#include "common-utils.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/common-utils.h>
-#include "statedump.h"
-#include "syncop.h"
+#include <glusterfs/statedump.h>
+#include <glusterfs/syncop.h>
#include "upcall.h"
#include "upcall-mem-types.h"
#include "glusterfs3-xdr.h"
#include "protocol-common.h"
-#include "defaults.h"
+#include <glusterfs/defaults.h>
/*
* Check if any of the upcall options are enabled:
@@ -35,62 +35,37 @@ gf_boolean_t
is_upcall_enabled(xlator_t *this)
{
upcall_private_t *priv = NULL;
- gf_boolean_t is_enabled = _gf_false;
if (this->private) {
priv = (upcall_private_t *)this->private;
-
- if (priv->cache_invalidation_enabled) {
- is_enabled = _gf_true;
- }
+ return priv->cache_invalidation_enabled;
}
- return is_enabled;
+ return _gf_false;
}
/*
* Get the cache_invalidation_timeout
*/
-int32_t
+static int32_t
get_cache_invalidation_timeout(xlator_t *this)
{
upcall_private_t *priv = NULL;
- int32_t timeout = 0;
if (this->private) {
priv = (upcall_private_t *)this->private;
- timeout = priv->cache_invalidation_timeout;
- }
-
- return timeout;
-}
-
-/*
- * Allocate and add a new client entry to the given upcall entry
- */
-upcall_client_t *
-add_upcall_client(call_frame_t *frame, client_t *client,
- upcall_inode_ctx_t *up_inode_ctx)
-{
- upcall_client_t *up_client_entry = NULL;
-
- pthread_mutex_lock(&up_inode_ctx->client_list_lock);
- {
- up_client_entry = __add_upcall_client(frame, client, up_inode_ctx);
+ return priv->cache_invalidation_timeout;
}
- pthread_mutex_unlock(&up_inode_ctx->client_list_lock);
- return up_client_entry;
+ return 0;
}
-upcall_client_t *
+static upcall_client_t *
__add_upcall_client(call_frame_t *frame, client_t *client,
- upcall_inode_ctx_t *up_inode_ctx)
+ upcall_inode_ctx_t *up_inode_ctx, time_t now)
{
- upcall_client_t *up_client_entry = NULL;
-
- up_client_entry = GF_CALLOC(1, sizeof(*up_client_entry),
- gf_upcall_mt_upcall_client_entry_t);
+ upcall_client_t *up_client_entry = GF_MALLOC(
+ sizeof(*up_client_entry), gf_upcall_mt_upcall_client_entry_t);
if (!up_client_entry) {
gf_msg("upcall", GF_LOG_WARNING, 0, UPCALL_MSG_NO_MEMORY,
"Memory allocation failed");
@@ -98,7 +73,7 @@ __add_upcall_client(call_frame_t *frame, client_t *client,
}
INIT_LIST_HEAD(&up_client_entry->client_list);
up_client_entry->client_uid = gf_strdup(client->client_uid);
- up_client_entry->access_time = time(NULL);
+ up_client_entry->access_time = now;
up_client_entry->expire_time_attr = get_cache_invalidation_timeout(
frame->this);
@@ -110,39 +85,7 @@ __add_upcall_client(call_frame_t *frame, client_t *client,
return up_client_entry;
}
-/*
- * Given client->uid, retrieve the corresponding upcall client entry.
- * If none found, create a new entry.
- */
-upcall_client_t *
-__get_upcall_client(call_frame_t *frame, client_t *client,
- upcall_inode_ctx_t *up_inode_ctx)
-{
- upcall_client_t *up_client_entry = NULL;
- upcall_client_t *tmp = NULL;
- gf_boolean_t found_client = _gf_false;
-
- list_for_each_entry_safe(up_client_entry, tmp, &up_inode_ctx->client_list,
- client_list)
- {
- if (strcmp(client->client_uid, up_client_entry->client_uid) == 0) {
- /* found client entry. Update the access_time */
- up_client_entry->access_time = time(NULL);
- found_client = _gf_true;
- gf_log(THIS->name, GF_LOG_DEBUG, "upcall_entry_t client found - %s",
- up_client_entry->client_uid);
- break;
- }
- }
-
- if (!found_client) { /* create one */
- up_client_entry = __add_upcall_client(frame, client, up_inode_ctx);
- }
-
- return up_client_entry;
-}
-
-int
+static int
__upcall_inode_ctx_set(inode_t *inode, xlator_t *this)
{
upcall_inode_ctx_t *inode_ctx = NULL;
@@ -158,7 +101,7 @@ __upcall_inode_ctx_set(inode_t *inode, xlator_t *this)
if (!ret)
goto out;
- inode_ctx = GF_CALLOC(1, sizeof(upcall_inode_ctx_t),
+ inode_ctx = GF_MALLOC(sizeof(upcall_inode_ctx_t),
gf_upcall_mt_upcall_inode_ctx_t);
if (!inode_ctx) {
@@ -190,7 +133,7 @@ out:
return ret;
}
-upcall_inode_ctx_t *
+static upcall_inode_ctx_t *
__upcall_inode_ctx_get(inode_t *inode, xlator_t *this)
{
upcall_inode_ctx_t *inode_ctx = NULL;
@@ -229,8 +172,20 @@ upcall_inode_ctx_get(inode_t *inode, xlator_t *this)
return inode_ctx;
}
-int
-upcall_cleanup_expired_clients(xlator_t *this, upcall_inode_ctx_t *up_inode_ctx)
+static int
+__upcall_cleanup_client_entry(upcall_client_t *up_client)
+{
+ list_del_init(&up_client->client_list);
+
+ GF_FREE(up_client->client_uid);
+ GF_FREE(up_client);
+
+ return 0;
+}
+
+static int
+upcall_cleanup_expired_clients(xlator_t *this, upcall_inode_ctx_t *up_inode_ctx,
+ time_t now)
{
upcall_client_t *up_client = NULL;
upcall_client_t *tmp = NULL;
@@ -245,7 +200,7 @@ upcall_cleanup_expired_clients(xlator_t *this, upcall_inode_ctx_t *up_inode_ctx)
list_for_each_entry_safe(up_client, tmp, &up_inode_ctx->client_list,
client_list)
{
- t_expired = time(NULL) - up_client->access_time;
+ t_expired = now - up_client->access_time;
if (t_expired > (2 * timeout)) {
gf_log(THIS->name, GF_LOG_TRACE, "Cleaning up client_entry(%s)",
@@ -269,17 +224,6 @@ out:
return ret;
}
-int
-__upcall_cleanup_client_entry(upcall_client_t *up_client)
-{
- list_del_init(&up_client->client_list);
-
- GF_FREE(up_client->client_uid);
- GF_FREE(up_client);
-
- return 0;
-}
-
/*
* Free Upcall inode_ctx client list
*/
@@ -298,6 +242,10 @@ __upcall_cleanup_inode_ctx_client_list(upcall_inode_ctx_t *inode_ctx)
return 0;
}
+static void
+upcall_cache_forget(xlator_t *this, inode_t *inode,
+ upcall_inode_ctx_t *up_inode_ctx);
+
/*
* Free upcall_inode_ctx
*/
@@ -360,6 +308,7 @@ upcall_reaper_thread(void *data)
upcall_inode_ctx_t *tmp = NULL;
xlator_t *this = NULL;
time_t timeout = 0;
+ time_t time_now;
this = (xlator_t *)data;
GF_ASSERT(this);
@@ -367,33 +316,35 @@ upcall_reaper_thread(void *data)
priv = this->private;
GF_ASSERT(priv);
+ time_now = gf_time();
while (!priv->fini) {
list_for_each_entry_safe(inode_ctx, tmp, &priv->inode_ctx_list,
inode_ctx_list)
{
/* cleanup expired clients */
- upcall_cleanup_expired_clients(this, inode_ctx);
+ upcall_cleanup_expired_clients(this, inode_ctx, time_now);
if (!inode_ctx->destroy) {
continue;
}
+ /* client list would have been cleaned up*/
+ gf_msg_debug("upcall", 0, "Freeing upcall_inode_ctx (%p)",
+ inode_ctx);
LOCK(&priv->inode_ctx_lk);
{
- /* client list would have been cleaned up*/
- gf_msg_debug("upcall", 0, "Freeing upcall_inode_ctx (%p)",
- inode_ctx);
list_del_init(&inode_ctx->inode_ctx_list);
pthread_mutex_destroy(&inode_ctx->client_list_lock);
- GF_FREE(inode_ctx);
- inode_ctx = NULL;
}
UNLOCK(&priv->inode_ctx_lk);
+ GF_FREE(inode_ctx);
+ inode_ctx = NULL;
}
/* don't do a very busy loop */
timeout = get_cache_invalidation_timeout(this);
sleep(timeout / 2);
+ time_now = gf_time();
}
return NULL;
@@ -486,6 +437,13 @@ up_filter_xattr(dict_t *xattr, dict_t *regd_xattrs)
return ret;
}
+static void
+upcall_client_cache_invalidate(xlator_t *this, uuid_t gfid,
+ upcall_client_t *up_client_entry, uint32_t flags,
+ struct iatt *stbuf, struct iatt *p_stbuf,
+ struct iatt *oldp_stbuf, dict_t *xattr,
+ time_t now);
+
gf_boolean_t
up_invalidate_needed(dict_t *xattrs)
{
@@ -520,6 +478,8 @@ upcall_cache_invalidate(call_frame_t *frame, xlator_t *this, client_t *client,
upcall_client_t *tmp = NULL;
upcall_inode_ctx_t *up_inode_ctx = NULL;
gf_boolean_t found = _gf_false;
+ time_t time_now;
+ inode_t *linked_inode = NULL;
if (!is_upcall_enabled(this))
return;
@@ -532,7 +492,20 @@ upcall_cache_invalidate(call_frame_t *frame, xlator_t *this, client_t *client,
return;
}
- if (inode)
+ /* For nameless LOOKUPs, inode created shall always be
+ * invalid. Hence check if there is any already linked inode.
+ * If yes, update the inode_ctx of that valid inode
+ */
+ if (inode && (inode->ia_type == IA_INVAL) && stbuf) {
+ linked_inode = inode_find(inode->table, stbuf->ia_gfid);
+ if (linked_inode) {
+ gf_log("upcall", GF_LOG_DEBUG,
+ "upcall_inode_ctx_get of linked inode (%p)", inode);
+ up_inode_ctx = upcall_inode_ctx_get(linked_inode, this);
+ }
+ }
+
+ if (inode && !up_inode_ctx)
up_inode_ctx = upcall_inode_ctx_get(inode, this);
if (!up_inode_ctx) {
@@ -560,6 +533,7 @@ upcall_cache_invalidate(call_frame_t *frame, xlator_t *this, client_t *client,
goto out;
}
+ time_now = gf_time();
pthread_mutex_lock(&up_inode_ctx->client_list_lock);
{
list_for_each_entry_safe(up_client_entry, tmp,
@@ -567,7 +541,7 @@ upcall_cache_invalidate(call_frame_t *frame, xlator_t *this, client_t *client,
{
/* Do not send UPCALL event if same client. */
if (!strcmp(client->client_uid, up_client_entry->client_uid)) {
- up_client_entry->access_time = time(NULL);
+ up_client_entry->access_time = time_now;
found = _gf_true;
continue;
}
@@ -589,17 +563,21 @@ upcall_cache_invalidate(call_frame_t *frame, xlator_t *this, client_t *client,
* Also if the file is frequently accessed, set
* expire_time_attr to 0.
*/
- upcall_client_cache_invalidate(this, up_inode_ctx->gfid,
- up_client_entry, flags, stbuf,
- p_stbuf, oldp_stbuf, xattr);
+ upcall_client_cache_invalidate(
+ this, up_inode_ctx->gfid, up_client_entry, flags, stbuf,
+ p_stbuf, oldp_stbuf, xattr, time_now);
}
if (!found) {
- up_client_entry = __add_upcall_client(frame, client, up_inode_ctx);
+ up_client_entry = __add_upcall_client(frame, client, up_inode_ctx,
+ time_now);
}
}
pthread_mutex_unlock(&up_inode_ctx->client_list_lock);
out:
+ /* release the ref from inode_find */
+ if (linked_inode)
+ inode_unref(linked_inode);
return;
}
@@ -607,11 +585,12 @@ out:
* If the upcall_client_t has recently accessed the file (i.e, within
* priv->cache_invalidation_timeout), send a upcall notification.
*/
-void
+static void
upcall_client_cache_invalidate(xlator_t *this, uuid_t gfid,
upcall_client_t *up_client_entry, uint32_t flags,
struct iatt *stbuf, struct iatt *p_stbuf,
- struct iatt *oldp_stbuf, dict_t *xattr)
+ struct iatt *oldp_stbuf, dict_t *xattr,
+ time_t now)
{
struct gf_upcall up_req = {
0,
@@ -621,7 +600,7 @@ upcall_client_cache_invalidate(xlator_t *this, uuid_t gfid,
};
time_t timeout = 0;
int ret = -1;
- time_t t_expired = time(NULL) - up_client_entry->access_time;
+ time_t t_expired = now - up_client_entry->access_time;
GF_VALIDATE_OR_GOTO("upcall_client_cache_invalidate",
!(gf_uuid_is_null(gfid)), out);
@@ -678,32 +657,32 @@ out:
* Send "UP_FORGET" to all the clients so that they invalidate their cache
* entry and do a fresh lookup next time when any I/O comes in.
*/
-void
+static void
upcall_cache_forget(xlator_t *this, inode_t *inode,
upcall_inode_ctx_t *up_inode_ctx)
{
upcall_client_t *up_client_entry = NULL;
upcall_client_t *tmp = NULL;
- uint32_t flags = 0;
+ uint32_t flags = UP_FORGET;
+ time_t time_now;
if (!up_inode_ctx) {
return;
}
+ time_now = gf_time();
pthread_mutex_lock(&up_inode_ctx->client_list_lock);
{
list_for_each_entry_safe(up_client_entry, tmp,
&up_inode_ctx->client_list, client_list)
{
- flags = UP_FORGET;
-
- /* Set the access time to time(NULL)
+ /* Set the access time to gf_time()
* to send notify */
- up_client_entry->access_time = time(NULL);
+ up_client_entry->access_time = time_now;
upcall_client_cache_invalidate(this, up_inode_ctx->gfid,
up_client_entry, flags, NULL, NULL,
- NULL, NULL);
+ NULL, NULL, time_now);
}
}
pthread_mutex_unlock(&up_inode_ctx->client_list_lock);
diff --git a/xlators/features/upcall/src/upcall-mem-types.h b/xlators/features/upcall/src/upcall-mem-types.h
index 079677ff79c..f9883d9d72c 100644
--- a/xlators/features/upcall/src/upcall-mem-types.h
+++ b/xlators/features/upcall/src/upcall-mem-types.h
@@ -11,7 +11,7 @@
#ifndef __UPCALL_MEM_TYPES_H__
#define __UPCALL_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_upcall_mem_types_ {
gf_upcall_mt_conf_t = gf_common_mt_end + 1,
diff --git a/xlators/features/upcall/src/upcall-messages.h b/xlators/features/upcall/src/upcall-messages.h
index db5cac1e07d..4095a34c200 100644
--- a/xlators/features/upcall/src/upcall-messages.h
+++ b/xlators/features/upcall/src/upcall-messages.h
@@ -11,7 +11,7 @@
#ifndef _UPCALL_MESSAGES_H_
#define _UPCALL_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
diff --git a/xlators/features/upcall/src/upcall.c b/xlators/features/upcall/src/upcall.c
index 5fdd4993003..0795f58059d 100644
--- a/xlators/features/upcall/src/upcall.c
+++ b/xlators/features/upcall/src/upcall.c
@@ -13,19 +13,19 @@
#include <limits.h>
#include <pthread.h>
-#include "glusterfs.h"
-#include "compat.h"
-#include "xlator.h"
-#include "logging.h"
-#include "common-utils.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/common-utils.h>
-#include "statedump.h"
+#include <glusterfs/statedump.h>
#include "upcall.h"
#include "upcall-mem-types.h"
#include "glusterfs3-xdr.h"
#include "protocol-common.h"
-#include "defaults.h"
+#include <glusterfs/defaults.h>
static int32_t
up_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
@@ -57,14 +57,13 @@ static int32_t
up_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
fd_t *fd, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -111,14 +110,13 @@ up_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
int count, off_t off, uint32_t flags, struct iobref *iobref,
dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -167,14 +165,13 @@ static int32_t
up_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
off_t offset, uint32_t flags, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -220,14 +217,13 @@ static int32_t
up_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
struct gf_flock *flock, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -274,14 +270,13 @@ static int32_t
up_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -343,14 +338,13 @@ static int32_t
up_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
int32_t valid, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -410,14 +404,13 @@ static int32_t
up_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, newloc, NULL, oldloc->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -472,14 +465,13 @@ static int32_t
up_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, loc, NULL, loc->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -531,14 +523,13 @@ static int32_t
up_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, newloc, NULL, oldloc->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -592,14 +583,13 @@ static int32_t
up_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, loc, NULL, loc->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -653,14 +643,13 @@ static int32_t
up_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
mode_t umask, dict_t *params)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, loc, NULL, loc->parent, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -717,15 +706,13 @@ static int32_t
up_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
mode_t mode, mode_t umask, fd_t *fd, dict_t *params)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, loc, NULL, loc->parent, NULL);
-
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -774,14 +761,13 @@ out:
static int32_t
up_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -826,14 +812,13 @@ out:
static int32_t
up_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -852,14 +837,13 @@ err:
static int32_t
up_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -879,14 +863,13 @@ static int32_t
up_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -932,14 +915,13 @@ static int32_t
up_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -986,14 +968,13 @@ static int32_t
up_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -1047,14 +1028,13 @@ static int32_t
up_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
dev_t rdev, mode_t umask, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, loc, NULL, loc->parent, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -1110,14 +1090,13 @@ static int32_t
up_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
loc_t *loc, mode_t umask, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, loc, NULL, loc->parent, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -1164,14 +1143,13 @@ static int32_t
up_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -1216,14 +1194,13 @@ out:
static int32_t
up_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -1270,14 +1247,13 @@ static int32_t
up_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
off_t off, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -1334,14 +1310,13 @@ static int32_t
up_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
off_t off, dict_t *dict)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -1361,14 +1336,13 @@ static int32_t
up_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
int32_t valid, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -1415,14 +1389,13 @@ static int32_t
up_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
off_t offset, size_t len, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -1470,14 +1443,13 @@ static int32_t
up_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
size_t len, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -1524,14 +1496,13 @@ static int
up_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
off_t len, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -1577,14 +1548,13 @@ static int32_t
up_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
gf_seek_what_t what, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -1652,14 +1622,13 @@ static int32_t
up_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
int32_t flags, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, loc, NULL, loc->inode, dict);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -1727,14 +1696,13 @@ static int32_t
up_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
int32_t flags, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, fd, fd->inode, dict);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -1800,7 +1768,7 @@ static int32_t
up_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
dict_t *xattr = NULL;
@@ -1808,13 +1776,11 @@ up_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
xattr = dict_for_key_value(name, "", 1, _gf_true);
if (!xattr) {
- op_errno = ENOMEM;
goto err;
}
local = upcall_local_init(frame, this, NULL, fd, fd->inode, xattr);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -1885,7 +1851,7 @@ static int32_t
up_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
const char *name, dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
dict_t *xattr = NULL;
@@ -1893,13 +1859,11 @@ up_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
xattr = dict_for_key_value(name, "", 1, _gf_true);
if (!xattr) {
- op_errno = ENOMEM;
goto err;
}
local = upcall_local_init(frame, this, loc, NULL, loc->inode, xattr);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -1950,14 +1914,13 @@ static int32_t
up_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -2000,14 +1963,13 @@ static int32_t
up_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,
dict_t *xdata)
{
- int32_t op_errno = -1;
+ int32_t op_errno = ENOMEM;
upcall_local_t *local = NULL;
EXIT_IF_UPCALL_OFF(this, out);
local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
if (!local) {
- op_errno = ENOMEM;
goto err;
}
@@ -2335,14 +2297,14 @@ out:
return ret;
}
-int
+void
fini(xlator_t *this)
{
upcall_private_t *priv = NULL;
priv = this->private;
if (!priv) {
- return 0;
+ return;
}
this->private = NULL;
@@ -2367,7 +2329,7 @@ fini(xlator_t *this)
this->local_pool = NULL;
}
- return 0;
+ return;
}
int
@@ -2527,3 +2489,17 @@ struct volume_options options[] = {
.tags = {"cache", "cachetimeout", "upcall"}},
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "upcall",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/upcall/src/upcall.h b/xlators/features/upcall/src/upcall.h
index 3797e62aac3..aa535088ad7 100644
--- a/xlators/features/upcall/src/upcall.h
+++ b/xlators/features/upcall/src/upcall.h
@@ -10,12 +10,12 @@
#ifndef __UPCALL_H__
#define __UPCALL_H__
-#include "compat-errno.h"
+#include <glusterfs/compat-errno.h>
#include "upcall-mem-types.h"
-#include "client_t.h"
+#include <glusterfs/client_t.h>
#include "upcall-messages.h"
#include "upcall-cache-invalidation.h"
-#include "upcall-utils.h"
+#include <glusterfs/upcall-utils.h>
#define EXIT_IF_UPCALL_OFF(this, label) \
do { \
@@ -100,32 +100,10 @@ upcall_local_t *
upcall_local_init(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
inode_t *inode, dict_t *xattr);
-upcall_client_t *
-add_upcall_client(call_frame_t *frame, client_t *client,
- upcall_inode_ctx_t *up_inode_ctx);
-upcall_client_t *
-__add_upcall_client(call_frame_t *frame, client_t *client,
- upcall_inode_ctx_t *up_inode_ctx);
-upcall_client_t *
-__get_upcall_client(call_frame_t *frame, client_t *client,
- upcall_inode_ctx_t *up_inode_ctx);
-int
-__upcall_cleanup_client_entry(upcall_client_t *up_client);
-int
-upcall_cleanup_expired_clients(xlator_t *this,
- upcall_inode_ctx_t *up_inode_ctx);
-
-int
-__upcall_inode_ctx_set(inode_t *inode, xlator_t *this);
-upcall_inode_ctx_t *
-__upcall_inode_ctx_get(inode_t *inode, xlator_t *this);
upcall_inode_ctx_t *
upcall_inode_ctx_get(inode_t *inode, xlator_t *this);
int
upcall_cleanup_inode_ctx(xlator_t *this, inode_t *inode);
-void
-upcall_cache_forget(xlator_t *this, inode_t *inode,
- upcall_inode_ctx_t *up_inode_ctx);
void *
upcall_reaper_thread(void *data);
@@ -142,12 +120,6 @@ upcall_cache_invalidate(call_frame_t *frame, xlator_t *this, client_t *client,
inode_t *inode, uint32_t flags, struct iatt *stbuf,
struct iatt *p_stbuf, struct iatt *oldp_stbuf,
dict_t *xattr);
-void
-upcall_client_cache_invalidate(xlator_t *xl, uuid_t gfid,
- upcall_client_t *up_client_entry, uint32_t flags,
- struct iatt *stbuf, struct iatt *p_stbuf,
- struct iatt *oldp_stbuf, dict_t *xattr);
-
int
up_filter_xattr(dict_t *xattr, dict_t *regd_xattrs);
diff --git a/xlators/features/utime/src/utime-autogen-fops-tmpl.c b/xlators/features/utime/src/utime-autogen-fops-tmpl.c
index b4be66eebd1..f2f35322926 100644
--- a/xlators/features/utime/src/utime-autogen-fops-tmpl.c
+++ b/xlators/features/utime/src/utime-autogen-fops-tmpl.c
@@ -18,11 +18,11 @@
#include "config.h"
#endif
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "statedump.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/statedump.h>
#include "utime-helpers.h"
-#include "timespec.h"
+#include <glusterfs/timespec.h>
#pragma generate
diff --git a/xlators/features/utime/src/utime-autogen-fops-tmpl.h b/xlators/features/utime/src/utime-autogen-fops-tmpl.h
index e2e807cdf64..4e102ffed6c 100644
--- a/xlators/features/utime/src/utime-autogen-fops-tmpl.h
+++ b/xlators/features/utime/src/utime-autogen-fops-tmpl.h
@@ -15,7 +15,7 @@
#ifndef _UTIME_AUTOGEN_FOPS_H
#define _UTIME_AUTOGEN_FOPS_H
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#pragma generate
diff --git a/xlators/features/utime/src/utime-gen-fops-c.py b/xlators/features/utime/src/utime-gen-fops-c.py
index ab56dc9a4b3..9fb3e1b8b1a 100755
--- a/xlators/features/utime/src/utime-gen-fops-c.py
+++ b/xlators/features/utime/src/utime-gen-fops-c.py
@@ -62,6 +62,20 @@ gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this,
}
"""
+FOPS_COPY_FILE_RANGE_TEMPLATE = """
+int32_t
+gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ gl_timespec_get(&frame->root->ctime);
+
+ (void) utime_update_attribute_flags(frame, this, GF_FOP_COPY_FILE_RANGE);
+ STACK_WIND (frame, gf_utime_@NAME@_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@);
+ return 0;
+}
+"""
+
FOPS_SETATTR_TEMPLATE = """
int32_t
gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this,
@@ -81,6 +95,16 @@ gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this,
frame->root->flags |= MDATA_CTIME;
}
+ if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) {
+ if (valid & GF_ATTR_ATIME_NOW) {
+ frame->root->ctime.tv_sec = stbuf->ia_atime;
+ frame->root->ctime.tv_nsec = stbuf->ia_atime_nsec;
+ } else if (valid & GF_ATTR_MTIME_NOW) {
+ frame->root->ctime.tv_sec = stbuf->ia_mtime;
+ frame->root->ctime.tv_nsec = stbuf->ia_mtime_nsec;
+ }
+ }
+
STACK_WIND (frame, gf_utime_@NAME@_cbk, FIRST_CHILD(this),
FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@);
return 0;
@@ -94,6 +118,7 @@ utime_ops = ['fallocate', 'zerofill', 'opendir', 'mknod', 'mkdir',
utime_read_op = ['readv']
utime_write_op = ['writev']
utime_setattr_ops = ['setattr', 'fsetattr']
+utime_copy_file_range_ops = ['copy_file_range']
def gen_defaults():
for name in ops:
@@ -109,6 +134,9 @@ def gen_defaults():
if name in utime_setattr_ops:
print(generate(FOPS_CBK_COMMON_TEMPLATE, name, cbk_subs))
print(generate(FOPS_SETATTR_TEMPLATE, name, fop_subs))
+ if name in utime_copy_file_range_ops:
+ print(generate(FOPS_CBK_COMMON_TEMPLATE, name, cbk_subs))
+ print(generate(FOPS_COPY_FILE_RANGE_TEMPLATE, name, fop_subs))
for l in open(sys.argv[1], 'r').readlines():
if l.find('#pragma generate') != -1:
diff --git a/xlators/features/utime/src/utime-gen-fops-h.py b/xlators/features/utime/src/utime-gen-fops-h.py
index 3686f2e3c1e..e96274c229a 100755
--- a/xlators/features/utime/src/utime-gen-fops-h.py
+++ b/xlators/features/utime/src/utime-gen-fops-h.py
@@ -18,7 +18,7 @@ gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this,
utime_ops = ['fallocate', 'zerofill', 'opendir', 'mknod', 'mkdir',
'unlink', 'rmdir', 'symlink', 'rename', 'link', 'truncate',
'ftruncate', 'create', 'open', 'removexattr', 'fremovexattr',
- 'readv', 'writev', 'setattr', 'fsetattr']
+ 'readv', 'writev', 'setattr', 'fsetattr', 'copy_file_range']
def gen_defaults():
for name, value in ops.items():
diff --git a/xlators/features/utime/src/utime-helpers.c b/xlators/features/utime/src/utime-helpers.c
index c79e12badfa..29d9ad93561 100644
--- a/xlators/features/utime/src/utime-helpers.c
+++ b/xlators/features/utime/src/utime-helpers.c
@@ -17,7 +17,7 @@ gl_timespec_get(struct timespec *ts)
#ifdef TIME_UTC
timespec_get(ts, TIME_UTC);
#else
- timespec_now(ts);
+ timespec_now_realtime(ts);
#endif
}
@@ -93,6 +93,15 @@ utime_update_attribute_flags(call_frame_t *frame, xlator_t *this,
frame->root->flags |= MDATA_CTIME;
break;
+ case GF_FOP_COPY_FILE_RANGE:
+ /* Below 2 are for destination fd */
+ frame->root->flags |= MDATA_CTIME;
+ frame->root->flags |= MDATA_MTIME;
+ /* Below flag is for the source fd */
+ if (!utime_priv->noatime) {
+ frame->root->flags |= MDATA_ATIME;
+ }
+ break;
default:
frame->root->flags = 0;
}
diff --git a/xlators/features/utime/src/utime-helpers.h b/xlators/features/utime/src/utime-helpers.h
index b89867a3db3..2e32d4bece6 100644
--- a/xlators/features/utime/src/utime-helpers.h
+++ b/xlators/features/utime/src/utime-helpers.h
@@ -11,10 +11,9 @@
#ifndef _UTIME_HELPERS_H
#define _UTIME_HELPERS_H
-#include "glusterfs-fops.h"
-#include "stack.h"
-#include "xlator.h"
-#include "timespec.h"
+#include <glusterfs/stack.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/timespec.h>
#include <time.h>
void
diff --git a/xlators/features/utime/src/utime-mem-types.h b/xlators/features/utime/src/utime-mem-types.h
index fbd9aff0eca..ad1255f85f3 100644
--- a/xlators/features/utime/src/utime-mem-types.h
+++ b/xlators/features/utime/src/utime-mem-types.h
@@ -11,7 +11,7 @@
#ifndef __UTIME_MEM_TYPES_H__
#define __UTIME_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_utime_mem_types_ {
utime_mt_utime_t = gf_common_mt_end + 1,
diff --git a/xlators/features/utime/src/utime-messages.h b/xlators/features/utime/src/utime-messages.h
index 7613c335d43..bd40265abaf 100644
--- a/xlators/features/utime/src/utime-messages.h
+++ b/xlators/features/utime/src/utime-messages.h
@@ -11,7 +11,7 @@
#ifndef __UTIME_MESSAGES_H__
#define __UTIME_MESSAGES_H__
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
/* To add new message IDs, append new identifiers at the end of the list.
*
@@ -23,6 +23,7 @@
* glfs-message-id.h.
*/
-GLFS_MSGID(UTIME, UTIME_MSG_NO_MEMORY);
+GLFS_MSGID(UTIME, UTIME_MSG_NO_MEMORY, UTIME_MSG_SET_MDATA_FAILED,
+ UTIME_MSG_DICT_SET_FAILED);
#endif /* __UTIME_MESSAGES_H__ */
diff --git a/xlators/features/utime/src/utime.c b/xlators/features/utime/src/utime.c
index 418e4c4a0d5..2acc63e6a05 100644
--- a/xlators/features/utime/src/utime.c
+++ b/xlators/features/utime/src/utime.c
@@ -9,8 +9,10 @@
*/
#include "utime.h"
+#include "utime-helpers.h"
#include "utime-messages.h"
#include "utime-mem-types.h"
+#include <glusterfs/call-stub.h>
int32_t
gf_utime_invalidate(xlator_t *this, inode_t *inode)
@@ -133,6 +135,141 @@ mem_acct_init(xlator_t *this)
}
int32_t
+gf_utime_set_mdata_setxattr_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ dict_t *xdata)
+{
+ call_stub_t *stub = frame->local;
+ /* Don't fail lookup if mdata setxattr fails */
+ if (op_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, UTIME_MSG_SET_MDATA_FAILED,
+ "dict set of key for set-ctime-mdata failed");
+ }
+ frame->local = NULL;
+ call_resume(stub);
+ STACK_DESTROY(frame->root);
+ return 0;
+}
+
+int32_t
+gf_utime_set_mdata_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ dict_t *dict = NULL;
+ struct mdata_iatt *mdata = NULL;
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
+ call_frame_t *new_frame = NULL;
+
+ if (!op_ret && dict_get(xdata, GF_XATTR_MDATA_KEY) == NULL) {
+ dict = dict_new();
+ if (!dict) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ mdata = GF_MALLOC(sizeof(struct mdata_iatt), gf_common_mt_char);
+ if (mdata == NULL) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ iatt_to_mdata(mdata, stbuf);
+ ret = dict_set_mdata(dict, CTIME_MDATA_XDATA_KEY, mdata, _gf_false);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, UTIME_MSG_NO_MEMORY,
+ "dict set of key for set-ctime-mdata failed");
+ goto err;
+ }
+ new_frame = copy_frame(frame);
+ if (!new_frame) {
+ op_errno = ENOMEM;
+ goto stub_err;
+ }
+
+ new_frame->local = fop_lookup_cbk_stub(frame, default_lookup_cbk,
+ op_ret, op_errno, inode, stbuf,
+ xdata, postparent);
+ if (!new_frame->local) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, UTIME_MSG_NO_MEMORY,
+ "lookup_cbk stub allocation failed");
+ op_errno = ENOMEM;
+ STACK_DESTROY(new_frame->root);
+ goto stub_err;
+ }
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, stbuf->ia_gfid);
+
+ new_frame->root->uid = 0;
+ new_frame->root->gid = 0;
+ new_frame->root->pid = GF_CLIENT_PID_SET_UTIME;
+ STACK_WIND(new_frame, gf_utime_set_mdata_setxattr_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr, &loc,
+ dict, 0, NULL);
+
+ dict_unref(dict);
+ inode_unref(loc.inode);
+ return 0;
+ }
+
+ STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, stbuf, xdata,
+ postparent);
+ return 0;
+
+err:
+ if (mdata) {
+ GF_FREE(mdata);
+ }
+stub_err:
+ if (dict) {
+ dict_unref(dict);
+ }
+ STACK_UNWIND_STRICT(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+int
+gf_utime_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ int op_errno = EINVAL;
+ int ret = -1;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+
+ xdata = xdata ? dict_ref(xdata) : dict_new();
+ if (!xdata) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ ret = dict_set_int8(xdata, GF_XATTR_MDATA_KEY, 1);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, UTIME_MSG_DICT_SET_FAILED,
+ "%s: Unable to set dict value for %s", loc->path,
+ GF_XATTR_MDATA_KEY);
+ op_errno = -ret;
+ goto free_dict;
+ }
+
+ STACK_WIND(frame, gf_utime_set_mdata_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ dict_unref(xdata);
+ return 0;
+
+free_dict:
+ dict_unref(xdata);
+err:
+ STACK_UNWIND_STRICT(lookup, frame, ret, op_errno, NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
init(xlator_t *this)
{
utime_priv_t *utime = NULL;
@@ -182,19 +319,27 @@ notify(xlator_t *this, int event, void *data, ...)
}
struct xlator_fops fops = {
- /* TODO: Need to go through other fops and
- * check if they modify time attributes
- */
- .rename = gf_utime_rename, .mknod = gf_utime_mknod,
- .readv = gf_utime_readv, .fremovexattr = gf_utime_fremovexattr,
- .open = gf_utime_open, .create = gf_utime_create,
- .mkdir = gf_utime_mkdir, .writev = gf_utime_writev,
- .rmdir = gf_utime_rmdir, .fallocate = gf_utime_fallocate,
- .truncate = gf_utime_truncate, .symlink = gf_utime_symlink,
- .zerofill = gf_utime_zerofill, .link = gf_utime_link,
- .ftruncate = gf_utime_ftruncate, .unlink = gf_utime_unlink,
- .setattr = gf_utime_setattr, .fsetattr = gf_utime_fsetattr,
- .opendir = gf_utime_opendir, .removexattr = gf_utime_removexattr,
+ .rename = gf_utime_rename,
+ .mknod = gf_utime_mknod,
+ .readv = gf_utime_readv,
+ .fremovexattr = gf_utime_fremovexattr,
+ .open = gf_utime_open,
+ .create = gf_utime_create,
+ .mkdir = gf_utime_mkdir,
+ .writev = gf_utime_writev,
+ .rmdir = gf_utime_rmdir,
+ .fallocate = gf_utime_fallocate,
+ .truncate = gf_utime_truncate,
+ .symlink = gf_utime_symlink,
+ .zerofill = gf_utime_zerofill,
+ .link = gf_utime_link,
+ .ftruncate = gf_utime_ftruncate,
+ .unlink = gf_utime_unlink,
+ .setattr = gf_utime_setattr,
+ .fsetattr = gf_utime_fsetattr,
+ .opendir = gf_utime_opendir,
+ .removexattr = gf_utime_removexattr,
+ .lookup = gf_utime_lookup,
};
struct xlator_cbks cbks = {
.invalidate = gf_utime_invalidate,
@@ -230,3 +375,18 @@ struct volume_options options[] = {
"enabled. When noatime is on, atime is not updated with "
"ctime feature enabled and vice versa."},
{.key = {NULL}}};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {GD_OP_VERSION_5_0},
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "utime",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/utime/src/utime.h b/xlators/features/utime/src/utime.h
index 236183d4bcc..ba55eec00de 100644
--- a/xlators/features/utime/src/utime.h
+++ b/xlators/features/utime/src/utime.h
@@ -11,9 +11,9 @@
#ifndef __UTIME_H__
#define __UTIME_H__
-#include "glusterfs.h"
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "utime-autogen-fops.h"
typedef struct utime_priv {
diff --git a/xlators/lib/src/libxlator.c b/xlators/lib/src/libxlator.c
index e8e22bb7830..8075fa0c29f 100644
--- a/xlators/lib/src/libxlator.c
+++ b/xlators/lib/src/libxlator.c
@@ -7,7 +7,7 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "mem-types.h"
+
#include "libxlator.h"
int marker_xtime_default_gauge[] = {
@@ -198,10 +198,11 @@ cluster_markerxtime_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
}
if (dict_get_ptr(dict, marker_xattr, (void **)&net_timebuf)) {
+ local->count[MCNT_NOTFOUND]++;
+ UNLOCK(&frame->lock);
gf_log(this->name, GF_LOG_WARNING,
"Unable to get <uuid>.xtime attr");
- local->count[MCNT_NOTFOUND]++;
- goto unlock;
+ goto post_unlock;
}
if (local->count[MCNT_FOUND]) {
@@ -221,7 +222,7 @@ cluster_markerxtime_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
}
unlock:
UNLOCK(&frame->lock);
-
+post_unlock:
if (callcnt == 0)
cluster_marker_unwind(frame, marker_xattr, local->net_timebuf, 8, dict);
@@ -266,17 +267,17 @@ cluster_markeruuid_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
goto unlock;
} else if (volmark->retval) {
GF_FREE(local->volmark);
- local->volmark = memdup(volmark, sizeof(*volmark));
+ local->volmark = gf_memdup(volmark, sizeof(*volmark));
local->retval = volmark->retval;
} else if ((volmark->sec > local->volmark->sec) ||
((volmark->sec == local->volmark->sec) &&
(volmark->usec >= local->volmark->usec))) {
GF_FREE(local->volmark);
- local->volmark = memdup(volmark, sizeof(*volmark));
+ local->volmark = gf_memdup(volmark, sizeof(*volmark));
}
} else {
- local->volmark = memdup(volmark, sizeof(*volmark));
+ local->volmark = gf_memdup(volmark, sizeof(*volmark));
VALIDATE_OR_GOTO(local->volmark, unlock);
gf_uuid_unparse(volmark->uuid, vol_uuid);
if (volmark->retval)
diff --git a/xlators/lib/src/libxlator.h b/xlators/lib/src/libxlator.h
index 80224f9df38..81da4060d55 100644
--- a/xlators/lib/src/libxlator.h
+++ b/xlators/lib/src/libxlator.h
@@ -10,12 +10,14 @@
#ifndef _LIBXLATOR_H
#define _LIBXLATOR_H
-#include "xlator.h"
-#include "logging.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat.h"
-#include "compat-errno.h"
+#include <glusterfs/defaults.h>
+
+#include <stdint.h> // for int32_t
+#include "glusterfs/dict.h" // for dict_t, data_t
+#include "glusterfs/globals.h" // for xlator_t, loc_t
+#include "glusterfs/stack.h" // for call_frame_t
+#include <glusterfs/compat.h>
+#include <glusterfs/compat-errno.h>
#define MARKER_XATTR_PREFIX "trusted.glusterfs"
#define XTIME "xtime"
diff --git a/xlators/meta/src/active-link.c b/xlators/meta/src/active-link.c
index 6c060455887..7ee780d89e9 100644
--- a/xlators/meta/src/active-link.c
+++ b/xlators/meta/src/active-link.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
diff --git a/xlators/meta/src/cmdline-file.c b/xlators/meta/src/cmdline-file.c
index 3d6a020ac6c..eb24e985af9 100644
--- a/xlators/meta/src/cmdline-file.c
+++ b/xlators/meta/src/cmdline-file.c
@@ -8,13 +8,13 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
-#include "strfd.h"
-#include "lkowner.h"
+#include <glusterfs/strfd.h>
+#include <glusterfs/lkowner.h>
static int
cmdline_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
diff --git a/xlators/meta/src/frames-file.c b/xlators/meta/src/frames-file.c
index 1316aa71ce8..9a13db9a934 100644
--- a/xlators/meta/src/frames-file.c
+++ b/xlators/meta/src/frames-file.c
@@ -8,13 +8,13 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
-#include "strfd.h"
-#include "lkowner.h"
+#include <glusterfs/strfd.h>
+#include <glusterfs/lkowner.h>
static int
frames_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
@@ -30,9 +30,10 @@ frames_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
pool = this->ctx->pool;
+ strprintf(strfd, "{ \n\t\"Stack\": [\n");
+
LOCK(&pool->lock);
{
- strprintf(strfd, "{ \n\t\"Stack\": [\n");
list_for_each_entry(stack, &pool->all_frames, all_frames)
{
strprintf(strfd, "\t {\n");
diff --git a/xlators/meta/src/graph-dir.c b/xlators/meta/src/graph-dir.c
index 606fea904be..a8f4787880d 100644
--- a/xlators/meta/src/graph-dir.c
+++ b/xlators/meta/src/graph-dir.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
diff --git a/xlators/meta/src/graphs-dir.c b/xlators/meta/src/graphs-dir.c
index 7c8f4276c03..a1ffbca7d5a 100644
--- a/xlators/meta/src/graphs-dir.c
+++ b/xlators/meta/src/graphs-dir.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
diff --git a/xlators/meta/src/history-file.c b/xlators/meta/src/history-file.c
index a78fe27a54a..7742a635fed 100644
--- a/xlators/meta/src/history-file.c
+++ b/xlators/meta/src/history-file.c
@@ -8,13 +8,13 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
-#include "strfd.h"
-#include "statedump.h"
+#include <glusterfs/strfd.h>
+#include <glusterfs/statedump.h>
static int
history_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
diff --git a/xlators/meta/src/logfile-link.c b/xlators/meta/src/logfile-link.c
index 61cd4fee852..616a54518c0 100644
--- a/xlators/meta/src/logfile-link.c
+++ b/xlators/meta/src/logfile-link.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
diff --git a/xlators/meta/src/logging-dir.c b/xlators/meta/src/logging-dir.c
index 590b5532b38..46e6f9e95dd 100644
--- a/xlators/meta/src/logging-dir.c
+++ b/xlators/meta/src/logging-dir.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
diff --git a/xlators/meta/src/loglevel-file.c b/xlators/meta/src/loglevel-file.c
index 3f3f2707dc0..eeeeeaa5907 100644
--- a/xlators/meta/src/loglevel-file.c
+++ b/xlators/meta/src/loglevel-file.c
@@ -8,12 +8,12 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
-#include "strfd.h"
+#include <glusterfs/strfd.h>
static int
loglevel_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
diff --git a/xlators/meta/src/mallinfo-file.c b/xlators/meta/src/mallinfo-file.c
index d04fe9f4c1e..b4396d72189 100644
--- a/xlators/meta/src/mallinfo-file.c
+++ b/xlators/meta/src/mallinfo-file.c
@@ -8,12 +8,12 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
-#include "statedump.h"
+#include <glusterfs/statedump.h>
static int
mallinfo_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
diff --git a/xlators/meta/src/measure-file.c b/xlators/meta/src/measure-file.c
index 1529608bb25..52e92e48590 100644
--- a/xlators/meta/src/measure-file.c
+++ b/xlators/meta/src/measure-file.c
@@ -8,12 +8,12 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
-#include "strfd.h"
+#include <glusterfs/strfd.h>
static int
measure_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
diff --git a/xlators/meta/src/meminfo-file.c b/xlators/meta/src/meminfo-file.c
index d6122ae5013..d889dfb2ae8 100644
--- a/xlators/meta/src/meminfo-file.c
+++ b/xlators/meta/src/meminfo-file.c
@@ -8,13 +8,13 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
-#include "strfd.h"
-#include "statedump.h"
+#include <glusterfs/strfd.h>
+#include <glusterfs/statedump.h>
static int
meminfo_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
diff --git a/xlators/meta/src/meta-defaults.c b/xlators/meta/src/meta-defaults.c
index b39e9f21ff7..91c328473f8 100644
--- a/xlators/meta/src/meta-defaults.c
+++ b/xlators/meta/src/meta-defaults.c
@@ -8,13 +8,13 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
-#include "compat-errno.h"
+#include <glusterfs/compat-errno.h>
int
meta_default_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
@@ -145,11 +145,11 @@ meta_default_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
return default_readv_failure_cbk(frame, ENOMEM);
}
+ iov.iov_base = iobuf_ptr(iobuf);
+
/* iobref would have taken a ref */
iobuf_unref(iobuf);
- iov.iov_base = iobuf_ptr(iobuf);
-
copy_offset = min(meta_fd->size, offset);
copy_size = min(size, (meta_fd->size - copy_offset));
@@ -244,7 +244,7 @@ meta_default_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc,
int len = -1;
ops = meta_ops_get(loc->inode, this);
- if (!ops->link_fill) {
+ if (!ops || !ops->link_fill) {
META_STACK_UNWIND(readlink, frame, -1, EPERM, 0, 0, 0);
return 0;
}
diff --git a/xlators/meta/src/meta-helpers.c b/xlators/meta/src/meta-helpers.c
index d7d59c71296..cb54f547468 100644
--- a/xlators/meta/src/meta-helpers.c
+++ b/xlators/meta/src/meta-helpers.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
@@ -182,14 +182,15 @@ meta_uuid_copy(uuid_t dst, uuid_t src)
}
static void
-default_meta_iatt_fill(struct iatt *iatt, inode_t *inode, ia_type_t type)
+default_meta_iatt_fill(struct iatt *iatt, inode_t *inode, ia_type_t type,
+ gf_boolean_t is_tunable)
{
struct timeval tv = {};
iatt->ia_type = type;
switch (type) {
case IA_IFDIR:
- iatt->ia_prot = ia_prot_from_st_mode(0755);
+ iatt->ia_prot = ia_prot_from_st_mode(0555);
iatt->ia_nlink = 2;
break;
case IA_IFLNK:
@@ -197,7 +198,7 @@ default_meta_iatt_fill(struct iatt *iatt, inode_t *inode, ia_type_t type)
iatt->ia_nlink = 1;
break;
default:
- iatt->ia_prot = ia_prot_from_st_mode(0644);
+ iatt->ia_prot = ia_prot_from_st_mode(is_tunable ? 0644 : 0444);
iatt->ia_nlink = 1;
break;
}
@@ -225,7 +226,7 @@ meta_iatt_fill(struct iatt *iatt, inode_t *inode, ia_type_t type)
return;
if (!ops->iatt_fill)
- default_meta_iatt_fill(iatt, inode, type);
+ default_meta_iatt_fill(iatt, inode, type, !!ops->file_write);
else
ops->iatt_fill(THIS, inode, iatt);
return;
diff --git a/xlators/meta/src/meta-hooks.h b/xlators/meta/src/meta-hooks.h
index 2ee006f7876..7208641398a 100644
--- a/xlators/meta/src/meta-hooks.h
+++ b/xlators/meta/src/meta-hooks.h
@@ -10,7 +10,7 @@
#ifndef __META_HOOKS_H
#define __META_HOOKS_H
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#define DECLARE_HOOK(name) \
int meta_##name##_hook(call_frame_t *frame, xlator_t *this, loc_t *loc, \
diff --git a/xlators/meta/src/meta-mem-types.h b/xlators/meta/src/meta-mem-types.h
index 0fdfea4fac6..033c306682f 100644
--- a/xlators/meta/src/meta-mem-types.h
+++ b/xlators/meta/src/meta-mem-types.h
@@ -11,7 +11,7 @@
#ifndef __META_MEM_TYPES_H__
#define __META_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_meta_mem_types_ {
gf_meta_mt_priv_t = gf_common_mt_end + 1,
diff --git a/xlators/meta/src/meta.c b/xlators/meta/src/meta.c
index 9294bbdae0c..e1b9a2b6581 100644
--- a/xlators/meta/src/meta.c
+++ b/xlators/meta/src/meta.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
@@ -226,11 +226,11 @@ out:
return ret;
}
-int
+void
fini(xlator_t *this)
{
GF_FREE(this->private);
- return 0;
+ return;
}
struct xlator_fops fops = {.lookup = meta_lookup,
@@ -262,3 +262,15 @@ struct volume_options options[] = {
.description = "Name of default meta directory."},
{.key = {NULL}},
};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "meta",
+ .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/meta/src/meta.h b/xlators/meta/src/meta.h
index 10609a9ec05..7f0cf28808a 100644
--- a/xlators/meta/src/meta.h
+++ b/xlators/meta/src/meta.h
@@ -10,7 +10,7 @@
#ifndef __META_H__
#define __META_H__
-#include "strfd.h"
+#include <glusterfs/strfd.h>
#define DEFAULT_META_DIR_NAME ".meta"
diff --git a/xlators/meta/src/name-file.c b/xlators/meta/src/name-file.c
index edba7142a69..5874a24d78a 100644
--- a/xlators/meta/src/name-file.c
+++ b/xlators/meta/src/name-file.c
@@ -8,13 +8,13 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
-#include "strfd.h"
-#include "lkowner.h"
+#include <glusterfs/strfd.h>
+#include <glusterfs/lkowner.h>
static int
name_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
diff --git a/xlators/meta/src/option-file.c b/xlators/meta/src/option-file.c
index 132b28fa86a..ff55eca592f 100644
--- a/xlators/meta/src/option-file.c
+++ b/xlators/meta/src/option-file.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
diff --git a/xlators/meta/src/options-dir.c b/xlators/meta/src/options-dir.c
index bf2d07a3701..d68a7eeaffc 100644
--- a/xlators/meta/src/options-dir.c
+++ b/xlators/meta/src/options-dir.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
diff --git a/xlators/meta/src/private-file.c b/xlators/meta/src/private-file.c
index 068c8ebf191..23ec319456b 100644
--- a/xlators/meta/src/private-file.c
+++ b/xlators/meta/src/private-file.c
@@ -8,13 +8,13 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
-#include "strfd.h"
-#include "statedump.h"
+#include <glusterfs/strfd.h>
+#include <glusterfs/statedump.h>
static int
private_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
diff --git a/xlators/meta/src/process_uuid-file.c b/xlators/meta/src/process_uuid-file.c
index bbf2e19f86e..a24c1b57ab3 100644
--- a/xlators/meta/src/process_uuid-file.c
+++ b/xlators/meta/src/process_uuid-file.c
@@ -8,13 +8,13 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
-#include "strfd.h"
-#include "lkowner.h"
+#include <glusterfs/strfd.h>
+#include <glusterfs/lkowner.h>
static int
process_uuid_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
diff --git a/xlators/meta/src/profile-file.c b/xlators/meta/src/profile-file.c
index 9eb5c050dbe..829dcb77451 100644
--- a/xlators/meta/src/profile-file.c
+++ b/xlators/meta/src/profile-file.c
@@ -8,13 +8,13 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
-#include "strfd.h"
-#include "statedump.h"
+#include <glusterfs/strfd.h>
+#include <glusterfs/statedump.h>
static int
profile_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
diff --git a/xlators/meta/src/root-dir.c b/xlators/meta/src/root-dir.c
index 18d4fd6b8ec..80292bd3dda 100644
--- a/xlators/meta/src/root-dir.c
+++ b/xlators/meta/src/root-dir.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
diff --git a/xlators/meta/src/subvolume-link.c b/xlators/meta/src/subvolume-link.c
index 45d909daca2..5b1f752efd0 100644
--- a/xlators/meta/src/subvolume-link.c
+++ b/xlators/meta/src/subvolume-link.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
diff --git a/xlators/meta/src/subvolumes-dir.c b/xlators/meta/src/subvolumes-dir.c
index e9582c53cf7..3cb170ea1f4 100644
--- a/xlators/meta/src/subvolumes-dir.c
+++ b/xlators/meta/src/subvolumes-dir.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
diff --git a/xlators/meta/src/top-link.c b/xlators/meta/src/top-link.c
index bfee742a1be..33f0d407411 100644
--- a/xlators/meta/src/top-link.c
+++ b/xlators/meta/src/top-link.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
diff --git a/xlators/meta/src/type-file.c b/xlators/meta/src/type-file.c
index 00141275957..ece342a0b2a 100644
--- a/xlators/meta/src/type-file.c
+++ b/xlators/meta/src/type-file.c
@@ -8,13 +8,13 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
-#include "strfd.h"
-#include "lkowner.h"
+#include <glusterfs/strfd.h>
+#include <glusterfs/lkowner.h>
static int
type_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
diff --git a/xlators/meta/src/version-file.c b/xlators/meta/src/version-file.c
index 7331684238c..36276fb810a 100644
--- a/xlators/meta/src/version-file.c
+++ b/xlators/meta/src/version-file.c
@@ -8,13 +8,13 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
-#include "strfd.h"
-#include "lkowner.h"
+#include <glusterfs/strfd.h>
+#include <glusterfs/lkowner.h>
static int
version_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
diff --git a/xlators/meta/src/view-dir.c b/xlators/meta/src/view-dir.c
index 313bf6d7124..30931061567 100644
--- a/xlators/meta/src/view-dir.c
+++ b/xlators/meta/src/view-dir.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
diff --git a/xlators/meta/src/volfile-file.c b/xlators/meta/src/volfile-file.c
index 5242a14bb8c..b2e2562ab8b 100644
--- a/xlators/meta/src/volfile-file.c
+++ b/xlators/meta/src/volfile-file.c
@@ -8,12 +8,12 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
-#include "strfd.h"
+#include <glusterfs/strfd.h>
static int
xldump_options(dict_t *this, char *key, data_t *value, void *strfd)
diff --git a/xlators/meta/src/xlator-dir.c b/xlators/meta/src/xlator-dir.c
index 1715cbb56a0..86189715790 100644
--- a/xlators/meta/src/xlator-dir.c
+++ b/xlators/meta/src/xlator-dir.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "meta-mem-types.h"
#include "meta.h"
diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am
index 6d09e37477c..685beb42d27 100644
--- a/xlators/mgmt/glusterd/src/Makefile.am
+++ b/xlators/mgmt/glusterd/src/Makefile.am
@@ -5,31 +5,34 @@ endif
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/mgmt
glusterd_la_CPPFLAGS = $(AM_CPPFLAGS) \
-DFILTERDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/filter\" \
- -DXLATORDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator\"
+ -DXLATORDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator\" \
+ -I$(top_srcdir)/libglusterd/src/
+
glusterd_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c \
glusterd-op-sm.c glusterd-utils.c glusterd-rpc-ops.c \
glusterd-store.c glusterd-handshake.c glusterd-pmap.c \
glusterd-volgen.c glusterd-rebalance.c \
glusterd-quota.c glusterd-bitrot.c glusterd-geo-rep.c \
- glusterd-replace-brick.c glusterd-log-ops.c glusterd-tier.c \
+ glusterd-replace-brick.c glusterd-log-ops.c \
glusterd-volume-ops.c glusterd-brick-ops.c glusterd-mountbroker.c \
glusterd-syncop.c glusterd-hooks.c glusterd-volume-set.c \
glusterd-locks.c glusterd-snapshot.c glusterd-mgmt-handler.c \
glusterd-mgmt.c glusterd-peer-utils.c glusterd-statedump.c \
glusterd-snapshot-utils.c glusterd-conn-mgmt.c \
- glusterd-proc-mgmt.c glusterd-svc-mgmt.c glusterd-shd-svc.c \
+ glusterd-proc-mgmt.c glusterd-svc-mgmt.c \
glusterd-nfs-svc.c glusterd-quotad-svc.c glusterd-svc-helper.c \
glusterd-conn-helper.c glusterd-snapd-svc.c glusterd-snapd-svc-helper.c \
glusterd-bitd-svc.c glusterd-scrub-svc.c glusterd-server-quorum.c \
- glusterd-reset-brick.c glusterd-tierd-svc.c glusterd-tierd-svc-helper.c \
- glusterd-gfproxyd-svc.c glusterd-gfproxyd-svc-helper.c
-
+ glusterd-reset-brick.c glusterd-shd-svc.c glusterd-shd-svc-helper.c \
+ glusterd-gfproxyd-svc.c glusterd-gfproxyd-svc-helper.c glusterd-ganesha.c \
+ $(CONTRIBDIR)/mount/mntent.c
glusterd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/libglusterd/src/libglusterd.la \
$(top_builddir)/rpc/xdr/src/libgfxdr.la \
$(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
- $(XML_LIBS) -lcrypto $(URCU_LIBS) $(URCU_CDS_LIBS)
+ $(XML_LIBS) -lcrypto $(URCU_LIBS) $(URCU_CDS_LIBS) $(LIB_DL) $(GF_XLATOR_MGNT_LIBADD)
noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h \
glusterd-sm.h glusterd-store.h glusterd-mem-types.h \
@@ -38,13 +41,14 @@ noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h \
glusterd-mgmt.h glusterd-messages.h glusterd-peer-utils.h \
glusterd-statedump.h glusterd-snapshot-utils.h glusterd-geo-rep.h \
glusterd-conn-mgmt.h glusterd-conn-helper.h glusterd-proc-mgmt.h \
- glusterd-svc-mgmt.h glusterd-shd-svc.h glusterd-nfs-svc.h \
+ glusterd-svc-mgmt.h glusterd-nfs-svc.h \
glusterd-quotad-svc.h glusterd-svc-helper.h glusterd-snapd-svc.h \
glusterd-snapd-svc-helper.h glusterd-rcu.h glusterd-bitd-svc.h \
glusterd-scrub-svc.h glusterd-server-quorum.h glusterd-errno.h \
- glusterd-tierd-svc.h glusterd-tierd-svc-helper.h \
+ glusterd-shd-svc.h glusterd-shd-svc-helper.h \
glusterd-gfproxyd-svc.h glusterd-gfproxyd-svc-helper.h \
- $(CONTRIBDIR)/userspace-rcu/rculist-extra.h
+ $(CONTRIBDIR)/userspace-rcu/rculist-extra.h \
+ $(CONTRIBDIR)/mount/mntent_compat.h
AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
-I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
@@ -52,7 +56,10 @@ AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
-I$(CONTRIBDIR)/mount -I$(CONTRIBDIR)/userspace-rcu \
-DSBIN_DIR=\"$(sbindir)\" -DDATADIR=\"$(localstatedir)\" \
-DGSYNCD_PREFIX=\"$(GLUSTERFS_LIBEXECDIR)\" \
- -DSYNCDAEMON_COMPILE=$(SYNCDAEMON_COMPILE)
+ -DCONFDIR=\"$(localstatedir)/run/gluster/shared_storage/nfs-ganesha\" \
+ -DGANESHA_PREFIX=\"$(libexecdir)/ganesha\" \
+ -DSYNCDAEMON_COMPILE=$(SYNCDAEMON_COMPILE) \
+ -I$(top_srcdir)/libglusterd/src/
AM_CFLAGS = -Wall $(GF_CFLAGS) $(URCU_CFLAGS) $(URCU_CDS_CFLAGS) $(XML_CFLAGS)
diff --git a/xlators/mgmt/glusterd/src/glusterd-bitd-svc.c b/xlators/mgmt/glusterd/src/glusterd-bitd-svc.c
index c8e30a5682f..6adb799b18f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-bitd-svc.c
+++ b/xlators/mgmt/glusterd/src/glusterd-bitd-svc.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "globals.h"
-#include "run.h"
+#include <glusterfs/globals.h>
+#include <glusterfs/run.h>
#include "glusterd.h"
#include "glusterd-utils.h"
#include "glusterd-volgen.h"
@@ -201,6 +201,6 @@ manager:
ret = priv->bitd_svc.manager(&(priv->bitd_svc), NULL, PROC_START_NO_WAIT);
out:
- gf_msg_debug(this->name, 0, "Returning %d", ret);
+ gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret);
return ret;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-bitrot.c b/xlators/mgmt/glusterd/src/glusterd-bitrot.c
index 0608badb91d..37429fe9214 100644
--- a/xlators/mgmt/glusterd/src/glusterd-bitrot.c
+++ b/xlators/mgmt/glusterd/src/glusterd-bitrot.c
@@ -8,7 +8,7 @@
cases as published by the Free Software Foundation.
*/
-#include "common-utils.h"
+#include <glusterfs/common-utils.h>
#include "cli1-xdr.h"
#include "xdr-generic.h"
#include "glusterd.h"
@@ -16,10 +16,10 @@
#include "glusterd-store.h"
#include "glusterd-utils.h"
#include "glusterd-volgen.h"
-#include "run.h"
-#include "syscall.h"
-#include "byte-order.h"
-#include "compat-errno.h"
+#include <glusterfs/run.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/compat-errno.h>
#include "glusterd-scrub-svc.h"
#include "glusterd-messages.h"
@@ -34,6 +34,7 @@ const char *gd_bitrot_op_list[GF_BITROT_OPTION_TYPE_MAX] = {
[GF_BITROT_OPTION_TYPE_SCRUB_FREQ] = "scrub-frequency",
[GF_BITROT_OPTION_TYPE_SCRUB] = "scrub",
[GF_BITROT_OPTION_TYPE_EXPIRY_TIME] = "expiry-time",
+ [GF_BITROT_OPTION_TYPE_SIGNER_THREADS] = "signer-threads",
};
int
@@ -319,7 +320,7 @@ glusterd_bitrot_expiry_time(glusterd_volinfo_t *volinfo, dict_t *dict,
int32_t ret = -1;
uint32_t expiry_time = 0;
xlator_t *this = NULL;
- char dkey[1024] = {
+ char dkey[32] = {
0,
};
@@ -354,6 +355,81 @@ out:
return ret;
}
+static gf_boolean_t
+is_bitd_configure_noop(xlator_t *this, glusterd_volinfo_t *volinfo)
+{
+ gf_boolean_t noop = _gf_true;
+ glusterd_brickinfo_t *brickinfo = NULL;
+
+ if (!glusterd_is_bitrot_enabled(volinfo))
+ goto out;
+ else if (volinfo->status != GLUSTERD_STATUS_STARTED)
+ goto out;
+ else {
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ if (!glusterd_is_local_brick(this, volinfo, brickinfo))
+ continue;
+ noop = _gf_false;
+ return noop;
+ }
+ }
+out:
+ return noop;
+}
+
+static int
+glusterd_bitrot_signer_threads(glusterd_volinfo_t *volinfo, dict_t *dict,
+ char *key, char **op_errstr)
+{
+ int32_t ret = -1;
+ uint32_t signer_th_count = 0;
+ uint32_t existing_th_count = 0;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ char dkey[32] = {
+ 0,
+ };
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+ ret = dict_get_uint32(dict, "signer-threads", &signer_th_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get bitrot signer thread count.");
+ goto out;
+ }
+
+ ret = dict_get_uint32(volinfo->dict, key, &existing_th_count);
+ if (ret == 0 && signer_th_count == existing_th_count) {
+ goto out;
+ }
+
+ snprintf(dkey, sizeof(dkey), "%d", signer_th_count);
+ ret = dict_set_dynstr_with_alloc(volinfo->dict, key, dkey);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Failed to set option %s", key);
+ goto out;
+ }
+
+ if (!is_bitd_configure_noop(this, volinfo)) {
+ ret = priv->bitd_svc.manager(&(priv->bitd_svc), NULL,
+ PROC_START_NO_WAIT);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BITDSVC_RECONF_FAIL,
+ "Failed to reconfigure bitrot services");
+ goto out;
+ }
+ }
+out:
+ return ret;
+}
+
static int
glusterd_bitrot_enable(glusterd_volinfo_t *volinfo, char **op_errstr)
{
@@ -594,6 +670,15 @@ glusterd_op_bitrot(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
volinfo, dict, "features.expiry-time", op_errstr);
if (ret)
goto out;
+ break;
+
+ case GF_BITROT_OPTION_TYPE_SIGNER_THREADS:
+ ret = glusterd_bitrot_signer_threads(
+ volinfo, dict, "features.signer-threads", op_errstr);
+ if (ret)
+ goto out;
+ break;
+
case GF_BITROT_CMD_SCRUB_STATUS:
case GF_BITROT_CMD_SCRUB_ONDEMAND:
break;
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index f64237c4e18..e56cd0e6c74 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -7,7 +7,7 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "common-utils.h"
+#include <glusterfs/common-utils.h>
#include "cli1-xdr.h"
#include "xdr-generic.h"
#include "glusterd.h"
@@ -20,42 +20,12 @@
#include "glusterd-svc-helper.h"
#include "glusterd-messages.h"
#include "glusterd-server-quorum.h"
-#include "run.h"
-#include "glusterd-volgen.h"
-#include "syscall.h"
+#include <glusterfs/run.h>
+#include <glusterfs/syscall.h>
#include <sys/signal.h>
/* misc */
-gf_boolean_t
-glusterd_is_tiering_supported(char *op_errstr)
-{
- xlator_t *this = NULL;
- glusterd_conf_t *conf = NULL;
- gf_boolean_t supported = _gf_false;
-
- this = THIS;
- GF_VALIDATE_OR_GOTO("glusterd", this, out);
-
- conf = this->private;
- GF_VALIDATE_OR_GOTO(this->name, conf, out);
-
- if (conf->op_version < GD_OP_VERSION_3_7_0)
- goto out;
-
- supported = _gf_true;
-
-out:
- if (!supported && op_errstr != NULL && conf)
- sprintf(op_errstr,
- "Tier operation failed. The cluster is "
- "operating at version %d. Tiering"
- " is unavailable in this version.",
- conf->op_version);
-
- return supported;
-}
-
/* In this function, we decide, based on the 'count' of the brick,
where to add it in the current volume. 'count' tells us already
how many of the given bricks are added. other argument are self-
@@ -212,20 +182,19 @@ gd_rmbr_validate_replica_count(glusterd_volinfo_t *volinfo,
{
int ret = -1;
int replica_nodes = 0;
+ xlator_t *this = NULL;
+ this = THIS;
+ GF_ASSERT(this);
switch (volinfo->type) {
- case GF_CLUSTER_TYPE_TIER:
- ret = 1;
- goto out;
-
case GF_CLUSTER_TYPE_NONE:
case GF_CLUSTER_TYPE_DISPERSE:
snprintf(err_str, err_len,
"replica count (%d) option given for non replicate "
"volume %s",
replica_count, volinfo->volname);
- gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_VOL_NOT_REPLICA, "%s",
- err_str);
+ gf_smsg(this->name, GF_LOG_WARNING, EINVAL, GD_MSG_INVALID_ARGUMENT,
+ err_str, NULL);
goto out;
case GF_CLUSTER_TYPE_REPLICATE:
@@ -236,8 +205,8 @@ gd_rmbr_validate_replica_count(glusterd_volinfo_t *volinfo,
"than volume %s's replica count (%d)",
replica_count, volinfo->volname,
volinfo->replica_count);
- gf_msg(THIS->name, GF_LOG_WARNING, EINVAL, GD_MSG_INVALID_ENTRY,
- "%s", err_str);
+ gf_smsg(this->name, GF_LOG_WARNING, EINVAL,
+ GD_MSG_INVALID_ARGUMENT, err_str, NULL);
goto out;
}
if (replica_count == volinfo->replica_count) {
@@ -251,8 +220,8 @@ gd_rmbr_validate_replica_count(glusterd_volinfo_t *volinfo,
"(or %dxN)",
brick_count, volinfo->dist_leaf_count,
volinfo->dist_leaf_count);
- gf_msg(THIS->name, GF_LOG_WARNING, EINVAL,
- GD_MSG_INVALID_ENTRY, "%s", err_str);
+ gf_smsg(this->name, GF_LOG_WARNING, EINVAL,
+ GD_MSG_INVALID_ARGUMENT, err_str, NULL);
goto out;
}
ret = 1;
@@ -267,6 +236,8 @@ gd_rmbr_validate_replica_count(glusterd_volinfo_t *volinfo,
"need %d(xN) bricks for reducing replica "
"count of the volume from %d to %d",
replica_nodes, volinfo->replica_count, replica_count);
+ gf_smsg(this->name, GF_LOG_WARNING, EINVAL,
+ GD_MSG_INVALID_ARGUMENT, err_str, NULL);
goto out;
}
break;
@@ -316,6 +287,7 @@ __glusterd_handle_add_brick(rpcsvc_request_t *req)
// failed to decode msg;
req->rpc_err = GARBAGE_ARGS;
snprintf(err_str, sizeof(err_str), "Garbage args received");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto out;
}
@@ -351,10 +323,13 @@ __glusterd_handle_add_brick(rpcsvc_request_t *req)
goto out;
}
- if (!(ret = glusterd_check_volume_exists(volname))) {
- ret = -1;
- snprintf(err_str, sizeof(err_str), "Volume %s does not exist", volname);
- gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, "%s",
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ snprintf(err_str, sizeof(err_str),
+ "Unable to get volinfo "
+ "for volume name %s",
+ volname);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, "%s",
err_str);
goto out;
}
@@ -396,57 +371,8 @@ __glusterd_handle_add_brick(rpcsvc_request_t *req)
goto out;
}
- ret = glusterd_volinfo_find(volname, &volinfo);
- if (ret) {
- snprintf(err_str, sizeof(err_str),
- "Unable to get volinfo "
- "for volume name %s",
- volname);
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, "%s",
- err_str);
- goto out;
- }
-
total_bricks = volinfo->brick_count + brick_count;
- if (dict_getn(dict, "attach-tier", SLEN("attach-tier"))) {
- if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
- snprintf(err_str, sizeof(err_str), "Volume %s is already a tier.",
- volname);
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_ALREADY_TIER, "%s",
- err_str);
- ret = -1;
- goto out;
- }
-
- if (glusterd_is_tiering_supported(err_str) == _gf_false) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VERSION_UNSUPPORTED,
- "Tiering not supported at this version");
- ret = -1;
- goto out;
- }
-
- ret = dict_get_int32n(dict, "hot-type", SLEN("hot-type"), &type);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
- "failed to get type from dictionary");
- goto out;
- }
-
- goto brick_val;
- }
-
- ret = glusterd_disallow_op_for_tier(volinfo, GD_OP_ADD_BRICK, -1);
- if (ret) {
- snprintf(err_str, sizeof(err_str),
- "Add-brick operation is "
- "not supported on a tiered volume %s",
- volname);
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_UNSUPPORTED, "%s",
- err_str);
- goto out;
- }
-
if (!stripe_count && !replica_count) {
if (volinfo->type == GF_CLUSTER_TYPE_NONE)
goto brick_val;
@@ -589,6 +515,8 @@ subvol_matcher_verify(int *subvols, glusterd_volinfo_t *volinfo, char *err_str,
int i = 0;
int ret = 0;
int count = volinfo->replica_count - replica_count;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
if (replica_count && subvols) {
for (i = 0; i < volinfo->subvol_count; i++) {
@@ -598,6 +526,8 @@ subvol_matcher_verify(int *subvols, glusterd_volinfo_t *volinfo, char *err_str,
"Remove exactly %d"
" brick(s) from each subvolume.",
count);
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_BRICK_SUBVOL_VERIFY_FAIL, err_str, NULL);
break;
}
}
@@ -611,6 +541,8 @@ subvol_matcher_verify(int *subvols, glusterd_volinfo_t *volinfo, char *err_str,
ret = -1;
snprintf(err_str, err_len, "Bricks not from same subvol for %s",
vol_type);
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_BRICK_SUBVOL_VERIFY_FAIL, err_str, NULL);
break;
}
} while (++i < volinfo->subvol_count);
@@ -624,43 +556,6 @@ subvol_matcher_destroy(int *subvols)
GF_FREE(subvols);
}
-int
-glusterd_set_detach_bricks(dict_t *dict, glusterd_volinfo_t *volinfo)
-{
- char key[64] = "";
- char value[2048] = ""; /* hostname + path */
- int brick_num = 0;
- int hot_brick_num = 0;
- glusterd_brickinfo_t *brickinfo;
- int ret = 0;
- int32_t len = 0;
-
- /* cold tier bricks at tail of list so use reverse iteration */
- cds_list_for_each_entry_reverse(brickinfo, &volinfo->bricks, brick_list)
- {
- brick_num++;
- if (brick_num > volinfo->tier_info.cold_brick_count) {
- hot_brick_num++;
- sprintf(key, "brick%d", hot_brick_num);
- len = snprintf(value, sizeof(value), "%s:%s", brickinfo->hostname,
- brickinfo->path);
- if ((len < 0) || (len >= sizeof(value))) {
- return -1;
- }
-
- ret = dict_set_str(dict, key, strdup(value));
- if (ret)
- break;
- }
- }
-
- ret = dict_set_int32n(dict, "count", SLEN("count"), hot_brick_num);
- if (ret)
- return -1;
-
- return hot_brick_num;
-}
-
static int
glusterd_remove_brick_validate_arbiters(glusterd_volinfo_t *volinfo,
int32_t count, int32_t replica_count,
@@ -672,6 +567,9 @@ glusterd_remove_brick_validate_arbiters(glusterd_volinfo_t *volinfo,
glusterd_brickinfo_t *brickinfo = NULL;
glusterd_brickinfo_t *last = NULL;
char *arbiter_array = NULL;
+ xlator_t *this = NULL;
+ this = THIS;
+ GF_ASSERT(this);
if (volinfo->type != GF_CLUSTER_TYPE_REPLICATE)
goto out;
@@ -690,6 +588,8 @@ glusterd_remove_brick_validate_arbiters(glusterd_volinfo_t *volinfo,
"Remove arbiter "
"brick(s) only when converting from "
"arbiter to replica 2 subvolume.");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_REMOVE_ARBITER_BRICK, err_str, NULL);
ret = -1;
goto out;
}
@@ -713,7 +613,9 @@ glusterd_remove_brick_validate_arbiters(glusterd_volinfo_t *volinfo,
snprintf(err_str, err_len,
"Removed bricks "
"must contain arbiter when converting"
- " to plain distrubute.");
+ " to plain distribute.");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_REMOVE_ARBITER_BRICK, err_str, NULL);
ret = -1;
break;
}
@@ -737,6 +639,7 @@ __glusterd_handle_remove_brick(rpcsvc_request_t *req)
char key[64] = "";
int keylen;
int i = 1;
+ glusterd_conf_t *conf = NULL;
glusterd_volinfo_t *volinfo = NULL;
glusterd_brickinfo_t *brickinfo = NULL;
glusterd_brickinfo_t **brickinfo_list = NULL;
@@ -755,12 +658,15 @@ __glusterd_handle_remove_brick(rpcsvc_request_t *req)
GF_ASSERT(req);
this = THIS;
GF_ASSERT(this);
+ conf = this->private;
+ GF_ASSERT(conf);
ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
if (ret < 0) {
// failed to decode msg;
req->rpc_err = GARBAGE_ARGS;
snprintf(err_str, sizeof(err_str), "Received garbage args");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto out;
}
@@ -813,14 +719,6 @@ __glusterd_handle_remove_brick(rpcsvc_request_t *req)
goto out;
}
- if ((volinfo->type == GF_CLUSTER_TYPE_TIER) &&
- (glusterd_is_tiering_supported(err_str) == _gf_false)) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VERSION_UNSUPPORTED,
- "Tiering not supported at this version");
- ret = -1;
- goto out;
- }
-
ret = dict_get_int32n(dict, "command", SLEN("command"), &cmd);
if (ret) {
snprintf(err_str, sizeof(err_str),
@@ -831,15 +729,6 @@ __glusterd_handle_remove_brick(rpcsvc_request_t *req)
goto out;
}
- ret = glusterd_disallow_op_for_tier(volinfo, GD_OP_REMOVE_BRICK, cmd);
- if (ret) {
- snprintf(err_str, sizeof(err_str),
- "Removing brick from a Tier volume is not allowed");
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_UNSUPPORTED, "%s",
- err_str);
- goto out;
- }
-
ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"),
&replica_count);
if (!ret) {
@@ -891,8 +780,7 @@ __glusterd_handle_remove_brick(rpcsvc_request_t *req)
/* Do not allow remove-brick if the bricks given is less than
the replica count or stripe count */
- if (!replica_count && (volinfo->type != GF_CLUSTER_TYPE_NONE) &&
- (volinfo->type != GF_CLUSTER_TYPE_TIER)) {
+ if (!replica_count && (volinfo->type != GF_CLUSTER_TYPE_NONE)) {
if (volinfo->dist_leaf_count && (count % volinfo->dist_leaf_count)) {
snprintf(err_str, sizeof(err_str),
"Remove brick "
@@ -905,18 +793,13 @@ __glusterd_handle_remove_brick(rpcsvc_request_t *req)
}
}
- /* subvol match is not required for tiered volume*/
if ((volinfo->type != GF_CLUSTER_TYPE_NONE) &&
- (volinfo->type != GF_CLUSTER_TYPE_TIER) &&
(volinfo->subvol_count > 1)) {
ret = subvol_matcher_init(&subvols, volinfo->subvol_count);
if (ret)
goto out;
}
- if (volinfo->type == GF_CLUSTER_TYPE_TIER)
- count = glusterd_set_detach_bricks(dict, volinfo);
-
brickinfo_list = GF_CALLOC(count, sizeof(*brickinfo_list),
gf_common_mt_pointer);
if (!brickinfo_list) {
@@ -957,18 +840,10 @@ __glusterd_handle_remove_brick(rpcsvc_request_t *req)
(volinfo->brick_count <= volinfo->dist_leaf_count))
continue;
- /* Find which subvolume the brick belongs to.
- * subvol match is not required for tiered volume
- *
- */
- if (volinfo->type != GF_CLUSTER_TYPE_TIER)
- subvol_matcher_update(subvols, volinfo, brickinfo);
+ subvol_matcher_update(subvols, volinfo, brickinfo);
}
- /* Check if the bricks belong to the same subvolumes.*/
- /* subvol match is not required for tiered volume*/
if ((volinfo->type != GF_CLUSTER_TYPE_NONE) &&
- (volinfo->type != GF_CLUSTER_TYPE_TIER) &&
(volinfo->subvol_count > 1)) {
ret = subvol_matcher_verify(subvols, volinfo, err_str, sizeof(err_str),
vol_type, replica_count);
@@ -982,7 +857,17 @@ __glusterd_handle_remove_brick(rpcsvc_request_t *req)
if (ret)
goto out;
- ret = glusterd_op_begin_synctask(req, GD_OP_REMOVE_BRICK, dict);
+ if (conf->op_version < GD_OP_VERSION_8_0) {
+ gf_msg_debug(this->name, 0,
+ "The cluster is operating at "
+ "version less than %d. remove-brick operation"
+ "falling back to syncop framework.",
+ GD_OP_VERSION_8_0);
+ ret = glusterd_op_begin_synctask(req, GD_OP_REMOVE_BRICK, dict);
+ } else {
+ ret = glusterd_mgmt_v3_initiate_all_phases(req, GD_OP_REMOVE_BRICK,
+ dict);
+ }
out:
if (ret) {
@@ -1132,13 +1017,13 @@ glusterd_op_perform_add_bricks(glusterd_volinfo_t *volinfo, int32_t count,
0,
};
gf_boolean_t restart_needed = 0;
- int caps = 0;
int brickid = 0;
char key[64] = "";
char *brick_mount_dir = NULL;
xlator_t *this = NULL;
glusterd_conf_t *conf = NULL;
gf_boolean_t is_valid_add_brick = _gf_false;
+ gf_boolean_t restart_shd = _gf_false;
struct statvfs brickstat = {
0,
};
@@ -1226,10 +1111,7 @@ glusterd_op_perform_add_bricks(glusterd_volinfo_t *volinfo, int32_t count,
}
brickinfo->statfs_fsid = brickstat.f_fsid;
}
- /* hot tier bricks are added to head of brick list */
- if (dict_getn(dict, "attach-tier", SLEN("attach-tier"))) {
- cds_list_add(&brickinfo->brick_list, &volinfo->bricks);
- } else if (stripe_count || replica_count) {
+ if (stripe_count || replica_count) {
add_brick_at_right_order(brickinfo, volinfo, (i - 1), stripe_count,
replica_count);
} else {
@@ -1294,22 +1176,19 @@ glusterd_op_perform_add_bricks(glusterd_volinfo_t *volinfo, int32_t count,
if (count)
brick = strtok_r(brick_list + 1, " \n", &saveptr);
-#ifdef HAVE_BD_XLATOR
- if (brickinfo->vg[0])
- caps = CAPS_BD | CAPS_THIN | CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT;
-#endif
- /* This check needs to be added to distinguish between
- * attach-tier commands and add-brick commands.
- * When a tier is attached, adding is done via add-brick
- * and setting of pending xattrs shouldn't be done for
- * attach-tiers as they are virtually new volumes.
- */
if (glusterd_is_volume_replicate(volinfo)) {
- if (replica_count &&
- !dict_getn(dict, "attach-tier", SLEN("attach-tier")) &&
- conf->op_version >= GD_OP_VERSION_3_7_10) {
+ if (replica_count && conf->op_version >= GD_OP_VERSION_3_7_10) {
is_valid_add_brick = _gf_true;
+ if (volinfo->status == GLUSTERD_STATUS_STARTED) {
+ ret = volinfo->shd.svc.stop(&(volinfo->shd.svc), SIGTERM);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0,
+ GD_MSG_GLUSTER_SERVICES_STOP_FAIL,
+ "Failed to stop shd for %s.", volinfo->volname);
+ }
+ restart_shd = _gf_true;
+ }
ret = generate_dummy_client_volfiles(volinfo);
if (ret) {
gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
@@ -1324,22 +1203,6 @@ glusterd_op_perform_add_bricks(glusterd_volinfo_t *volinfo, int32_t count,
_gf_true);
if (ret)
goto out;
-#ifdef HAVE_BD_XLATOR
- char msg[1024] = "";
- /* Check for VG/thin pool if its BD volume */
- if (brickinfo->vg[0]) {
- ret = glusterd_is_valid_vg(brickinfo, 0, msg);
- if (ret) {
- gf_msg(THIS->name, GF_LOG_CRITICAL, 0, GD_MSG_INVALID_VG, "%s",
- msg);
- goto out;
- }
- /* if anyone of the brick does not have thin support,
- disable it for entire volume */
- caps &= brickinfo->caps;
- } else
- caps = 0;
-#endif
if (gf_uuid_is_null(brickinfo->uuid)) {
ret = glusterd_resolve_brick(brickinfo);
@@ -1384,7 +1247,6 @@ glusterd_op_perform_add_bricks(glusterd_volinfo_t *volinfo, int32_t count,
dict_foreach(volinfo->gsync_slaves, _glusterd_restart_gsync_session,
&param);
}
- volinfo->caps = caps;
generate_volfiles:
if (conf->op_version <= GD_OP_VERSION_3_7_5) {
@@ -1401,6 +1263,14 @@ generate_volfiles:
out:
GF_FREE(free_ptr1);
GF_FREE(free_ptr2);
+ if (restart_shd) {
+ if (volinfo->shd.svc.manager(&(volinfo->shd.svc), volinfo,
+ PROC_START_NO_WAIT)) {
+ gf_msg("glusterd", GF_LOG_CRITICAL, 0,
+ GD_MSG_GLUSTER_SERVICE_START_FAIL,
+ "Failed to start shd for %s.", volinfo->volname);
+ }
+ }
gf_msg_debug("glusterd", 0, "Returning %d", ret);
return ret;
@@ -1489,14 +1359,14 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
"Unable to get volume name");
goto out;
}
ret = glusterd_volinfo_find(volname, &volinfo);
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
"Unable to find volume: %s", volname);
goto out;
}
@@ -1508,13 +1378,7 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"),
&replica_count);
if (ret) {
- gf_msg_debug(THIS->name, 0, "Unable to get replica count");
- }
-
- ret = dict_get_int32n(dict, "arbiter-count", SLEN("arbiter-count"),
- &arbiter_count);
- if (ret) {
- gf_msg_debug(THIS->name, 0, "No arbiter count present in the dict");
+ gf_msg_debug(this->name, 0, "Unable to get replica count");
}
if (replica_count > 0) {
@@ -1528,19 +1392,20 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
}
}
- if (glusterd_is_volume_replicate(volinfo)) {
+ glusterd_add_peers_to_auth_list(volname);
+
+ if (replica_count && glusterd_is_volume_replicate(volinfo)) {
/* Do not allow add-brick for stopped volumes when replica-count
* is being increased.
*/
- if (conf->op_version >= GD_OP_VERSION_3_7_10 &&
- !dict_getn(dict, "attach-tier", SLEN("attach-tier")) &&
- replica_count && GLUSTERD_STATUS_STOPPED == volinfo->status) {
+ if (GLUSTERD_STATUS_STOPPED == volinfo->status &&
+ conf->op_version >= GD_OP_VERSION_3_7_10) {
ret = -1;
snprintf(msg, sizeof(msg),
" Volume must not be in"
" stopped state when replica-count needs to "
" be increased.");
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
msg);
*op_errstr = gf_strdup(msg);
goto out;
@@ -1548,25 +1413,31 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
/* op-version check for replica 2 to arbiter conversion. If we
* don't have this check, an older peer added as arbiter brick
* will not have the arbiter xlator in its volfile. */
- if ((conf->op_version < GD_OP_VERSION_3_8_0) && (arbiter_count == 1) &&
- (replica_count == 3)) {
- ret = -1;
- snprintf(msg, sizeof(msg),
- "Cluster op-version must "
- "be >= 30800 to add arbiter brick to a "
- "replica 2 volume.");
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
- msg);
- *op_errstr = gf_strdup(msg);
- goto out;
+ if ((replica_count == 3) && (conf->op_version < GD_OP_VERSION_3_8_0)) {
+ ret = dict_get_int32n(dict, "arbiter-count", SLEN("arbiter-count"),
+ &arbiter_count);
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "No arbiter count present in the dict");
+ } else if (arbiter_count == 1) {
+ ret = -1;
+ snprintf(msg, sizeof(msg),
+ "Cluster op-version must "
+ "be >= 30800 to add arbiter brick to a "
+ "replica 2 volume.");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
+ msg);
+ *op_errstr = gf_strdup(msg);
+ goto out;
+ }
}
/* Do not allow increasing replica count for arbiter volumes. */
- if (replica_count && volinfo->arbiter_count) {
+ if (volinfo->arbiter_count) {
ret = -1;
snprintf(msg, sizeof(msg),
"Increasing replica count "
"for arbiter volumes is not supported.");
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
msg);
*op_errstr = gf_strdup(msg);
goto out;
@@ -1575,6 +1446,43 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
is_force = dict_get_str_boolean(dict, "force", _gf_false);
+ /* Check brick order if the volume type is replicate or disperse. If
+ * force at the end of command not given then check brick order.
+ * doing this check at the originator node is sufficient.
+ */
+
+ if (!is_force && is_origin_glusterd(dict)) {
+ ret = 0;
+ if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) {
+ gf_msg_debug(this->name, 0,
+ "Replicate cluster type "
+ "found. Checking brick order.");
+ if (replica_count)
+ ret = glusterd_check_brick_order(dict, msg, volinfo->type,
+ &volname, &bricks, &count,
+ replica_count);
+ else
+ ret = glusterd_check_brick_order(dict, msg, volinfo->type,
+ &volname, &bricks, &count,
+ volinfo->replica_count);
+ } else if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) {
+ gf_msg_debug(this->name, 0,
+ "Disperse cluster type"
+ " found. Checking brick order.");
+ ret = glusterd_check_brick_order(dict, msg, volinfo->type, &volname,
+ &bricks, &count,
+ volinfo->disperse_count);
+ }
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER,
+ "Not adding brick because of "
+ "bad brick order. %s",
+ msg);
+ *op_errstr = gf_strdup(msg);
+ goto out;
+ }
+ }
+
if (volinfo->replica_count < replica_count && !is_force) {
cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
{
@@ -1591,7 +1499,7 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
if (len < 0) {
strcpy(msg, "<error>");
}
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
msg);
*op_errstr = gf_strdup(msg);
goto out;
@@ -1623,46 +1531,40 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
"Volume name %s rebalance is in "
"progress. Please retry after completion",
volname);
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_OIP_RETRY_LATER, "%s", msg);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OIP_RETRY_LATER, "%s", msg);
*op_errstr = gf_strdup(msg);
ret = -1;
goto out;
}
- if (dict_getn(dict, "attach-tier", SLEN("attach-tier"))) {
- /*
- * This check is needed because of add/remove brick
- * is not supported on a tiered volume. So once a tier
- * is attached we cannot commit or stop the remove-brick
- * task. Please change this comment once we start supporting
- * add/remove brick on a tiered volume.
- */
- if (!gd_is_remove_brick_committed(volinfo)) {
- snprintf(msg, sizeof(msg),
- "An earlier remove-brick "
- "task exists for volume %s. Either commit it"
- " or stop it before attaching a tier.",
- volinfo->volname);
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_OLD_REMOVE_BRICK_EXISTS,
- "%s", msg);
- *op_errstr = gf_strdup(msg);
- ret = -1;
+ if (volinfo->snap_count > 0 || !cds_list_empty(&volinfo->snap_volumes)) {
+ snprintf(msg, sizeof(msg),
+ "Volume %s has %" PRIu64
+ " snapshots. "
+ "Changing the volume configuration will not effect snapshots."
+ "But the snapshot brick mount should be intact to "
+ "make them function.",
+ volname, volinfo->snap_count);
+ gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_SNAP_WARN, "%s", msg);
+ msg[0] = '\0';
+ }
+
+ if (!count) {
+ ret = dict_get_int32n(dict, "count", SLEN("count"), &count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get count");
goto out;
}
}
- ret = dict_get_int32n(dict, "count", SLEN("count"), &count);
- if (ret) {
- gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
- "Unable to get count");
- goto out;
- }
-
- ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &bricks);
- if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
- "Unable to get bricks");
- goto out;
+ if (!bricks) {
+ ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &bricks);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get bricks");
+ goto out;
+ }
}
if (bricks) {
@@ -1681,7 +1583,7 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
"brick path %s is "
"too long",
brick);
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRKPATH_TOO_LONG, "%s",
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRKPATH_TOO_LONG, "%s",
msg);
*op_errstr = gf_strdup(msg);
@@ -1692,7 +1594,7 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
ret = glusterd_brickinfo_new_from_brick(brick, &brickinfo, _gf_true,
NULL);
if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_NOT_FOUND,
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_NOT_FOUND,
"Add-brick: Unable"
" to get brickinfo");
goto out;
@@ -1708,18 +1610,6 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
}
if (!gf_uuid_compare(brickinfo->uuid, MY_UUID)) {
-#ifdef HAVE_BD_XLATOR
- if (brickinfo->vg[0]) {
- ret = glusterd_is_valid_vg(brickinfo, 1, msg);
- if (ret) {
- gf_msg(THIS->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_VG,
- "%s", msg);
- *op_errstr = gf_strdup(msg);
- goto out;
- }
- }
-#endif
-
ret = glusterd_validate_and_create_brickpath(
brickinfo, volinfo->volume_id, volinfo->volname, op_errstr,
is_force, _gf_false);
@@ -1774,7 +1664,7 @@ out:
GF_FREE(str_ret);
GF_FREE(all_bricks);
- gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
return ret;
}
@@ -1798,6 +1688,8 @@ glusterd_remove_brick_validate_bricks(gf1_op_commands cmd, int32_t brick_count,
};
glusterd_conf_t *priv = THIS->private;
int pid = -1;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
/* Check whether all the nodes of the bricks to be removed are
* up, if not fail the operation */
@@ -1806,6 +1698,8 @@ glusterd_remove_brick_validate_bricks(gf1_op_commands cmd, int32_t brick_count,
ret = dict_get_strn(dict, key, keylen, &brick);
if (ret) {
snprintf(msg, sizeof(msg), "Unable to get %s", key);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "key=%s", key, NULL);
*errstr = gf_strdup(msg);
goto out;
}
@@ -1817,54 +1711,30 @@ glusterd_remove_brick_validate_bricks(gf1_op_commands cmd, int32_t brick_count,
"Incorrect brick "
"%s for volume %s",
brick, volinfo->volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INCORRECT_BRICK,
+ "Brick=%s, Volume=%s", brick, volinfo->volname, NULL);
*errstr = gf_strdup(msg);
goto out;
}
/* Do not allow commit if the bricks are not decommissioned
- * if its a remove brick commit or detach-tier commit
+ * if its a remove brick commit
*/
- if (!brickinfo->decommissioned) {
- if (cmd == GF_OP_CMD_COMMIT) {
- snprintf(msg, sizeof(msg),
- "Brick %s "
- "is not decommissioned. "
- "Use start or force option",
- brick);
- *errstr = gf_strdup(msg);
- ret = -1;
- goto out;
- }
-
- if (cmd == GF_OP_CMD_DETACH_COMMIT ||
- cmd_defrag == GF_DEFRAG_CMD_DETACH_COMMIT) {
- snprintf(msg, sizeof(msg),
- "Bricks in Hot "
- "tier are not decommissioned yet. Use "
- "gluster volume tier <VOLNAME> "
- "detach start to start the decommission process");
- *errstr = gf_strdup(msg);
- ret = -1;
- goto out;
- }
- } else {
- if ((cmd == GF_OP_CMD_DETACH_COMMIT ||
- (cmd_defrag == GF_DEFRAG_CMD_DETACH_COMMIT)) &&
- (volinfo->rebal.defrag_status == GF_DEFRAG_STATUS_STARTED)) {
- snprintf(msg, sizeof(msg),
- "Bricks in Hot "
- "tier are not decommissioned yet. Wait for "
- "the detach to complete using gluster volume "
- "tier <VOLNAME> status.");
- *errstr = gf_strdup(msg);
- ret = -1;
- goto out;
- }
+ if (!brickinfo->decommissioned && cmd == GF_OP_CMD_COMMIT) {
+ snprintf(msg, sizeof(msg),
+ "Brick %s "
+ "is not decommissioned. "
+ "Use start or force option",
+ brick);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_BRICK_NOT_DECOM,
+ "Use 'start' or 'force' option, Brick=%s", brick, NULL);
+ *errstr = gf_strdup(msg);
+ ret = -1;
+ goto out;
}
if (glusterd_is_local_brick(THIS, volinfo, brickinfo)) {
switch (cmd) {
case GF_OP_CMD_START:
- case GF_OP_CMD_DETACH_START:
goto check;
case GF_OP_CMD_NONE:
default:
@@ -1872,8 +1742,6 @@ glusterd_remove_brick_validate_bricks(gf1_op_commands cmd, int32_t brick_count,
}
switch (cmd_defrag) {
- case GF_DEFRAG_CMD_DETACH_START:
- break;
case GF_DEFRAG_CMD_NONE:
default:
continue;
@@ -1885,6 +1753,10 @@ glusterd_remove_brick_validate_bricks(gf1_op_commands cmd, int32_t brick_count,
"brick %s. Use force option to "
"remove the offline brick",
brick);
+ gf_smsg(
+ this->name, GF_LOG_ERROR, errno, GD_MSG_BRICK_STOPPED,
+ "Use 'force' option to remove the offline brick, Brick=%s",
+ brick, NULL);
*errstr = gf_strdup(msg);
ret = -1;
goto out;
@@ -1895,6 +1767,8 @@ glusterd_remove_brick_validate_bricks(gf1_op_commands cmd, int32_t brick_count,
"Found dead "
"brick %s",
brick);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_BRICK_DEAD,
+ "Brick=%s", brick, NULL);
*errstr = gf_strdup(msg);
ret = -1;
goto out;
@@ -1904,29 +1778,33 @@ glusterd_remove_brick_validate_bricks(gf1_op_commands cmd, int32_t brick_count,
continue;
}
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find_by_uuid(brickinfo->uuid);
if (!peerinfo) {
+ RCU_READ_UNLOCK;
snprintf(msg, sizeof(msg),
"Host node of the "
"brick %s is not in cluster",
brick);
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_BRICK_HOST_NOT_FOUND, "Brick=%s", brick, NULL);
*errstr = gf_strdup(msg);
ret = -1;
- rcu_read_unlock();
goto out;
}
if (!peerinfo->connected) {
+ RCU_READ_UNLOCK;
snprintf(msg, sizeof(msg),
"Host node of the "
"brick %s is down",
brick);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_HOST_DOWN,
+ "Brick=%s", brick, NULL);
*errstr = gf_strdup(msg);
ret = -1;
- rcu_read_unlock();
goto out;
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
}
out:
@@ -2001,6 +1879,7 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr)
errstr = gf_strdup(
"Deleting all the bricks of the "
"volume is not allowed");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_DELETE, NULL);
ret = -1;
goto out;
}
@@ -2009,24 +1888,13 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr)
switch (cmd) {
case GF_OP_CMD_NONE:
errstr = gf_strdup("no remove-brick command issued");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_NO_REMOVE_CMD,
+ NULL);
goto out;
case GF_OP_CMD_STATUS:
ret = 0;
goto out;
-
- case GF_OP_CMD_DETACH_START:
- if (volinfo->type != GF_CLUSTER_TYPE_TIER) {
- snprintf(msg, sizeof(msg),
- "volume %s is not a tier "
- "volume",
- volinfo->volname);
- errstr = gf_strdup(msg);
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_TIER, "%s",
- errstr);
- goto out;
- }
-
case GF_OP_CMD_START: {
if ((volinfo->type == GF_CLUSTER_TYPE_REPLICATE) &&
dict_getn(dict, "replica-count", SLEN("replica-count"))) {
@@ -2041,21 +1909,12 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr)
}
if (GLUSTERD_STATUS_STARTED != volinfo->status) {
- if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
- snprintf(msg, sizeof(msg),
- "Volume %s needs "
- "to be started before detach-tier "
- "(you can use 'force' or 'commit' "
- "to override this behavior)",
- volinfo->volname);
- } else {
- snprintf(msg, sizeof(msg),
- "Volume %s needs "
- "to be started before remove-brick "
- "(you can use 'force' or 'commit' "
- "to override this behavior)",
- volinfo->volname);
- }
+ snprintf(msg, sizeof(msg),
+ "Volume %s needs "
+ "to be started before remove-brick "
+ "(you can use 'force' or 'commit' "
+ "to override this behavior)",
+ volinfo->volname);
errstr = gf_strdup(msg);
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_STARTED,
"%s", errstr);
@@ -2104,6 +1963,21 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr)
goto out;
}
+ if (volinfo->snap_count > 0 ||
+ !cds_list_empty(&volinfo->snap_volumes)) {
+ snprintf(msg, sizeof(msg),
+ "Volume %s has %" PRIu64
+ " snapshots. "
+ "Changing the volume configuration will not effect "
+ "snapshots."
+ "But the snapshot brick mount should be intact to "
+ "make them function.",
+ volname, volinfo->snap_count);
+ gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_SNAP_WARN, "%s",
+ msg);
+ msg[0] = '\0';
+ }
+
ret = glusterd_remove_brick_validate_bricks(
cmd, brick_count, dict, volinfo, &errstr, GF_DEFRAG_CMD_NONE);
if (ret)
@@ -2132,55 +2006,16 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr)
}
case GF_OP_CMD_STOP:
- case GF_OP_CMD_STOP_DETACH_TIER:
ret = 0;
break;
- case GF_OP_CMD_DETACH_COMMIT:
- if (volinfo->type != GF_CLUSTER_TYPE_TIER) {
- snprintf(msg, sizeof(msg),
- "volume %s is not a tier "
- "volume",
- volinfo->volname);
- errstr = gf_strdup(msg);
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_TIER, "%s",
- errstr);
- goto out;
- }
- if (volinfo->decommission_in_progress) {
- errstr = gf_strdup(
- "use 'force' option as migration "
- "is in progress");
- goto out;
- }
- if (volinfo->rebal.defrag_status == GF_DEFRAG_STATUS_FAILED) {
- errstr = gf_strdup(
- "use 'force' option as migration "
- "has failed");
- goto out;
- }
-
- ret = glusterd_remove_brick_validate_bricks(
- cmd, brick_count, dict, volinfo, &errstr, GF_DEFRAG_CMD_NONE);
- if (ret)
- goto out;
-
- /* If geo-rep is configured, for this volume, it should be
- * stopped.
- */
- param.volinfo = volinfo;
- ret = glusterd_check_geo_rep_running(&param, op_errstr);
- if (ret || param.is_active) {
- ret = -1;
- goto out;
- }
- break;
-
case GF_OP_CMD_COMMIT:
if (volinfo->decommission_in_progress) {
errstr = gf_strdup(
"use 'force' option as migration "
"is in progress");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_MIGRATION_PROG,
+ "Use 'force' option", NULL);
goto out;
}
@@ -2188,9 +2023,27 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr)
errstr = gf_strdup(
"use 'force' option as migration "
"has failed");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_MIGRATION_FAIL,
+ "Use 'force' option", NULL);
goto out;
}
+ if (volinfo->rebal.defrag_status == GF_DEFRAG_STATUS_COMPLETE) {
+ if (volinfo->rebal.rebalance_failures > 0 ||
+ volinfo->rebal.skipped_files > 0) {
+ errstr = gf_strdup(
+ "use 'force' option as migration "
+ "of some files might have been skipped or "
+ "has failed");
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_MIGRATION_FAIL,
+ "Use 'force' option, some files might have been "
+ "skipped",
+ NULL);
+ goto out;
+ }
+ }
+
ret = glusterd_remove_brick_validate_bricks(
cmd, brick_count, dict, volinfo, &errstr, GF_DEFRAG_CMD_NONE);
if (ret)
@@ -2208,18 +2061,11 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr)
break;
- case GF_OP_CMD_DETACH_COMMIT_FORCE:
- if (volinfo->type != GF_CLUSTER_TYPE_TIER) {
- snprintf(msg, sizeof(msg),
- "volume %s is not a tier "
- "volume",
- volinfo->volname);
- errstr = gf_strdup(msg);
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_TIER, "%s",
- errstr);
- goto out;
- }
case GF_OP_CMD_COMMIT_FORCE:
+ case GF_OP_CMD_DETACH_START:
+ case GF_OP_CMD_DETACH_COMMIT:
+ case GF_OP_CMD_DETACH_COMMIT_FORCE:
+ case GF_OP_CMD_STOP_DETACH_TIER:
break;
}
ret = 0;
@@ -2230,7 +2076,8 @@ out:
if (op_errstr)
*op_errstr = errstr;
}
-
+ if (!op_errstr && errstr)
+ GF_FREE(errstr);
return ret;
}
@@ -2312,48 +2159,6 @@ glusterd_remove_brick_migrate_cbk(glusterd_volinfo_t *volinfo,
return ret;
}
-static int
-glusterd_op_perform_attach_tier(dict_t *dict, glusterd_volinfo_t *volinfo,
- int count, char *bricks)
-{
- int ret = 0;
- int replica_count = 0;
- int type = 0;
-
- /*
- * Store the new (cold) tier's structure until the graph is generated.
- * If there is a failure before the graph is generated the
- * structure will revert to its original state.
- */
- volinfo->tier_info.cold_dist_leaf_count = volinfo->dist_leaf_count;
- volinfo->tier_info.cold_type = volinfo->type;
- volinfo->tier_info.cold_brick_count = volinfo->brick_count;
- volinfo->tier_info.cold_replica_count = volinfo->replica_count;
- volinfo->tier_info.cold_disperse_count = volinfo->disperse_count;
- volinfo->tier_info.cold_redundancy_count = volinfo->redundancy_count;
-
- ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"),
- &replica_count);
- if (!ret)
- volinfo->tier_info.hot_replica_count = replica_count;
- else
- volinfo->tier_info.hot_replica_count = 1;
- volinfo->tier_info.hot_brick_count = count;
- ret = dict_get_int32n(dict, "hot-type", SLEN("hot-type"), &type);
- volinfo->tier_info.hot_type = type;
- ret = dict_set_int32n(dict, "type", SLEN("type"), GF_CLUSTER_TYPE_TIER);
-
- if (!ret)
- ret = dict_set_nstrn(volinfo->dict, "features.ctr-enabled",
- SLEN("features.ctr-enabled"), "on", SLEN("on"));
-
- if (!ret)
- ret = dict_set_nstrn(volinfo->dict, "cluster.tier-mode",
- SLEN("cluster.tier-mode"), "cache", SLEN("cache"));
-
- return ret;
-}
-
int
glusterd_op_add_brick(dict_t *dict, char **op_errstr)
{
@@ -2401,11 +2206,6 @@ glusterd_op_add_brick(dict_t *dict, char **op_errstr)
goto out;
}
- if (dict_getn(dict, "attach-tier", SLEN("attach-tier"))) {
- gf_msg_debug(THIS->name, 0, "Adding tier");
- glusterd_op_perform_attach_tier(dict, volinfo, count, bricks);
- }
-
ret = glusterd_op_perform_add_bricks(volinfo, count, bricks, dict);
if (ret) {
gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL,
@@ -2434,94 +2234,118 @@ out:
}
int
-glusterd_op_add_tier_brick(dict_t *dict, char **op_errstr)
+glusterd_post_commit_add_brick(dict_t *dict, char **op_errstr)
{
int ret = 0;
char *volname = NULL;
- glusterd_conf_t *priv = NULL;
- glusterd_volinfo_t *volinfo = NULL;
- xlator_t *this = NULL;
- char *bricks = NULL;
- int32_t count = 0;
- this = THIS;
- GF_VALIDATE_OR_GOTO("glusterd", this, out);
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
- priv = this->private;
- GF_VALIDATE_OR_GOTO(this->name, priv, out);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get volume name");
+ goto out;
+ }
+ ret = glusterd_replace_old_auth_allow_list(volname);
+out:
+ return ret;
+}
+
+int
+glusterd_post_commit_replace_brick(dict_t *dict, char **op_errstr)
+{
+ int ret = 0;
+ char *volname = NULL;
ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
if (ret) {
- gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
"Unable to get volume name");
goto out;
}
+ ret = glusterd_replace_old_auth_allow_list(volname);
+out:
+ return ret;
+}
- ret = glusterd_volinfo_find(volname, &volinfo);
+int
+glusterd_set_rebalance_id_for_remove_brick(dict_t *req_dict, dict_t *rsp_dict)
+{
+ int ret = -1;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char msg[2048] = {0};
+ char *task_id_str = NULL;
+ xlator_t *this = NULL;
+ int32_t cmd = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(rsp_dict);
+ GF_ASSERT(req_dict);
+ ret = dict_get_strn(rsp_dict, "volname", SLEN("volname"), &volname);
if (ret) {
- gf_msg("glusterd", GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND,
- "Volume not found");
+ gf_msg_debug(this->name, 0, "volname not found");
goto out;
}
- ret = dict_get_int32n(dict, "count", SLEN("count"), &count);
+ ret = glusterd_volinfo_find(volname, &volinfo);
if (ret) {
- gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
- "Unable to get count");
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND,
+ "Unable to allocate memory");
goto out;
}
- ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &bricks);
+ ret = dict_get_int32n(rsp_dict, "command", SLEN("command"), &cmd);
if (ret) {
- gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
- "Unable to get bricks");
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get command");
goto out;
}
- if (dict_getn(dict, "attach-tier", SLEN("attach-tier"))) {
- gf_msg_debug(THIS->name, 0, "Adding tier");
- glusterd_op_perform_attach_tier(dict, volinfo, count, bricks);
- }
+ /* remove brick task id is generted in glusterd_op_stage_remove_brick(),
+ * but rsp_dict is unavailable there. So copying it to rsp_dict from
+ * req_dict here. */
- ret = glusterd_op_perform_add_bricks(volinfo, count, bricks, dict);
- if (ret) {
- gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL,
- "Unable to add bricks");
- goto out;
+ if (is_origin_glusterd(rsp_dict)) {
+ ret = dict_get_strn(req_dict, GF_REMOVE_BRICK_TID_KEY,
+ SLEN(GF_REMOVE_BRICK_TID_KEY), &task_id_str);
+ if (ret) {
+ snprintf(msg, sizeof(msg), "Missing rebalance id for remove-brick");
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_REBALANCE_ID_MISSING,
+ "%s", msg);
+ ret = 0;
+ } else {
+ gf_uuid_parse(task_id_str, volinfo->rebal.rebalance_id);
+
+ ret = glusterd_copy_uuid_to_dict(volinfo->rebal.rebalance_id,
+ rsp_dict, GF_REMOVE_BRICK_TID_KEY,
+ SLEN(GF_REMOVE_BRICK_TID_KEY));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_REMOVE_BRICK_ID_SET_FAIL,
+ "Failed to set remove-brick-id");
+ goto out;
+ }
+ }
}
- if (priv->op_version <= GD_OP_VERSION_3_10_0) {
- ret = glusterd_store_volinfo(volinfo,
- GLUSTERD_VOLINFO_VER_AC_INCREMENT);
- if (ret)
+ if (!gf_uuid_is_null(volinfo->rebal.rebalance_id) &&
+ GD_OP_REMOVE_BRICK == volinfo->rebal.op) {
+ ret = glusterd_copy_uuid_to_dict(volinfo->rebal.rebalance_id, rsp_dict,
+ GF_REMOVE_BRICK_TID_KEY,
+ SLEN(GF_REMOVE_BRICK_TID_KEY));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set task-id for volume %s", volname);
goto out;
- } else {
- /*
- * The cluster is operating at version greater than
- * gluster-3.10.0. So no need to store volfiles
- * in commit phase, the same will be done
- * in post validate phase with v3 framework.
- */
+ }
}
-
- if (GLUSTERD_STATUS_STARTED == volinfo->status)
- ret = glusterd_svcs_manager(volinfo);
-
out:
return ret;
}
-
-void
-glusterd_op_perform_detach_tier(glusterd_volinfo_t *volinfo)
-{
- volinfo->type = volinfo->tier_info.cold_type;
- volinfo->replica_count = volinfo->tier_info.cold_replica_count;
- volinfo->disperse_count = volinfo->tier_info.cold_disperse_count;
- volinfo->redundancy_count = volinfo->tier_info.cold_redundancy_count;
- volinfo->dist_leaf_count = volinfo->tier_info.cold_dist_leaf_count;
-}
-
int
glusterd_op_remove_brick(dict_t *dict, char **op_errstr)
{
@@ -2538,8 +2362,6 @@ glusterd_op_remove_brick(dict_t *dict, char **op_errstr)
int force = 0;
gf1_op_commands cmd = 0;
int32_t replica_count = 0;
- glusterd_brickinfo_t *brickinfo = NULL;
- glusterd_brickinfo_t *tmp = NULL;
char *task_id_str = NULL;
xlator_t *this = NULL;
dict_t *bricks_dict = NULL;
@@ -2547,11 +2369,6 @@ glusterd_op_remove_brick(dict_t *dict, char **op_errstr)
int start_remove = 0;
uint32_t commit_hash = 0;
int defrag_cmd = 0;
- int detach_commit = 0;
- void *tier_info = NULL;
- char *cold_shd_key = NULL;
- char *hot_shd_key = NULL;
- int delete_key = 1;
glusterd_conf_t *conf = NULL;
this = THIS;
@@ -2582,7 +2399,7 @@ glusterd_op_remove_brick(dict_t *dict, char **op_errstr)
}
cmd = flag;
- if ((GF_OP_CMD_START == cmd) || (GF_OP_CMD_DETACH_START == cmd))
+ if (GF_OP_CMD_START == cmd)
start_remove = 1;
/* Set task-id, if available, in ctx dict for operations other than
@@ -2622,35 +2439,6 @@ glusterd_op_remove_brick(dict_t *dict, char **op_errstr)
goto out;
case GF_OP_CMD_STOP:
- case GF_OP_CMD_STOP_DETACH_TIER: {
- /* Fall back to the old volume file */
- cds_list_for_each_entry_safe(brickinfo, tmp, &volinfo->bricks,
- brick_list)
- {
- if (!brickinfo->decommissioned)
- continue;
- brickinfo->decommissioned = 0;
- }
- ret = glusterd_create_volfiles_and_notify_services(volinfo);
- if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0,
- GD_MSG_VOLFILE_CREATE_FAIL, "failed to create volfiles");
- goto out;
- }
-
- ret = glusterd_store_volinfo(volinfo,
- GLUSTERD_VOLINFO_VER_AC_INCREMENT);
- if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLINFO_SET_FAIL,
- "failed to store volinfo");
- goto out;
- }
-
- ret = 0;
- goto out;
- }
-
- case GF_OP_CMD_DETACH_START:
case GF_OP_CMD_START:
/* Reset defrag status to 'NOT STARTED' whenever a
* remove-brick/rebalance command is issued to remove
@@ -2658,6 +2446,7 @@ glusterd_op_remove_brick(dict_t *dict, char **op_errstr)
* Update defrag_cmd as well or it will only be done
* for nodes on which the brick to be removed exists.
*/
+ /* coverity[MIXED_ENUMS] */
volinfo->rebal.defrag_cmd = cmd;
volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_NOT_STARTED;
ret = dict_get_strn(dict, GF_REMOVE_BRICK_TID_KEY,
@@ -2676,43 +2465,6 @@ glusterd_op_remove_brick(dict_t *dict, char **op_errstr)
force = 1;
break;
- case GF_OP_CMD_DETACH_COMMIT:
- case GF_OP_CMD_DETACH_COMMIT_FORCE:
- glusterd_op_perform_detach_tier(volinfo);
- detach_commit = 1;
-
- /* Disabling ctr when detaching a tier, since
- * currently tier is the only consumer of ctr.
- * Revisit this code when this constraint no
- * longer exist.
- */
- dict_deln(volinfo->dict, "features.ctr-enabled",
- SLEN("features.ctr-enabled"));
- dict_deln(volinfo->dict, "cluster.tier-mode",
- SLEN("cluster.tier-mode"));
-
- hot_shd_key = gd_get_shd_key(volinfo->tier_info.hot_type);
- cold_shd_key = gd_get_shd_key(volinfo->tier_info.cold_type);
- if (hot_shd_key) {
- /*
- * Since post detach, shd graph will not contain hot
- * tier. So we need to clear option set for hot tier.
- * For a tiered volume there can be different key
- * for both hot and cold. If hot tier is shd compatible
- * then we need to remove the configured value when
- * detaching a tier, only if the key's are different or
- * cold key is NULL. So we will set delete_key first,
- * and if cold key is not null and they are equal then
- * we will clear the flag. Otherwise we will delete the
- * key.
- */
- if (cold_shd_key)
- delete_key = strcmp(hot_shd_key, cold_shd_key);
- if (delete_key)
- dict_del(volinfo->dict, hot_shd_key);
- }
- /* fall through */
-
case GF_OP_CMD_COMMIT_FORCE:
if (volinfo->decommission_in_progress) {
@@ -2732,6 +2484,11 @@ glusterd_op_remove_brick(dict_t *dict, char **op_errstr)
ret = 0;
force = 1;
break;
+ case GF_OP_CMD_DETACH_START:
+ case GF_OP_CMD_DETACH_COMMIT_FORCE:
+ case GF_OP_CMD_DETACH_COMMIT:
+ case GF_OP_CMD_STOP_DETACH_TIER:
+ break;
}
ret = dict_get_int32n(dict, "count", SLEN("count"), &count);
@@ -2740,10 +2497,6 @@ glusterd_op_remove_brick(dict_t *dict, char **op_errstr)
"Unable to get count");
goto out;
}
-
- if (volinfo->type == GF_CLUSTER_TYPE_TIER)
- count = glusterd_set_detach_bricks(dict, volinfo);
-
/* Save the list of bricks for later usage only on starting a
* remove-brick. Right now this is required for displaying the task
* parameters with task status in volume status.
@@ -2796,12 +2549,6 @@ glusterd_op_remove_brick(dict_t *dict, char **op_errstr)
i++;
}
- if (detach_commit) {
- /* Clear related information from volinfo */
- tier_info = ((void *)(&volinfo->tier_info));
- memset(tier_info, 0, sizeof(volinfo->tier_info));
- }
-
if (start_remove)
volinfo->rebal.dict = dict_ref(bricks_dict);
@@ -2824,8 +2571,7 @@ glusterd_op_remove_brick(dict_t *dict, char **op_errstr)
* volumes undergoing a detach operation, they should not
* be modified here.
*/
- if ((replica_count == 1) && (cmd != GF_OP_CMD_DETACH_COMMIT) &&
- (cmd != GF_OP_CMD_DETACH_COMMIT_FORCE)) {
+ if (replica_count == 1) {
if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) {
volinfo->type = GF_CLUSTER_TYPE_NONE;
/* backward compatibility */
@@ -2863,7 +2609,7 @@ glusterd_op_remove_brick(dict_t *dict, char **op_errstr)
}
if (start_remove && volinfo->status == GLUSTERD_STATUS_STARTED) {
- ret = glusterd_svcs_reconfigure();
+ ret = glusterd_svcs_reconfigure(volinfo);
if (ret) {
gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_NFS_RECONF_FAIL,
"Unable to reconfigure NFS-Server");
@@ -2886,8 +2632,6 @@ glusterd_op_remove_brick(dict_t *dict, char **op_errstr)
}
/* perform the rebalance operations */
defrag_cmd = GF_DEFRAG_CMD_START_FORCE;
- if (cmd == GF_OP_CMD_DETACH_START)
- defrag_cmd = GF_DEFRAG_CMD_START_DETACH_TIER;
/*
* We need to set this *before* we issue commands to the
* bricks, or else we might end up setting it after the bricks
@@ -2916,7 +2660,7 @@ out:
GF_FREE(brick_tmpstr);
if (bricks_dict)
dict_unref(bricks_dict);
-
+ gf_msg_debug(this->name, 0, "returning %d ", ret);
return ret;
}
@@ -3034,202 +2778,19 @@ out:
}
int
-__glusterd_handle_add_tier_brick(rpcsvc_request_t *req)
-{
- int32_t ret = -1;
- gf_cli_req cli_req = {{
- 0,
- }};
- dict_t *dict = NULL;
- char *bricks = NULL;
- char *volname = NULL;
- int brick_count = 0;
- void *cli_rsp = NULL;
- char err_str[2048] = "";
- gf_cli_rsp rsp = {
- 0,
- };
- glusterd_volinfo_t *volinfo = NULL;
- xlator_t *this = NULL;
- int32_t replica_count = 0;
- int32_t arbiter_count = 0;
- int type = 0;
-
- this = THIS;
- GF_VALIDATE_OR_GOTO("glusterd", this, out);
-
- GF_VALIDATE_OR_GOTO(this->name, req, out);
-
- ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
- if (ret < 0) {
- /*failed to decode msg*/
- req->rpc_err = GARBAGE_ARGS;
- snprintf(err_str, sizeof(err_str), "Garbage args received");
- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, "%s",
- err_str);
- goto out;
- }
-
- gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_ADD_BRICK_REQ_RECVD,
- "Received add brick req");
-
- if (cli_req.dict.dict_len) {
- /* Unserialize the dictionary */
- dict = dict_new();
-
- ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
- &dict);
- if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, errno,
- GD_MSG_DICT_UNSERIALIZE_FAIL,
- "failed to "
- "unserialize req-buffer to dictionary");
- snprintf(err_str, sizeof(err_str),
- "Unable to decode "
- "the command");
- goto out;
- }
- }
-
- ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
-
- if (ret) {
- snprintf(err_str, sizeof(err_str),
- "Unable to get volume "
- "name");
- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, "%s",
- err_str);
- goto out;
- }
-
- if (!glusterd_check_volume_exists(volname)) {
- snprintf(err_str, sizeof(err_str), "Volume %s does not exist", volname);
- gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, "%s",
- err_str);
- ret = -1;
- goto out;
- }
-
- ret = dict_get_int32n(dict, "count", SLEN("count"), &brick_count);
- if (ret) {
- snprintf(err_str, sizeof(err_str),
- "Unable to get volume "
- "brick count");
- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, "%s",
- err_str);
- goto out;
- }
-
- ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"),
- &replica_count);
- if (!ret) {
- gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_SUCCESS,
- "replica-count is %d", replica_count);
- }
-
- ret = dict_get_int32n(dict, "arbiter-count", SLEN("arbiter-count"),
- &arbiter_count);
- if (!ret) {
- gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_SUCCESS,
- "arbiter-count is %d", arbiter_count);
- }
-
- if (!dict_getn(dict, "force", SLEN("force"))) {
- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
- "Failed to get flag");
- ret = -1;
- goto out;
- }
-
- ret = glusterd_volinfo_find(volname, &volinfo);
- if (ret) {
- snprintf(err_str, sizeof(err_str),
- "Unable to get volinfo "
- "for volume name %s",
- volname);
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, "%s",
- err_str);
- goto out;
- }
-
- if (glusterd_is_tiering_supported(err_str) == _gf_false) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VERSION_UNSUPPORTED,
- "Tiering not supported at this version");
- ret = -1;
- goto out;
- }
-
- if (dict_getn(dict, "attach-tier", SLEN("attach-tier"))) {
- if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
- snprintf(err_str, sizeof(err_str), "Volume %s is already a tier.",
- volname);
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_ALREADY_TIER, "%s",
- err_str);
- ret = -1;
- goto out;
- }
-
- ret = dict_get_int32n(dict, "hot-type", SLEN("hot-type"), &type);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
- "failed to get type from dictionary");
- goto out;
- }
- }
-
- ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &bricks);
- if (ret) {
- snprintf(err_str, sizeof(err_str),
- "Unable to get volume "
- "bricks");
- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, "%s",
- err_str);
- goto out;
- }
-
- if (type != volinfo->type) {
- ret = dict_set_int32n(dict, "type", SLEN("type"), type);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
- "failed to set the new type in dict");
- goto out;
- }
- }
-
- ret = glusterd_mgmt_v3_initiate_all_phases(req, GD_OP_ADD_TIER_BRICK, dict);
-
-out:
- if (ret) {
- rsp.op_ret = -1;
- rsp.op_errno = 0;
- if (err_str[0] == '\0')
- snprintf(err_str, sizeof(err_str), "Operation failed");
- rsp.op_errstr = err_str;
- cli_rsp = &rsp;
- glusterd_to_cli(req, cli_rsp, NULL, 0, NULL, (xdrproc_t)xdr_gf_cli_rsp,
- dict);
- ret = 0; /*sent error to cli, prevent second reply*/
- }
-
- free(cli_req.dict.dict_val); /*its malloced by xdr*/
-
- return ret;
-}
-
-int
glusterd_handle_add_tier_brick(rpcsvc_request_t *req)
{
- return glusterd_big_locked_handler(req, __glusterd_handle_add_tier_brick);
+ return 0;
}
int
glusterd_handle_attach_tier(rpcsvc_request_t *req)
{
- return glusterd_big_locked_handler(req, __glusterd_handle_add_brick);
+ return 0;
}
int
glusterd_handle_detach_tier(rpcsvc_request_t *req)
{
- return glusterd_big_locked_handler(req, __glusterd_handle_remove_brick);
+ return 0;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c
index f84a7617549..5c01f0c70b6 100644
--- a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c
+++ b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c
@@ -8,7 +8,7 @@
cases as published by the Free Software Foundation.
*/
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#include "rpc-clnt.h"
#include "glusterd.h"
#include "glusterd-conn-mgmt.h"
@@ -26,8 +26,17 @@ glusterd_conn_init(glusterd_conn_t *conn, char *sockpath, int frame_timeout,
xlator_t *this = THIS;
glusterd_svc_t *svc = NULL;
- if (!this)
+ if (!this) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_XLATOR_NOT_DEFINED,
+ NULL);
goto out;
+ }
+
+ options = dict_new();
+ if (!options) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ goto out;
+ }
svc = glusterd_conn_get_svc_object(conn);
if (!svc) {
@@ -36,14 +45,17 @@ glusterd_conn_init(glusterd_conn_t *conn, char *sockpath, int frame_timeout,
goto out;
}
- ret = rpc_transport_unix_options_build(&options, sockpath, frame_timeout);
+ ret = rpc_transport_unix_options_build(options, sockpath, frame_timeout);
if (ret)
goto out;
ret = dict_set_int32n(options, "transport.socket.ignore-enoent",
SLEN("transport.socket.ignore-enoent"), 1);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=transport.socket.ignore-enoent", NULL);
goto out;
+ }
/* @options is free'd by rpc_transport when destroyed */
rpc = rpc_clnt_new(options, this, (char *)svc->name, 16);
@@ -57,15 +69,18 @@ glusterd_conn_init(glusterd_conn_t *conn, char *sockpath, int frame_timeout,
goto out;
ret = snprintf(conn->sockpath, sizeof(conn->sockpath), "%s", sockpath);
- if (ret < 0)
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
goto out;
- else
+ } else
ret = 0;
conn->frame_timeout = frame_timeout;
conn->rpc = rpc;
conn->notify = notify;
out:
+ if (options)
+ dict_unref(options);
if (ret) {
if (rpc) {
rpc_clnt_unref(rpc);
@@ -91,7 +106,7 @@ glusterd_conn_connect(glusterd_conn_t *conn)
int
glusterd_conn_disconnect(glusterd_conn_t *conn)
{
- rpc_clnt_disconnect(conn->rpc);
+ rpc_clnt_disable(conn->rpc);
return 0;
}
@@ -132,3 +147,45 @@ glusterd_conn_build_socket_filepath(char *rundir, uuid_t uuid, char *socketpath,
glusterd_set_socket_filepath(sockfilepath, socketpath, len);
return 0;
}
+
+int
+__glusterd_muxsvc_conn_common_notify(struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data)
+{
+ glusterd_conf_t *conf = THIS->private;
+ glusterd_svc_proc_t *mux_proc = mydata;
+ int ret = -1;
+
+ /* Silently ignoring this error, exactly like the current
+ * implementation */
+ if (!mux_proc)
+ return 0;
+
+ if (event == RPC_CLNT_DESTROY) {
+ /*RPC_CLNT_DESTROY will only called after mux_proc detached from the
+ * list. So it is safe to call without lock. Processing
+ * RPC_CLNT_DESTROY under a lock will lead to deadlock.
+ */
+ if (mux_proc->data) {
+ glusterd_volinfo_unref(mux_proc->data);
+ mux_proc->data = NULL;
+ }
+ GF_FREE(mux_proc);
+ ret = 0;
+ } else {
+ pthread_mutex_lock(&conf->attach_lock);
+ {
+ ret = mux_proc->notify(mux_proc, event);
+ }
+ pthread_mutex_unlock(&conf->attach_lock);
+ }
+ return ret;
+}
+
+int
+glusterd_muxsvc_conn_common_notify(struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data)
+{
+ return glusterd_big_locked_notify(rpc, mydata, event, data,
+ __glusterd_muxsvc_conn_common_notify);
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h
index 602c0ba7b84..1b225621ab1 100644
--- a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h
+++ b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h
@@ -20,11 +20,11 @@ typedef int (*glusterd_conn_notify_t)(glusterd_conn_t *conn,
struct glusterd_conn_ {
struct rpc_clnt *rpc;
- char sockpath[PATH_MAX];
- int frame_timeout;
/* Existing daemons tend to specialize their respective
* notify implementations, so ... */
glusterd_conn_notify_t notify;
+ int frame_timeout;
+ char sockpath[PATH_MAX];
};
int
@@ -43,9 +43,11 @@ glusterd_conn_disconnect(glusterd_conn_t *conn);
int
glusterd_conn_common_notify(struct rpc_clnt *rpc, void *mydata,
rpc_clnt_event_t event, void *data);
+int
+glusterd_muxsvc_conn_common_notify(struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data);
int32_t
glusterd_conn_build_socket_filepath(char *rundir, uuid_t uuid, char *socketpath,
int len);
-
#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-errno.h b/xlators/mgmt/glusterd/src/glusterd-errno.h
index 7e1575b57af..c74070e0e8d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-errno.h
+++ b/xlators/mgmt/glusterd/src/glusterd-errno.h
@@ -27,7 +27,7 @@ enum glusterd_op_errno {
EG_ISSNAP = 30813, /* Volume is a snap volume */
EG_GEOREPRUN = 30814, /* Geo-Replication is running */
EG_NOTTHINP = 30815, /* Bricks are not thinly provisioned */
- EG_NOGANESHA = 30816, /* obsolete ganesha is not enabled */
+ EG_NOGANESHA = 30816, /* Global ganesha is not enabled */
};
#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
new file mode 100644
index 00000000000..f08bd6cebee
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
@@ -0,0 +1,927 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/common-utils.h>
+#include "glusterd.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-store.h"
+#include "glusterd-utils.h"
+#include "glusterd-volgen.h"
+#include "glusterd-messages.h"
+#include <glusterfs/syscall.h>
+
+#include <ctype.h>
+
+int
+start_ganesha(char **op_errstr);
+
+typedef struct service_command {
+ char *binary;
+ char *service;
+ int (*action)(struct service_command *, char *);
+} service_command;
+
+/* parsing_ganesha_ha_conf will allocate the returned string
+ * to be freed (GF_FREE) by the caller
+ * return NULL if error or not found */
+static char *
+parsing_ganesha_ha_conf(const char *key)
+{
+#define MAX_LINE 1024
+ char scratch[MAX_LINE * 2] = {
+ 0,
+ };
+ char *value = NULL, *pointer = NULL, *end_pointer = NULL;
+ FILE *fp;
+
+ fp = fopen(GANESHA_HA_CONF, "r");
+ if (fp == NULL) {
+ gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
+ "couldn't open the file %s", GANESHA_HA_CONF);
+ goto end_ret;
+ }
+ while ((pointer = fgets(scratch, MAX_LINE, fp)) != NULL) {
+ /* Read config file until we get matching "^[[:space:]]*key" */
+ if (*pointer == '#') {
+ continue;
+ }
+ while (isblank(*pointer)) {
+ pointer++;
+ }
+ if (strncmp(pointer, key, strlen(key))) {
+ continue;
+ }
+ pointer += strlen(key);
+ /* key found : if we fail to parse, we'll return an error
+ * rather than trying next one
+ * - supposition : conf file is bash compatible : no space
+ * around the '=' */
+ if (*pointer != '=') {
+ gf_msg(THIS->name, GF_LOG_ERROR, errno,
+ GD_MSG_GET_CONFIG_INFO_FAILED, "Parsing %s failed at key %s",
+ GANESHA_HA_CONF, key);
+ goto end_close;
+ }
+ pointer++; /* jump the '=' */
+
+ if (*pointer == '"' || *pointer == '\'') {
+ /* dont get the quote */
+ pointer++;
+ }
+ end_pointer = pointer;
+ /* stop at the next closing quote or blank/newline */
+ do {
+ end_pointer++;
+ } while (!(*end_pointer == '\'' || *end_pointer == '"' ||
+ isspace(*end_pointer) || *end_pointer == '\0'));
+ *end_pointer = '\0';
+
+ /* got it. copy it and return */
+ value = gf_strdup(pointer);
+ break;
+ }
+
+end_close:
+ fclose(fp);
+end_ret:
+ return value;
+}
+
+static int
+sc_systemctl_action(struct service_command *sc, char *command)
+{
+ runner_t runner = {
+ 0,
+ };
+
+ runinit(&runner);
+ runner_add_args(&runner, sc->binary, command, sc->service, NULL);
+ return runner_run(&runner);
+}
+
+static int
+sc_service_action(struct service_command *sc, char *command)
+{
+ runner_t runner = {
+ 0,
+ };
+
+ runinit(&runner);
+ runner_add_args(&runner, sc->binary, sc->service, command, NULL);
+ return runner_run(&runner);
+}
+
+static int
+manage_service(char *action)
+{
+ int i = 0;
+ int ret = 0;
+ struct service_command sc_list[] = {{.binary = "/bin/systemctl",
+ .service = "nfs-ganesha",
+ .action = sc_systemctl_action},
+ {.binary = "/sbin/invoke-rc.d",
+ .service = "nfs-ganesha",
+ .action = sc_service_action},
+ {.binary = "/sbin/service",
+ .service = "nfs-ganesha",
+ .action = sc_service_action},
+ {.binary = NULL}};
+
+ while (sc_list[i].binary != NULL) {
+ ret = sys_access(sc_list[i].binary, X_OK);
+ if (ret == 0) {
+ gf_msg_debug(THIS->name, 0, "%s found.", sc_list[i].binary);
+ return sc_list[i].action(&sc_list[i], action);
+ }
+ i++;
+ }
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_UNRECOGNIZED_SVC_MNGR,
+ "Could not %s NFS-Ganesha.Service manager for distro"
+ " not recognized.",
+ action);
+ return ret;
+}
+
+/*
+ * Check if the cluster is a ganesha cluster or not *
+ */
+gf_boolean_t
+glusterd_is_ganesha_cluster()
+{
+ int ret = -1;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ gf_boolean_t ret_bool = _gf_false;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("ganesha", this, out);
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+ ret = dict_get_str_boolean(priv->opts, GLUSTERD_STORE_KEY_GANESHA_GLOBAL,
+ _gf_false);
+ if (ret == _gf_true) {
+ ret_bool = _gf_true;
+ gf_msg_debug(this->name, 0, "nfs-ganesha is enabled for the cluster");
+ } else
+ gf_msg_debug(this->name, 0, "nfs-ganesha is disabled for the cluster");
+
+out:
+ return ret_bool;
+}
+
+/* Check if ganesha.enable is set to 'on', that checks if
+ * a particular volume is exported via NFS-Ganesha */
+gf_boolean_t
+glusterd_check_ganesha_export(glusterd_volinfo_t *volinfo)
+{
+ char *value = NULL;
+ gf_boolean_t is_exported = _gf_false;
+ int ret = 0;
+
+ ret = glusterd_volinfo_get(volinfo, "ganesha.enable", &value);
+ if ((ret == 0) && value) {
+ if (strcmp(value, "on") == 0) {
+ gf_msg_debug(THIS->name, 0,
+ "ganesha.enable set"
+ " to %s",
+ value);
+ is_exported = _gf_true;
+ }
+ }
+ return is_exported;
+}
+
+/* *
+ * The below function is called as part of commit phase for volume set option
+ * "ganesha.enable". If the value is "on", it creates export configuration file
+ * and then export the volume via dbus command. Incase of "off", the volume
+ * will be already unexported during stage phase, so it will remove the conf
+ * file from shared storage
+ */
+int
+glusterd_check_ganesha_cmd(char *key, char *value, char **errstr, dict_t *dict)
+{
+ int ret = 0;
+ char *volname = NULL;
+
+ GF_ASSERT(key);
+ GF_ASSERT(value);
+ GF_ASSERT(dict);
+
+ if ((strcmp(key, "ganesha.enable") == 0)) {
+ if ((strcmp(value, "on")) && (strcmp(value, "off"))) {
+ gf_asprintf(errstr,
+ "Invalid value"
+ " for volume set command. Use on/off only.");
+ ret = -1;
+ goto out;
+ }
+ if (strcmp(value, "on") == 0) {
+ ret = glusterd_handle_ganesha_op(dict, errstr, key, value);
+
+ } else if (is_origin_glusterd(dict)) {
+ ret = dict_get_str(dict, "volname", &volname);
+ if (ret) {
+ gf_msg("glusterd-ganesha", GF_LOG_ERROR, errno,
+ GD_MSG_DICT_GET_FAILED, "Unable to get volume name");
+ goto out;
+ }
+ ret = manage_export_config(volname, "off", errstr);
+ }
+ }
+out:
+ if (ret) {
+ gf_msg("glusterd-ganesha", GF_LOG_ERROR, 0,
+ GD_MSG_NFS_GNS_OP_HANDLE_FAIL,
+ "Handling NFS-Ganesha"
+ " op failed.");
+ }
+ return ret;
+}
+
+int
+glusterd_op_stage_set_ganesha(dict_t *dict, char **op_errstr)
+{
+ int ret = -1;
+ char *value = NULL;
+ char *str = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ GF_ASSERT(dict);
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = dict_get_str(dict, "value", &value);
+ if (value == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "value not present.");
+ goto out;
+ }
+ /* This dict_get will fail if the user had never set the key before */
+ /*Ignoring the ret value and proceeding */
+ ret = dict_get_str(priv->opts, GLUSTERD_STORE_KEY_GANESHA_GLOBAL, &str);
+ if (str ? strcmp(value, str) == 0 : strcmp(value, "disable") == 0) {
+ gf_asprintf(op_errstr, "nfs-ganesha is already %sd.", value);
+ ret = -1;
+ goto out;
+ }
+
+ if (strcmp(value, "enable") == 0) {
+ ret = start_ganesha(op_errstr);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_NFS_GNS_START_FAIL,
+ "Could not start NFS-Ganesha");
+ }
+ } else {
+ ret = stop_ganesha(op_errstr);
+ if (ret)
+ gf_msg_debug(THIS->name, 0,
+ "Could not stop "
+ "NFS-Ganesha.");
+ }
+
+out:
+
+ if (ret) {
+ if (!(*op_errstr)) {
+ *op_errstr = gf_strdup("Error, Validation Failed");
+ gf_msg_debug(this->name, 0, "Error, Cannot Validate option :%s",
+ GLUSTERD_STORE_KEY_GANESHA_GLOBAL);
+ } else {
+ gf_msg_debug(this->name, 0, "Error, Cannot Validate option");
+ }
+ }
+ return ret;
+}
+
+int
+glusterd_op_set_ganesha(dict_t *dict, char **errstr)
+{
+ int ret = 0;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ char *key = NULL;
+ char *value = NULL;
+ char *next_version = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = dict_get_str(dict, "key", &key);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Couldn't get key in global option set");
+ goto out;
+ }
+
+ ret = dict_get_str(dict, "value", &value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Couldn't get value in global option set");
+ goto out;
+ }
+
+ ret = glusterd_handle_ganesha_op(dict, errstr, key, value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NFS_GNS_SETUP_FAIL,
+ "Initial NFS-Ganesha set up failed");
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_dynstr_with_alloc(priv->opts,
+ GLUSTERD_STORE_KEY_GANESHA_GLOBAL, value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_DICT_SET_FAILED,
+ "Failed to set"
+ " nfs-ganesha in dict.");
+ goto out;
+ }
+ ret = glusterd_get_next_global_opt_version_str(priv->opts, &next_version);
+ if (ret) {
+ gf_msg_debug(THIS->name, 0,
+ "Could not fetch "
+ " global op version");
+ goto out;
+ }
+ ret = dict_set_str(priv->opts, GLUSTERD_GLOBAL_OPT_VERSION, next_version);
+ if (ret)
+ goto out;
+
+ ret = glusterd_store_options(this, priv->opts);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_FAIL,
+ "Failed to store options");
+ goto out;
+ }
+
+out:
+ gf_msg_debug(this->name, 0, "returning %d", ret);
+ return ret;
+}
+
+/* Following function parse GANESHA_HA_CONF
+ * The sample file looks like below,
+ * HA_NAME="ganesha-ha-360"
+ * HA_VOL_NAME="ha-state"
+ * HA_CLUSTER_NODES="server1,server2"
+ * VIP_rhs_1="10.x.x.x"
+ * VIP_rhs_2="10.x.x.x." */
+
+/* Check if the localhost is listed as one of nfs-ganesha nodes */
+gf_boolean_t
+check_host_list(void)
+{
+ glusterd_conf_t *priv = NULL;
+ char *hostname, *hostlist;
+ gf_boolean_t ret = _gf_false;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ priv = THIS->private;
+ GF_ASSERT(priv);
+
+ hostlist = parsing_ganesha_ha_conf("HA_CLUSTER_NODES");
+ if (hostlist == NULL) {
+ gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_GET_CONFIG_INFO_FAILED,
+ "couldn't get HA_CLUSTER_NODES from file %s", GANESHA_HA_CONF);
+ return _gf_false;
+ }
+
+ /* Hostlist is a comma separated list now */
+ hostname = strtok(hostlist, ",");
+ while (hostname != NULL) {
+ ret = gf_is_local_addr(hostname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_NFS_GNS_HOST_FOUND,
+ "ganesha host found "
+ "Hostname is %s",
+ hostname);
+ break;
+ }
+ hostname = strtok(NULL, ",");
+ }
+
+ GF_FREE(hostlist);
+ return ret;
+}
+
+int
+gd_ganesha_send_dbus(char *volname, char *value)
+{
+ runner_t runner = {
+ 0,
+ };
+ int ret = -1;
+ runinit(&runner);
+
+ GF_VALIDATE_OR_GOTO("glusterd-ganesha", volname, out);
+ GF_VALIDATE_OR_GOTO("glusterd-ganesha", value, out);
+
+ ret = 0;
+ if (check_host_list()) {
+ /* Check whether ganesha is running on this node */
+ if (manage_service("status")) {
+ gf_msg("glusterd-ganesha", GF_LOG_WARNING, 0,
+ GD_MSG_GANESHA_NOT_RUNNING,
+ "Export failed, NFS-Ganesha is not running");
+ } else {
+ runner_add_args(&runner, GANESHA_PREFIX "/dbus-send.sh", CONFDIR,
+ value, volname, NULL);
+ ret = runner_run(&runner);
+ }
+ }
+out:
+ return ret;
+}
+
+int
+manage_export_config(char *volname, char *value, char **op_errstr)
+{
+ runner_t runner = {
+ 0,
+ };
+ int ret = -1;
+
+ GF_ASSERT(volname);
+ runinit(&runner);
+ runner_add_args(&runner, GANESHA_PREFIX "/create-export-ganesha.sh",
+ CONFDIR, value, volname, NULL);
+ ret = runner_run(&runner);
+
+ if (ret && op_errstr)
+ gf_asprintf(op_errstr,
+ "Failed to create"
+ " NFS-Ganesha export config file.");
+
+ return ret;
+}
+
+/* Exports and unexports a particular volume via NFS-Ganesha */
+int
+ganesha_manage_export(dict_t *dict, char *value,
+ gf_boolean_t update_cache_invalidation, char **op_errstr)
+{
+ int ret = -1;
+ glusterd_volinfo_t *volinfo = NULL;
+ dict_t *vol_opts = NULL;
+ char *volname = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ gf_boolean_t option = _gf_false;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+
+ GF_ASSERT(value);
+ GF_ASSERT(dict);
+ GF_ASSERT(priv);
+
+ ret = dict_get_str(dict, "volname", &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get volume name");
+ goto out;
+ }
+ ret = gf_string2boolean(value, &option);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "invalid value.");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND,
+ FMTSTR_CHECK_VOL_EXISTS, volname);
+ goto out;
+ }
+
+ ret = glusterd_check_ganesha_export(volinfo);
+ if (ret && option) {
+ gf_asprintf(op_errstr,
+ "ganesha.enable "
+ "is already 'on'.");
+ ret = -1;
+ goto out;
+
+ } else if (!option && !ret) {
+ gf_asprintf(op_errstr,
+ "ganesha.enable "
+ "is already 'off'.");
+ ret = -1;
+ goto out;
+ }
+
+ /* Check if global option is enabled, proceed only then */
+ ret = dict_get_str_boolean(priv->opts, GLUSTERD_STORE_KEY_GANESHA_GLOBAL,
+ _gf_false);
+ if (ret == -1) {
+ gf_msg_debug(this->name, 0,
+ "Failed to get "
+ "global option dict.");
+ gf_asprintf(op_errstr,
+ "The option "
+ "nfs-ganesha should be "
+ "enabled before setting ganesha.enable.");
+ goto out;
+ }
+ if (!ret) {
+ gf_asprintf(op_errstr,
+ "The option "
+ "nfs-ganesha should be "
+ "enabled before setting ganesha.enable.");
+ ret = -1;
+ goto out;
+ }
+
+ /* *
+ * Create the export file from the node where ganesha.enable "on"
+ * is executed
+ * */
+ if (option && is_origin_glusterd(dict)) {
+ ret = manage_export_config(volname, "on", op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_EXPORT_FILE_CREATE_FAIL,
+ "Failed to create"
+ "export file for NFS-Ganesha\n");
+ goto out;
+ }
+ }
+ ret = gd_ganesha_send_dbus(volname, value);
+ if (ret) {
+ gf_asprintf(op_errstr,
+ "Dynamic export addition/deletion failed."
+ " Please see log file for details");
+ goto out;
+ }
+ if (update_cache_invalidation) {
+ vol_opts = volinfo->dict;
+ ret = dict_set_dynstr_with_alloc(vol_opts,
+ "features.cache-invalidation", value);
+ if (ret)
+ gf_asprintf(op_errstr,
+ "Cache-invalidation could not"
+ " be set to %s.",
+ value);
+ ret = glusterd_store_volinfo(volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret)
+ gf_asprintf(op_errstr, "failed to store volinfo for %s",
+ volinfo->volname);
+ }
+out:
+ return ret;
+}
+
+int
+tear_down_cluster(gf_boolean_t run_teardown)
+{
+ int ret = 0;
+ runner_t runner = {
+ 0,
+ };
+ struct stat st = {
+ 0,
+ };
+ DIR *dir = NULL;
+ struct dirent *entry = NULL;
+ struct dirent scratch[2] = {
+ {
+ 0,
+ },
+ };
+ char path[PATH_MAX] = {
+ 0,
+ };
+
+ if (run_teardown) {
+ runinit(&runner);
+ runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh", "teardown",
+ CONFDIR, NULL);
+ ret = runner_run(&runner);
+ /* *
+ * Remove all the entries in CONFDIR expect ganesha.conf and
+ * ganesha-ha.conf
+ */
+ dir = sys_opendir(CONFDIR);
+ if (!dir) {
+ gf_msg_debug(THIS->name, 0,
+ "Failed to open directory %s. "
+ "Reason : %s",
+ CONFDIR, strerror(errno));
+ ret = 0;
+ goto out;
+ }
+
+ while ((entry = sys_readdir(dir, scratch))) {
+ if (gf_irrelevant_entry(entry))
+ continue;
+ snprintf(path, PATH_MAX, "%s/%s", CONFDIR, entry->d_name);
+ ret = sys_lstat(path, &st);
+ if (ret == -1) {
+ gf_msg_debug(THIS->name, 0,
+ "Failed to stat entry %s :"
+ " %s",
+ path, strerror(errno));
+ goto out;
+ }
+
+ if (strcmp(entry->d_name, "ganesha.conf") == 0 ||
+ strcmp(entry->d_name, "ganesha-ha.conf") == 0)
+ gf_msg_debug(THIS->name, 0,
+ " %s is not required"
+ " to remove",
+ path);
+ else if (S_ISDIR(st.st_mode))
+ ret = recursive_rmdir(path);
+ else
+ ret = sys_unlink(path);
+
+ if (ret) {
+ gf_msg_debug(THIS->name, 0,
+ " Failed to remove %s. "
+ "Reason : %s",
+ path, strerror(errno));
+ }
+
+ gf_msg_debug(THIS->name, 0, "%s %s",
+ ret ? "Failed to remove" : "Removed", entry->d_name);
+ }
+
+ ret = sys_closedir(dir);
+ if (ret) {
+ gf_msg_debug(THIS->name, 0,
+ "Failed to close dir %s. Reason :"
+ " %s",
+ CONFDIR, strerror(errno));
+ }
+ goto exit;
+ }
+
+out:
+ if (dir && sys_closedir(dir)) {
+ gf_msg_debug(THIS->name, 0,
+ "Failed to close dir %s. Reason :"
+ " %s",
+ CONFDIR, strerror(errno));
+ }
+exit:
+ return ret;
+}
+
+int
+setup_cluster(gf_boolean_t run_setup)
+{
+ int ret = 0;
+ runner_t runner = {
+ 0,
+ };
+
+ if (run_setup) {
+ runinit(&runner);
+ runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh", "setup",
+ CONFDIR, NULL);
+ ret = runner_run(&runner);
+ }
+ return ret;
+}
+
+static int
+teardown(gf_boolean_t run_teardown, char **op_errstr)
+{
+ runner_t runner = {
+ 0,
+ };
+ int ret = 1;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ dict_t *vol_opts = NULL;
+
+ priv = THIS->private;
+
+ ret = tear_down_cluster(run_teardown);
+ if (ret == -1) {
+ gf_asprintf(op_errstr,
+ "Cleanup of NFS-Ganesha"
+ " HA config failed.");
+ goto out;
+ }
+
+ runinit(&runner);
+ runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh", "cleanup",
+ CONFDIR, NULL);
+ ret = runner_run(&runner);
+ if (ret)
+ gf_msg_debug(THIS->name, 0,
+ "Could not clean up"
+ " NFS-Ganesha related config");
+
+ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list)
+ {
+ vol_opts = volinfo->dict;
+ /* All the volumes exported via NFS-Ganesha will be
+ unexported, hence setting the appropriate keys */
+ ret = dict_set_str(vol_opts, "features.cache-invalidation", "off");
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, GD_MSG_DICT_SET_FAILED,
+ "Could not set features.cache-invalidation "
+ "to off for %s",
+ volinfo->volname);
+
+ ret = dict_set_str(vol_opts, "ganesha.enable", "off");
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, GD_MSG_DICT_SET_FAILED,
+ "Could not set ganesha.enable to off for %s",
+ volinfo->volname);
+
+ ret = glusterd_store_volinfo(volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_VOLINFO_SET_FAIL,
+ "failed to store volinfo for %s", volinfo->volname);
+ }
+out:
+ return ret;
+}
+
+int
+stop_ganesha(char **op_errstr)
+{
+ int ret = 0;
+ runner_t runner = {
+ 0,
+ };
+
+ if (check_host_list()) {
+ runinit(&runner);
+ runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh",
+ "--setup-ganesha-conf-files", CONFDIR, "no", NULL);
+ ret = runner_run(&runner);
+ if (ret) {
+ gf_asprintf(op_errstr,
+ "removal of symlink ganesha.conf "
+ "in /etc/ganesha failed");
+ }
+ ret = manage_service("stop");
+ if (ret)
+ gf_asprintf(op_errstr,
+ "NFS-Ganesha service could not"
+ "be stopped.");
+ }
+ return ret;
+}
+
+int
+start_ganesha(char **op_errstr)
+{
+ int ret = -1;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ runner_t runner = {
+ 0,
+ };
+
+ priv = THIS->private;
+ GF_ASSERT(priv);
+
+ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list)
+ {
+#ifdef BUILD_GNFS
+ /* Gluster-nfs has to be disabled across the trusted pool */
+ /* before attempting to start nfs-ganesha */
+ ret = dict_set_str_sizen(volinfo->dict, NFS_DISABLE_MAP_KEY, "on");
+ if (ret)
+ goto out;
+#endif
+ ret = glusterd_store_volinfo(volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret) {
+ *op_errstr = gf_strdup(
+ "Failed to store the "
+ "Volume information");
+ goto out;
+ }
+ }
+
+ /* If the nfs svc is not initialized it means that the service is not
+ * running, hence we can skip the process of stopping gluster-nfs
+ * service
+ */
+#ifdef BUILD_GNFS
+ if (priv->nfs_svc.inited) {
+ ret = priv->nfs_svc.stop(&(priv->nfs_svc), SIGKILL);
+ if (ret) {
+ ret = -1;
+ gf_asprintf(op_errstr,
+ "Gluster-NFS service could"
+ "not be stopped, exiting.");
+ goto out;
+ }
+ }
+#endif
+
+ if (check_host_list()) {
+ runinit(&runner);
+ runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh",
+ "--setup-ganesha-conf-files", CONFDIR, "yes", NULL);
+ ret = runner_run(&runner);
+ if (ret) {
+ gf_asprintf(op_errstr,
+ "creation of symlink ganesha.conf "
+ "in /etc/ganesha failed");
+ goto out;
+ }
+ ret = manage_service("start");
+ if (ret)
+ gf_asprintf(op_errstr,
+ "NFS-Ganesha failed to start."
+ "Please see log file for details");
+ }
+
+out:
+ return ret;
+}
+
+static int
+pre_setup(gf_boolean_t run_setup, char **op_errstr)
+{
+ int ret = 0;
+ if (run_setup) {
+ if (!check_host_list()) {
+ gf_asprintf(op_errstr,
+ "Running nfs-ganesha setup command "
+ "from node which is not part of ganesha cluster");
+ return -1;
+ }
+ }
+ ret = setup_cluster(run_setup);
+ if (ret == -1)
+ gf_asprintf(op_errstr,
+ "Failed to set up HA "
+ "config for NFS-Ganesha. "
+ "Please check the log file for details");
+ return ret;
+}
+
+int
+glusterd_handle_ganesha_op(dict_t *dict, char **op_errstr, char *key,
+ char *value)
+{
+ int32_t ret = -1;
+ gf_boolean_t option = _gf_false;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(op_errstr);
+ GF_ASSERT(key);
+ GF_ASSERT(value);
+
+ if (strcmp(key, "ganesha.enable") == 0) {
+ ret = ganesha_manage_export(dict, value, _gf_true, op_errstr);
+ if (ret < 0)
+ goto out;
+ }
+
+ /* It is possible that the key might not be set */
+ ret = gf_string2boolean(value, &option);
+ if (ret == -1) {
+ gf_asprintf(op_errstr, "Invalid value in key-value pair.");
+ goto out;
+ }
+
+ if (strcmp(key, GLUSTERD_STORE_KEY_GANESHA_GLOBAL) == 0) {
+ /* *
+ * The set up/teardown of pcs cluster should be performed only
+ * once. This will done on the node in which the cli command
+ * 'gluster nfs-ganesha <enable/disable>' got executed. So that
+ * node should part of ganesha HA cluster
+ */
+ if (option) {
+ ret = pre_setup(is_origin_glusterd(dict), op_errstr);
+ if (ret < 0)
+ goto out;
+ } else {
+ ret = teardown(is_origin_glusterd(dict), op_errstr);
+ if (ret < 0)
+ goto out;
+ }
+ }
+
+out:
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
index 1aa38686d6f..bf062c87060 100644
--- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
+++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
@@ -7,7 +7,7 @@
later), or the GNU General Public License, version 2 (GPLv2), in all
cases as published by the Free Software Foundation.
*/
-#include "common-utils.h"
+#include <glusterfs/common-utils.h>
#include "cli1-xdr.h"
#include "xdr-generic.h"
#include "glusterd.h"
@@ -17,8 +17,8 @@
#include "glusterd-utils.h"
#include "glusterd-volgen.h"
#include "glusterd-svc-helper.h"
-#include "run.h"
-#include "syscall.h"
+#include <glusterfs/run.h>
+#include <glusterfs/syscall.h>
#include "glusterd-messages.h"
#include <signal.h>
@@ -76,6 +76,19 @@ static char *gsync_reserved_opts[] = {"gluster-command",
static char *gsync_no_restart_opts[] = {"checkpoint", "log_rsync_performance",
"log-rsync-performance", NULL};
+void
+set_gsyncd_inet6_arg(runner_t *runner)
+{
+ xlator_t *this = NULL;
+ char *af;
+ int ret;
+
+ this = THIS;
+ ret = dict_get_str(this->options, "transport.address-family", &af);
+ if (ret == 0)
+ runner_argprintf(runner, "--%s", af);
+}
+
int
__glusterd_handle_sys_exec(rpcsvc_request_t *req)
{
@@ -102,13 +115,18 @@ __glusterd_handle_sys_exec(rpcsvc_request_t *req)
ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
if (ret < 0) {
req->rpc_err = GARBAGE_ARGS;
+ snprintf(err_str, sizeof(err_str), "Garbage args received");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto out;
}
if (cli_req.dict.dict_len) {
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
goto out;
+ }
ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
&dict);
@@ -129,13 +147,18 @@ __glusterd_handle_sys_exec(rpcsvc_request_t *req)
snprintf(err_str, sizeof(err_str),
"Failed to get "
"the uuid of local glusterd");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_UUID_GET_FAIL,
+ NULL);
ret = -1;
goto out;
}
ret = dict_set_dynstr(dict, "host-uuid", host_uuid);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=host-uuid", NULL);
goto out;
+ }
}
ret = glusterd_op_begin_synctask(req, cli_op, dict);
@@ -175,13 +198,18 @@ __glusterd_handle_copy_file(rpcsvc_request_t *req)
ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
if (ret < 0) {
req->rpc_err = GARBAGE_ARGS;
+ snprintf(err_str, sizeof(err_str), "Garbage args received");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto out;
}
if (cli_req.dict.dict_len) {
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
goto out;
+ }
ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
&dict);
@@ -202,6 +230,8 @@ __glusterd_handle_copy_file(rpcsvc_request_t *req)
snprintf(err_str, sizeof(err_str),
"Failed to get "
"the uuid of local glusterd");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_UUID_GET_FAIL,
+ NULL);
ret = -1;
goto out;
}
@@ -254,13 +284,18 @@ __glusterd_handle_gsync_set(rpcsvc_request_t *req)
ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
if (ret < 0) {
req->rpc_err = GARBAGE_ARGS;
+ snprintf(err_str, sizeof(err_str), "Garbage args received");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
goto out;
}
if (cli_req.dict.dict_len) {
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
goto out;
+ }
ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
&dict);
@@ -281,6 +316,8 @@ __glusterd_handle_gsync_set(rpcsvc_request_t *req)
snprintf(err_str, sizeof(err_str),
"Failed to get "
"the uuid of local glusterd");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_UUID_GET_FAIL,
+ NULL);
ret = -1;
goto out;
}
@@ -384,6 +421,7 @@ glusterd_urltransform_init(runner_t *runner, const char *transname)
{
runinit(runner);
runner_add_arg(runner, GSYNCD_PREFIX "/gsyncd");
+ set_gsyncd_inet6_arg(runner);
runner_argprintf(runner, "--%s-url", transname);
}
@@ -725,6 +763,7 @@ glusterd_get_slave_voluuid(char *slave_host, char *slave_vol, char *vol_uuid)
runinit(&runner);
runner_add_arg(&runner, GSYNCD_PREFIX "/gsyncd");
+ set_gsyncd_inet6_arg(&runner);
runner_add_arg(&runner, "--slavevoluuid-get");
runner_argprintf(&runner, "%s::%s", slave_host, slave_vol);
@@ -788,6 +827,7 @@ glusterd_gsync_get_config(char *master, char *slave, char *conf_path,
runinit(&runner);
runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "-c", NULL);
runner_argprintf(&runner, "%s", conf_path);
+ set_gsyncd_inet6_arg(&runner);
runner_argprintf(&runner, "--iprefix=%s", DATADIR);
runner_argprintf(&runner, ":%s", master);
runner_add_args(&runner, slave, "--config-get-all", NULL);
@@ -803,6 +843,7 @@ _fcbk_statustostruct(char *resbuf, size_t blen, FILE *fp, void *data)
char *v = NULL;
char *k = NULL;
gf_gsync_status_t *sts_val = NULL;
+ size_t len = 0;
sts_val = (gf_gsync_status_t *)data;
@@ -836,47 +877,63 @@ _fcbk_statustostruct(char *resbuf, size_t blen, FILE *fp, void *data)
}
if (strcmp(k, "worker_status") == 0) {
- memcpy(sts_val->worker_status, v, strlen(v));
- sts_val->worker_status[strlen(v)] = '\0';
+ len = min(strlen(v), (sizeof(sts_val->worker_status) - 1));
+ memcpy(sts_val->worker_status, v, len);
+ sts_val->worker_status[len] = '\0';
} else if (strcmp(k, "slave_node") == 0) {
- memcpy(sts_val->slave_node, v, strlen(v));
- sts_val->slave_node[strlen(v)] = '\0';
+ len = min(strlen(v), (sizeof(sts_val->slave_node) - 1));
+ memcpy(sts_val->slave_node, v, len);
+ sts_val->slave_node[len] = '\0';
} else if (strcmp(k, "crawl_status") == 0) {
- memcpy(sts_val->crawl_status, v, strlen(v));
- sts_val->crawl_status[strlen(v)] = '\0';
+ len = min(strlen(v), (sizeof(sts_val->crawl_status) - 1));
+ memcpy(sts_val->crawl_status, v, len);
+ sts_val->crawl_status[len] = '\0';
} else if (strcmp(k, "last_synced") == 0) {
- memcpy(sts_val->last_synced, v, strlen(v));
- sts_val->last_synced[strlen(v)] = '\0';
+ len = min(strlen(v), (sizeof(sts_val->last_synced) - 1));
+ memcpy(sts_val->last_synced, v, len);
+ sts_val->last_synced[len] = '\0';
} else if (strcmp(k, "last_synced_utc") == 0) {
- memcpy(sts_val->last_synced_utc, v, strlen(v));
- sts_val->last_synced_utc[strlen(v)] = '\0';
+ len = min(strlen(v), (sizeof(sts_val->last_synced_utc) - 1));
+ memcpy(sts_val->last_synced_utc, v, len);
+ sts_val->last_synced_utc[len] = '\0';
} else if (strcmp(k, "entry") == 0) {
- memcpy(sts_val->entry, v, strlen(v));
- sts_val->entry[strlen(v)] = '\0';
+ len = min(strlen(v), (sizeof(sts_val->entry) - 1));
+ memcpy(sts_val->entry, v, len);
+ sts_val->entry[len] = '\0';
} else if (strcmp(k, "data") == 0) {
- memcpy(sts_val->data, v, strlen(v));
- sts_val->data[strlen(v)] = '\0';
+ len = min(strlen(v), (sizeof(sts_val->data) - 1));
+ memcpy(sts_val->data, v, len);
+ sts_val->data[len] = '\0';
} else if (strcmp(k, "meta") == 0) {
- memcpy(sts_val->meta, v, strlen(v));
- sts_val->meta[strlen(v)] = '\0';
+ len = min(strlen(v), (sizeof(sts_val->meta) - 1));
+ memcpy(sts_val->meta, v, len);
+ sts_val->meta[len] = '\0';
} else if (strcmp(k, "failures") == 0) {
- memcpy(sts_val->failures, v, strlen(v));
- sts_val->failures[strlen(v)] = '\0';
+ len = min(strlen(v), (sizeof(sts_val->failures) - 1));
+ memcpy(sts_val->failures, v, len);
+ sts_val->failures[len] = '\0';
} else if (strcmp(k, "checkpoint_time") == 0) {
- memcpy(sts_val->checkpoint_time, v, strlen(v));
- sts_val->checkpoint_time[strlen(v)] = '\0';
+ len = min(strlen(v), (sizeof(sts_val->checkpoint_time) - 1));
+ memcpy(sts_val->checkpoint_time, v, len);
+ sts_val->checkpoint_time[len] = '\0';
} else if (strcmp(k, "checkpoint_time_utc") == 0) {
- memcpy(sts_val->checkpoint_time_utc, v, strlen(v));
- sts_val->checkpoint_time_utc[strlen(v)] = '\0';
+ len = min(strlen(v), (sizeof(sts_val->checkpoint_time_utc) - 1));
+ memcpy(sts_val->checkpoint_time_utc, v, len);
+ sts_val->checkpoint_time_utc[len] = '\0';
} else if (strcmp(k, "checkpoint_completed") == 0) {
- memcpy(sts_val->checkpoint_completed, v, strlen(v));
- sts_val->checkpoint_completed[strlen(v)] = '\0';
+ len = min(strlen(v), (sizeof(sts_val->checkpoint_completed) - 1));
+ memcpy(sts_val->checkpoint_completed, v, len);
+ sts_val->checkpoint_completed[len] = '\0';
} else if (strcmp(k, "checkpoint_completion_time") == 0) {
- memcpy(sts_val->checkpoint_completion_time, v, strlen(v));
- sts_val->checkpoint_completion_time[strlen(v)] = '\0';
+ len = min(strlen(v),
+ (sizeof(sts_val->checkpoint_completion_time) - 1));
+ memcpy(sts_val->checkpoint_completion_time, v, len);
+ sts_val->checkpoint_completion_time[len] = '\0';
} else if (strcmp(k, "checkpoint_completion_time_utc") == 0) {
- memcpy(sts_val->checkpoint_completion_time_utc, v, strlen(v));
- sts_val->checkpoint_completion_time_utc[strlen(v)] = '\0';
+ len = min(strlen(v),
+ (sizeof(sts_val->checkpoint_completion_time_utc) - 1));
+ memcpy(sts_val->checkpoint_completion_time_utc, v, len);
+ sts_val->checkpoint_completion_time_utc[len] = '\0';
}
GF_FREE(v);
GF_FREE(k);
@@ -900,6 +957,7 @@ glusterd_gsync_get_status(char *master, char *slave, char *conf_path,
runinit(&runner);
runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "-c", NULL);
runner_argprintf(&runner, "%s", conf_path);
+ set_gsyncd_inet6_arg(&runner);
runner_argprintf(&runner, "--iprefix=%s", DATADIR);
runner_argprintf(&runner, ":%s", master);
runner_add_args(&runner, slave, "--status-get", NULL);
@@ -920,6 +978,7 @@ glusterd_gsync_get_param_file(char *prmfile, const char *param, char *master,
runinit(&runner);
runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "-c", NULL);
runner_argprintf(&runner, "%s", conf_path);
+ set_gsyncd_inet6_arg(&runner);
runner_argprintf(&runner, "--iprefix=%s", DATADIR);
runner_argprintf(&runner, ":%s", master);
runner_add_args(&runner, slave, "--config-get", NULL);
@@ -1695,9 +1754,10 @@ glusterd_store_slave_in_info(glusterd_volinfo_t *volinfo, char *slave,
char *value = NULL;
char *slavekey = NULL;
char *slaveentry = NULL;
- char key[512] = {
+ char key[32] = {
0,
};
+ int keylen;
char *t = NULL;
xlator_t *this = NULL;
struct slave_vol_config slave1 = {
@@ -1775,15 +1835,15 @@ glusterd_store_slave_in_info(glusterd_volinfo_t *volinfo, char *slave,
if (ret == 0) { /* New slave */
dict_foreach(volinfo->gsync_slaves, _get_max_gsync_slave_num, &maxslv);
- snprintf(key, sizeof(key), "slave%d", maxslv + 1);
+ keylen = snprintf(key, sizeof(key), "slave%d", maxslv + 1);
- ret = dict_set_dynstr(volinfo->gsync_slaves, key, value);
+ ret = dict_set_dynstrn(volinfo->gsync_slaves, key, keylen, value);
if (ret) {
GF_FREE(value);
goto out;
}
} else if (ret == -1) { /* Existing slave */
- snprintf(key, sizeof(key), "slave%d", slave1.old_slvidx);
+ keylen = snprintf(key, sizeof(key), "slave%d", slave1.old_slvidx);
gf_msg_debug(this->name, 0,
"Replacing key:%s with new value"
@@ -1791,7 +1851,7 @@ glusterd_store_slave_in_info(glusterd_volinfo_t *volinfo, char *slave,
key, value);
/* Add new slave's value, with the same slave index */
- ret = dict_set_dynstr(volinfo->gsync_slaves, key, value);
+ ret = dict_set_dynstrn(volinfo->gsync_slaves, key, keylen, value);
if (ret) {
GF_FREE(value);
goto out;
@@ -2215,6 +2275,9 @@ glusterd_op_verify_gsync_running(glusterd_volinfo_t *volinfo, char *slave,
"Volume %s needs to be started "
"before " GEOREP " start",
volinfo->volname);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_GEO_REP_START_FAILED,
+ "Volume is not in a started state, Volname=%s",
+ volinfo->volname, NULL);
goto out;
}
@@ -2271,7 +2334,6 @@ glusterd_verify_gsync_status_opts(dict_t *dict, char **op_errstr)
char errmsg[PATH_MAX] = {
0,
};
- gf_boolean_t exists = _gf_false;
glusterd_volinfo_t *volinfo = NULL;
int ret = 0;
char *conf_path = NULL;
@@ -2299,9 +2361,8 @@ glusterd_verify_gsync_status_opts(dict_t *dict, char **op_errstr)
goto out;
}
- exists = glusterd_check_volume_exists(volname);
ret = glusterd_volinfo_find(volname, &volinfo);
- if ((ret) || (!exists)) {
+ if (ret) {
gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_NOT_FOUND,
"volume name does not exist");
snprintf(errmsg, sizeof(errmsg),
@@ -2309,7 +2370,6 @@ glusterd_verify_gsync_status_opts(dict_t *dict, char **op_errstr)
" exist",
volname);
*op_errstr = gf_strdup(errmsg);
- ret = -1;
goto out;
}
@@ -2522,6 +2582,7 @@ glusterd_op_stage_copy_file(dict_t *dict, char **op_errstr)
len = snprintf(abs_filename, sizeof(abs_filename), "%s/%s",
priv->workdir, filename);
if ((len < 0) || (len >= sizeof(abs_filename))) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL);
ret = -1;
goto out;
}
@@ -2534,6 +2595,9 @@ glusterd_op_stage_copy_file(dict_t *dict, char **op_errstr)
if (len < 0) {
strcpy(errmsg, "<error>");
}
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_REALPATH_GET_FAIL,
+ "Realpath=%s, Reason=%s", priv->workdir, strerror(errno),
+ NULL);
*op_errstr = gf_strdup(errmsg);
ret = -1;
goto out;
@@ -2544,6 +2608,8 @@ glusterd_op_stage_copy_file(dict_t *dict, char **op_errstr)
"Failed to get "
"realpath of %s: %s",
filename, strerror(errno));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_REALPATH_GET_FAIL,
+ "Filename=%s, Reason=%s", filename, strerror(errno), NULL);
*op_errstr = gf_strdup(errmsg);
ret = -1;
goto out;
@@ -2553,6 +2619,7 @@ glusterd_op_stage_copy_file(dict_t *dict, char **op_errstr)
will succeed for /var/lib/glusterd_bad */
len = snprintf(workdir, sizeof(workdir), "%s/", realpath_workdir);
if ((len < 0) || (len >= sizeof(workdir))) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL);
ret = -1;
goto out;
}
@@ -2566,6 +2633,8 @@ glusterd_op_stage_copy_file(dict_t *dict, char **op_errstr)
if (len < 0) {
strcpy(errmsg, "<error>");
}
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_SRC_FILE_ERROR, errmsg,
+ NULL);
*op_errstr = gf_strdup(errmsg);
ret = -1;
goto out;
@@ -2580,6 +2649,8 @@ glusterd_op_stage_copy_file(dict_t *dict, char **op_errstr)
if (len < 0) {
strcpy(errmsg, "<error>");
}
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_SRC_FILE_ERROR, errmsg,
+ NULL);
*op_errstr = gf_strdup(errmsg);
goto out;
}
@@ -2588,9 +2659,9 @@ glusterd_op_stage_copy_file(dict_t *dict, char **op_errstr)
snprintf(errmsg, sizeof(errmsg),
"Source file"
" is not a regular file.");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_SRC_FILE_ERROR, errmsg,
+ NULL);
*op_errstr = gf_strdup(errmsg);
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SRC_FILE_ERROR, "%s",
- errmsg);
ret = -1;
goto out;
}
@@ -2794,6 +2865,7 @@ glusterd_verify_slave(char *volname, char *slave_url, char *slave_vol,
char *slave_ip = NULL;
glusterd_conf_t *priv = NULL;
xlator_t *this = NULL;
+ char *af = NULL;
this = THIS;
GF_ASSERT(this);
@@ -2808,8 +2880,11 @@ glusterd_verify_slave(char *volname, char *slave_url, char *slave_vol,
*/
if (strstr(slave_url, "@")) {
slave_url_buf = gf_strdup(slave_url);
- if (!slave_url_buf)
+ if (!slave_url_buf) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_STRDUP_FAILED,
+ "Slave_url=%s", slave_url, NULL);
goto out;
+ }
slave_user = strtok_r(slave_url_buf, "@", &save_ptr);
slave_ip = strtok_r(NULL, "@", &save_ptr);
@@ -2824,8 +2899,8 @@ glusterd_verify_slave(char *volname, char *slave_url, char *slave_vol,
goto out;
}
- snprintf(log_file_path, sizeof(log_file_path),
- DEFAULT_LOG_FILE_DIRECTORY "/create_verify_log");
+ snprintf(log_file_path, sizeof(log_file_path), "%s/create_verify_log",
+ priv->logdir);
runinit(&runner);
runner_add_args(&runner, GSYNCD_PREFIX "/gverify.sh", NULL);
@@ -2835,9 +2910,16 @@ glusterd_verify_slave(char *volname, char *slave_url, char *slave_vol,
runner_argprintf(&runner, "%s", slave_vol);
runner_argprintf(&runner, "%d", ssh_port);
runner_argprintf(&runner, "%s", log_file_path);
- gf_msg_debug(this->name, 0, "gverify Args = %s %s %s %s %s %s %s",
+ ret = dict_get_str(this->options, "transport.address-family", &af);
+ if (ret)
+ af = "-";
+
+ runner_argprintf(&runner, "%s", af);
+
+ gf_msg_debug(this->name, 0, "gverify Args = %s %s %s %s %s %s %s %s",
runner.argv[0], runner.argv[1], runner.argv[2], runner.argv[3],
- runner.argv[4], runner.argv[5], runner.argv[6]);
+ runner.argv[4], runner.argv[5], runner.argv[6],
+ runner.argv[7]);
runner_redir(&runner, STDOUT_FILENO, RUN_PIPE);
synclock_unlock(&priv->big_lock);
ret = runner_run(&runner);
@@ -3097,7 +3179,6 @@ glusterd_op_stage_gsync_create(dict_t *dict, char **op_errstr)
gf_boolean_t is_force = -1;
gf_boolean_t is_no_verify = -1;
gf_boolean_t is_force_blocker = -1;
- gf_boolean_t exists = _gf_false;
gf_boolean_t is_template_in_use = _gf_false;
glusterd_conf_t *conf = NULL;
glusterd_volinfo_t *volinfo = NULL;
@@ -3147,18 +3228,15 @@ glusterd_op_stage_gsync_create(dict_t *dict, char **op_errstr)
goto out;
}
- exists = glusterd_check_volume_exists(volname);
ret = glusterd_volinfo_find(volname, &volinfo);
- if ((ret) || (!exists)) {
+ if (ret) {
gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_NOT_FOUND,
"volume name does not exist");
snprintf(errmsg, sizeof(errmsg),
"Volume name %s does not"
" exist",
volname);
- *op_errstr = gf_strdup(errmsg);
- gf_msg_debug(this->name, 0, "Returning %d", ret);
- return -1;
+ goto out;
}
ret = glusterd_get_slave_details_confpath(volinfo, dict, &slave_url,
@@ -3399,6 +3477,12 @@ glusterd_op_stage_gsync_create(dict_t *dict, char **op_errstr)
goto out;
}
+ /* There is a remote possibility that slave_host can be NULL when
+ control reaches here. Add a check so we wouldn't crash in next
+ line */
+ if (!slave_host)
+ goto out;
+
/* Now, check whether session is already started.If so, warn!*/
is_different_slavehost = (strcmp(slave_host, slave1.old_slvhost) != 0)
? _gf_true
@@ -3493,7 +3577,6 @@ out:
if (slave_url_buf)
GF_FREE(slave_url_buf);
- gf_msg_debug(this->name, 0, "Returning %d", ret);
return ret;
}
@@ -3572,7 +3655,6 @@ glusterd_op_stage_gsync_set(dict_t *dict, char **op_errstr)
char *statedir = NULL;
char *path_list = NULL;
char *conf_path = NULL;
- gf_boolean_t exists = _gf_false;
glusterd_volinfo_t *volinfo = NULL;
char errmsg[PATH_MAX] = {
0,
@@ -3623,14 +3705,12 @@ glusterd_op_stage_gsync_set(dict_t *dict, char **op_errstr)
goto out;
}
- exists = glusterd_check_volume_exists(volname);
ret = glusterd_volinfo_find(volname, &volinfo);
- if ((ret) || (!exists)) {
+ if (ret) {
snprintf(errmsg, sizeof(errmsg),
"Volume name %s does not"
" exist",
volname);
- ret = -1;
goto out;
}
@@ -4064,6 +4144,7 @@ gd_pause_or_resume_gsync(dict_t *dict, char *master, char *slave,
out:
sys_close(pfd);
+ /* coverity[INTEGER_OVERFLOW] */
return ret;
}
@@ -4128,10 +4209,10 @@ stop_gsync(char *master, char *slave, char **msg, char *conf_path,
* still be alive, give some more time
* before SIGKILL (hack)
*/
- usleep(50000);
+ gf_nanosleep(50000 * GF_US_IN_NS);
break;
}
- usleep(50000);
+ gf_nanosleep(50000 * GF_US_IN_NS);
}
kill(-pid, SIGKILL);
sys_unlink(pidfile);
@@ -4140,7 +4221,7 @@ stop_gsync(char *master, char *slave, char **msg, char *conf_path,
out:
sys_close(pfd);
-
+ /* coverity[INTEGER_OVERFLOW] */
return ret;
}
@@ -5075,7 +5156,6 @@ glusterd_get_gsync_status(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
char errmsg[PATH_MAX] = {
0,
};
- gf_boolean_t exists = _gf_false;
glusterd_volinfo_t *volinfo = NULL;
int ret = 0;
char my_hostname[256] = {
@@ -5098,9 +5178,8 @@ glusterd_get_gsync_status(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
goto out;
}
- exists = glusterd_check_volume_exists(volname);
ret = glusterd_volinfo_find(volname, &volinfo);
- if ((ret) || (!exists)) {
+ if (ret) {
gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_NOT_FOUND,
"volume name does not exist");
snprintf(errmsg, sizeof(errmsg),
@@ -5108,7 +5187,6 @@ glusterd_get_gsync_status(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
" exist",
volname);
*op_errstr = gf_strdup(errmsg);
- ret = -1;
goto out;
}
@@ -5938,7 +6016,7 @@ glusterd_get_slave_info(char *slave, char **slave_url, char **hostname,
GF_ASSERT(this);
ret = glusterd_urltransform_single(slave, "normalize", &linearr);
- if (ret == -1) {
+ if ((ret == -1) || (linearr[0] == NULL)) {
ret = snprintf(errmsg, sizeof(errmsg) - 1, "Invalid Url: %s", slave);
errmsg[ret] = '\0';
*op_errstr = gf_strdup(errmsg);
@@ -5949,7 +6027,10 @@ glusterd_get_slave_info(char *slave, char **slave_url, char **hostname,
tmp = strtok_r(linearr[0], "/", &save_ptr);
tmp = strtok_r(NULL, "/", &save_ptr);
- slave = strtok_r(tmp, ":", &save_ptr);
+ slave = NULL;
+ if (tmp != NULL) {
+ slave = strtok_r(tmp, ":", &save_ptr);
+ }
if (slave) {
ret = glusterd_geo_rep_parse_slave(slave, hostname, op_errstr);
if (ret) {
@@ -6203,26 +6284,28 @@ create_conf_file(glusterd_conf_t *conf, char *conf_path)
/* log-file */
runinit_gsyncd_setrx(&runner, conf_path);
- runner_add_args(&runner, "log-file",
- DEFAULT_LOG_FILE_DIRECTORY "/" GEOREP
- "/${mastervol}/${eSlave}.log",
- ".", ".", NULL);
+ runner_add_arg(&runner, "log-file");
+ runner_argprintf(&runner, "%s/%s/${mastervol}/${eSlave}.log", conf->logdir,
+ GEOREP);
+ runner_add_args(&runner, ".", ".", NULL);
RUN_GSYNCD_CMD;
/* changelog-log-file */
runinit_gsyncd_setrx(&runner, conf_path);
- runner_add_args(&runner, "changelog-log-file",
- DEFAULT_LOG_FILE_DIRECTORY
- "/" GEOREP "/${mastervol}/${eSlave}${local_id}-changes.log",
- ".", ".", NULL);
+ runner_add_arg(&runner, "changelog-log-file");
+ runner_argprintf(&runner,
+ "%s/%s/${mastervol}/${eSlave}${local_id}-changes.log",
+ conf->logdir, GEOREP);
+ runner_add_args(&runner, ".", ".", NULL);
RUN_GSYNCD_CMD;
/* gluster-log-file */
runinit_gsyncd_setrx(&runner, conf_path);
- runner_add_args(&runner, "gluster-log-file",
- DEFAULT_LOG_FILE_DIRECTORY
- "/" GEOREP "/${mastervol}/${eSlave}${local_id}.gluster.log",
- ".", ".", NULL);
+ runner_add_arg(&runner, "gluster-log-file");
+ runner_argprintf(&runner,
+ "%s/%s/${mastervol}/${eSlave}${local_id}.gluster.log",
+ conf->logdir, GEOREP);
+ runner_add_args(&runner, ".", ".", NULL);
RUN_GSYNCD_CMD;
/* ignore-deletes */
@@ -6264,33 +6347,35 @@ create_conf_file(glusterd_conf_t *conf, char *conf_path)
/* log-file */
runinit_gsyncd_setrx(&runner, conf_path);
- runner_add_args(
- &runner, "log-file",
- DEFAULT_LOG_FILE_DIRECTORY
- "/" GEOREP
- "-slaves/${session_owner}:${local_node}${local_id}.${slavevol}.log",
- ".", NULL);
+ runner_add_arg(&runner, "log-file");
+ runner_argprintf(&runner,
+ "%s/%s-slaves/"
+ "${session_owner}:${local_node}${local_id}.${slavevol}."
+ "log",
+ conf->logdir, GEOREP);
+ runner_add_args(&runner, ".", ".", NULL);
RUN_GSYNCD_CMD;
/* MountBroker log-file */
runinit_gsyncd_setrx(&runner, conf_path);
- runner_add_args(
- &runner, "log-file-mbr",
- DEFAULT_LOG_FILE_DIRECTORY
- "/" GEOREP
- "-slaves/mbr/${session_owner}:${local_node}${local_id}.${slavevol}.log",
- ".", NULL);
+ runner_add_arg(&runner, "log-file-mbr");
+ runner_argprintf(&runner,
+ "%s/%s-slaves/mbr/"
+ "${session_owner}:${local_node}${local_id}.${slavevol}."
+ "log",
+ conf->logdir, GEOREP);
+ runner_add_args(&runner, ".", ".", NULL);
RUN_GSYNCD_CMD;
/* gluster-log-file */
runinit_gsyncd_setrx(&runner, conf_path);
- runner_add_args(
- &runner, "gluster-log-file",
- DEFAULT_LOG_FILE_DIRECTORY
- "/" GEOREP
- "-slaves/"
- "${session_owner}:${local_node}${local_id}.${slavevol}.gluster.log",
- ".", NULL);
+ runner_add_arg(&runner, "gluster-log-file");
+ runner_argprintf(&runner,
+ "%s/%s-slaves/"
+ "${session_owner}:${local_node}${local_id}.${slavevol}."
+ "gluster.log",
+ conf->logdir, GEOREP);
+ runner_add_args(&runner, ".", ".", NULL);
RUN_GSYNCD_CMD;
out:
@@ -6341,7 +6426,7 @@ glusterd_create_essential_dir_files(glusterd_volinfo_t *volinfo, dict_t *dict,
ret = -1;
goto out;
}
- ret = mkdir_p(buf, 0777, _gf_true);
+ ret = mkdir_p(buf, 0755, _gf_true);
if (ret) {
len = snprintf(errmsg, sizeof(errmsg),
"Unable to create %s"
@@ -6356,13 +6441,13 @@ glusterd_create_essential_dir_files(glusterd_volinfo_t *volinfo, dict_t *dict,
goto out;
}
- ret = snprintf(buf, PATH_MAX, DEFAULT_LOG_FILE_DIRECTORY "/" GEOREP "/%s",
+ ret = snprintf(buf, PATH_MAX, "%s/" GEOREP "/%s", conf->logdir,
volinfo->volname);
if ((ret < 0) || (ret >= PATH_MAX)) {
ret = -1;
goto out;
}
- ret = mkdir_p(buf, 0777, _gf_true);
+ ret = mkdir_p(buf, 0755, _gf_true);
if (ret) {
len = snprintf(errmsg, sizeof(errmsg),
"Unable to create %s"
diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.h b/xlators/mgmt/glusterd/src/glusterd-geo-rep.h
index 5f5fe344406..7d1318f522c 100644
--- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.h
+++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.h
@@ -30,8 +30,8 @@ typedef struct glusterd_gsync_status_temp {
} glusterd_gsync_status_temp_t;
typedef struct gsync_status_param {
- int is_active;
glusterd_volinfo_t *volinfo;
+ int is_active;
} gsync_status_param_t;
int
diff --git a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.c
index 0b56a0eb45a..319bfa140f3 100644
--- a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.c
+++ b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.c
@@ -12,7 +12,7 @@
#include "glusterd-utils.h"
#include "glusterd-gfproxyd-svc-helper.h"
#include "glusterd-messages.h"
-#include "syscall.h"
+#include <glusterfs/syscall.h>
#include "glusterd-volgen.h"
void
@@ -81,7 +81,8 @@ glusterd_svc_build_gfproxyd_volfile_path(glusterd_volinfo_t *volinfo,
void
glusterd_svc_build_gfproxyd_logdir(char *logdir, char *volname, size_t len)
{
- snprintf(logdir, len, "%s/gfproxy/%s", DEFAULT_LOG_FILE_DIRECTORY, volname);
+ glusterd_conf_t *conf = THIS->private;
+ snprintf(logdir, len, "%s/gfproxy/%s", conf->logdir, volname);
}
void
@@ -111,7 +112,7 @@ glusterd_svc_get_gfproxyd_volfile(glusterd_volinfo_t *volinfo, char *svc_name,
goto out;
}
- /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */
+ /* coverity[SECURE_TEMP] mkstemp uses 0600 as the mode and is safe */
tmp_fd = mkstemp(*tmpvol);
if (tmp_fd < 0) {
gf_msg("glusterd", GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED,
diff --git a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c
index 0a78d4d1fd0..a0bfea41f0f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c
+++ b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "globals.h"
-#include "run.h"
+#include <glusterfs/globals.h>
+#include <glusterfs/run.h>
#include "glusterd.h"
#include "glusterd-utils.h"
#include "glusterd-volgen.h"
@@ -18,7 +18,7 @@
#include "glusterd-svc-helper.h"
#include "glusterd-svc-mgmt.h"
#include "glusterd-gfproxyd-svc-helper.h"
-#include "syscall.h"
+#include <glusterfs/syscall.h>
void
glusterd_gfproxydsvc_build(glusterd_svc_t *svc)
@@ -310,7 +310,7 @@ glusterd_gfproxydsvc_start(glusterd_svc_t *svc, int flags)
}
runinit(&runner);
- if (this->ctx->cmd_args.valgrind) {
+ if (this->ctx->cmd_args.vgtool != _gf_none) {
len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s",
svc->proc.logdir, svc->proc.logfile);
if ((len < 0) || (len >= PATH_MAX)) {
@@ -318,8 +318,13 @@ glusterd_gfproxydsvc_start(glusterd_svc_t *svc, int flags)
goto out;
}
- runner_add_args(&runner, "valgrind", "--leak-check=full",
- "--trace-children=yes", "--track-origins=yes", NULL);
+ if (this->ctx->cmd_args.vgtool == _gf_memcheck)
+ runner_add_args(&runner, "valgrind", "--leak-check=full",
+ "--trace-children=yes", "--track-origins=yes",
+ NULL);
+ else
+ runner_add_args(&runner, "valgrind", "--tool=drd", NULL);
+
runner_argprintf(&runner, "--log-file=%s", valgrind_logfile);
}
@@ -370,6 +375,7 @@ int
glusterd_gfproxydsvc_restart()
{
glusterd_volinfo_t *volinfo = NULL;
+ glusterd_volinfo_t *tmp = NULL;
int ret = -1;
xlator_t *this = THIS;
glusterd_conf_t *conf = NULL;
@@ -380,7 +386,7 @@ glusterd_gfproxydsvc_restart()
conf = this->private;
GF_VALIDATE_OR_GOTO(this->name, conf, out);
- cds_list_for_each_entry(volinfo, &conf->volumes, vol_list)
+ cds_list_for_each_entry_safe(volinfo, tmp, &conf->volumes, vol_list)
{
/* Start per volume gfproxyd svc */
if (volinfo->status == GLUSTERD_STATUS_STARTED) {
diff --git a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.h b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.h
index db1c8b1e7b0..d396b4015f3 100644
--- a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.h
+++ b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.h
@@ -17,8 +17,8 @@
struct glusterd_gfproxydsvc_ {
glusterd_svc_t svc;
- int port;
gf_store_handle_t *handle;
+ int port;
};
typedef struct glusterd_gfproxydsvc_ glusterd_gfproxydsvc_t;
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index ef1df3c3788..1b21c40596d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -9,24 +9,25 @@
*/
#include <inttypes.h>
-#include "glusterfs.h"
-#include "compat.h"
-#include "dict.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/dict.h>
#include "protocol-common.h"
-#include "xlator.h"
-#include "logging.h"
-#include "syscall.h"
-#include "timer.h"
-#include "defaults.h"
-#include "compat.h"
-#include "compat-errno.h"
-#include "statedump.h"
-#include "run.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/timer.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/statedump.h>
+#include <glusterfs/run.h>
#include "glusterd-mem-types.h"
#include "glusterd.h"
#include "glusterd-sm.h"
#include "glusterd-op-sm.h"
#include "glusterd-utils.h"
+#include "glusterd-mgmt.h"
#include "glusterd-server-quorum.h"
#include "glusterd-store.h"
#include "glusterd-locks.h"
@@ -45,15 +46,11 @@
#include <sys/resource.h>
#include <inttypes.h>
-#include "common-utils.h"
+#include <glusterfs/common-utils.h>
#include "glusterd-syncop.h"
#include "glusterd-messages.h"
-#ifdef HAVE_BD_XLATOR
-#include <lvm2app.h>
-#endif
-
extern glusterd_op_info_t opinfo;
static int volcount;
@@ -94,16 +91,17 @@ glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname,
glusterd_friend_sm_event_t *event = NULL;
glusterd_friend_req_ctx_t *ctx = NULL;
char rhost[UNIX_PATH_MAX + 1] = {0};
- uuid_t friend_uuid = {0};
dict_t *dict = NULL;
- gf_uuid_parse(uuid_utoa(uuid), friend_uuid);
if (!port)
port = GF_DEFAULT_BASE_PORT;
ret = glusterd_remote_hostname_get(req, rhost, sizeof(rhost));
- rcu_read_lock();
+ ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_friend_req_ctx_t);
+ dict = dict_new();
+
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(uuid, rhost);
@@ -129,8 +127,6 @@ glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname,
event->peername = gf_strdup(peerinfo->hostname);
gf_uuid_copy(event->peerid, peerinfo->uuid);
- ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_friend_req_ctx_t);
-
if (!ctx) {
gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
"Unable to allocate memory");
@@ -143,8 +139,8 @@ glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname,
ctx->hostname = gf_strdup(hostname);
ctx->req = req;
- dict = dict_new();
if (!dict) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
ret = -1;
goto out;
}
@@ -152,9 +148,11 @@ glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname,
ret = dict_unserialize(friend_req->vols.vols_val, friend_req->vols.vols_len,
&dict);
- if (ret)
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ NULL);
goto out;
- else
+ } else
dict->extra_stdfree = friend_req->vols.vols_val;
ctx->vols = dict;
@@ -174,7 +172,7 @@ glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname,
ret = GLUSTERD_CONNECTION_AWAITED;
out:
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
if (ret && (ret != GLUSTERD_CONNECTION_AWAITED)) {
if (ctx && ctx->hostname)
@@ -207,11 +205,14 @@ glusterd_handle_unfriend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname,
if (!port)
port = GF_DEFAULT_BASE_PORT;
- rcu_read_lock();
+ ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_friend_req_ctx_t);
+
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(uuid, hostname);
if (peerinfo == NULL) {
+ RCU_READ_UNLOCK;
gf_msg("glusterd", GF_LOG_CRITICAL, 0, GD_MSG_REQ_FROM_UNKNOWN_PEER,
"Received remove-friend from unknown peer %s", hostname);
ret = glusterd_xfer_friend_remove_resp(req, hostname, port);
@@ -222,6 +223,7 @@ glusterd_handle_unfriend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname,
&event);
if (ret) {
+ RCU_READ_UNLOCK;
gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_EVENT_NEW_GET_FAIL,
"event generation failed: %d", ret);
goto out;
@@ -232,12 +234,11 @@ glusterd_handle_unfriend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname,
gf_uuid_copy(event->peerid, uuid);
- ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_friend_req_ctx_t);
-
if (!ctx) {
+ RCU_READ_UNLOCK;
+ ret = -1;
gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
"Unable to allocate memory");
- ret = -1;
goto out;
}
@@ -251,6 +252,7 @@ glusterd_handle_unfriend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname,
ret = glusterd_friend_sm_inject_event(event);
if (ret) {
+ RCU_READ_UNLOCK;
gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_EVENT_INJECT_FAIL,
"Unable to inject event %d, "
"ret = %d",
@@ -258,10 +260,11 @@ glusterd_handle_unfriend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname,
goto out;
}
- ret = 0;
+ RCU_READ_UNLOCK;
+
+ return 0;
out:
- rcu_read_unlock();
if (0 != ret) {
if (ctx && ctx->hostname)
@@ -326,81 +329,6 @@ _build_option_key(dict_t *d, char *k, data_t *v, void *tmp)
}
int
-glusterd_add_tier_volume_detail_to_dict(glusterd_volinfo_t *volinfo,
- dict_t *dict, int count)
-{
- int ret = -1;
- char key[64] = {
- 0,
- };
- int keylen;
-
- GF_ASSERT(volinfo);
- GF_ASSERT(dict);
-
- keylen = snprintf(key, sizeof(key), "volume%d.cold_type", count);
- ret = dict_set_int32n(dict, key, keylen, volinfo->tier_info.cold_type);
- if (ret)
- goto out;
-
- keylen = snprintf(key, sizeof(key), "volume%d.cold_brick_count", count);
- ret = dict_set_int32n(dict, key, keylen,
- volinfo->tier_info.cold_brick_count);
- if (ret)
- goto out;
-
- keylen = snprintf(key, sizeof(key), "volume%d.cold_dist_count", count);
- ret = dict_set_int32n(dict, key, keylen,
- volinfo->tier_info.cold_dist_leaf_count);
- if (ret)
- goto out;
-
- keylen = snprintf(key, sizeof(key), "volume%d.cold_replica_count", count);
- ret = dict_set_int32n(dict, key, keylen,
- volinfo->tier_info.cold_replica_count);
- if (ret)
- goto out;
-
- keylen = snprintf(key, sizeof(key), "volume%d.cold_arbiter_count", count);
- ret = dict_set_int32n(dict, key, keylen, volinfo->arbiter_count);
- if (ret)
- goto out;
-
- keylen = snprintf(key, sizeof(key), "volume%d.cold_disperse_count", count);
- ret = dict_set_int32n(dict, key, keylen,
- volinfo->tier_info.cold_disperse_count);
- if (ret)
- goto out;
-
- keylen = snprintf(key, sizeof(key), "volume%d.cold_redundancy_count",
- count);
- ret = dict_set_int32n(dict, key, keylen,
- volinfo->tier_info.cold_redundancy_count);
- if (ret)
- goto out;
-
- keylen = snprintf(key, sizeof(key), "volume%d.hot_type", count);
- ret = dict_set_int32n(dict, key, keylen, volinfo->tier_info.hot_type);
- if (ret)
- goto out;
-
- keylen = snprintf(key, sizeof(key), "volume%d.hot_brick_count", count);
- ret = dict_set_int32n(dict, key, keylen,
- volinfo->tier_info.hot_brick_count);
- if (ret)
- goto out;
-
- keylen = snprintf(key, sizeof(key), "volume%d.hot_replica_count", count);
- ret = dict_set_int32n(dict, key, keylen,
- volinfo->tier_info.hot_replica_count);
- if (ret)
- goto out;
-
-out:
- return ret;
-}
-
-int
glusterd_add_arbiter_info_to_bricks(glusterd_volinfo_t *volinfo,
dict_t *volumes, int count)
{
@@ -409,41 +337,18 @@ glusterd_add_arbiter_info_to_bricks(glusterd_volinfo_t *volinfo,
};
int keylen;
int i = 0;
- int start_index = 0;
int ret = 0;
- if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
- /*TODO: Add info for hot tier once attach tier of arbiter
- * volumes is supported. */
-
- /* cold tier */
- if (volinfo->tier_info.cold_replica_count == 1 ||
- volinfo->arbiter_count != 1)
- return 0;
-
- i = start_index = volinfo->tier_info.hot_brick_count + 1;
- for (; i <= volinfo->brick_count; i++) {
- if ((i - start_index + 1) % volinfo->tier_info.cold_replica_count !=
- 0)
- continue;
- keylen = snprintf(key, sizeof(key), "volume%d.brick%d.isArbiter",
- count, i);
- ret = dict_set_int32n(volumes, key, keylen, 1);
- if (ret)
- return ret;
- }
- } else {
- if (volinfo->replica_count == 1 || volinfo->arbiter_count != 1)
- return 0;
- for (i = 1; i <= volinfo->brick_count; i++) {
- if (i % volinfo->replica_count != 0)
- continue;
- keylen = snprintf(key, sizeof(key), "volume%d.brick%d.isArbiter",
- count, i);
- ret = dict_set_int32n(volumes, key, keylen, 1);
- if (ret)
- return ret;
- }
+ if (volinfo->replica_count == 1 || volinfo->arbiter_count != 1)
+ return 0;
+ for (i = 1; i <= volinfo->brick_count; i++) {
+ if (i % volinfo->replica_count != 0)
+ continue;
+ keylen = snprintf(key, sizeof(key), "volume%d.brick%d.isArbiter", count,
+ i);
+ ret = dict_set_int32n(volumes, key, keylen, 1);
+ if (ret)
+ return ret;
}
return 0;
}
@@ -458,6 +363,7 @@ glusterd_add_volume_detail_to_dict(glusterd_volinfo_t *volinfo, dict_t *volumes,
};
int keylen;
glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t *ta_brickinfo = NULL;
char *buf = NULL;
int i = 1;
dict_t *dict = NULL;
@@ -467,9 +373,12 @@ glusterd_add_volume_detail_to_dict(glusterd_volinfo_t *volinfo, dict_t *volumes,
0,
};
xlator_t *this = NULL;
- GF_UNUSED int caps = 0;
int32_t len = 0;
+ char ta_brick[4096] = {
+ 0,
+ };
+
GF_ASSERT(volinfo);
GF_ASSERT(volumes);
@@ -480,172 +389,129 @@ glusterd_add_volume_detail_to_dict(glusterd_volinfo_t *volinfo, dict_t *volumes,
keylen = snprintf(key, sizeof(key), "volume%d.name", count);
ret = dict_set_strn(volumes, key, keylen, volinfo->volname);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.type", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->type);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.status", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->status);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.brick_count", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->brick_count);
- if (ret)
- goto out;
-
- keylen = snprintf(key, sizeof(key), "volume%d.hot_brick_count", count);
- ret = dict_set_int32n(volumes, key, keylen,
- volinfo->tier_info.hot_brick_count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
-
- if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
- ret = glusterd_add_tier_volume_detail_to_dict(volinfo, volumes, count);
- if (ret)
- goto out;
}
keylen = snprintf(key, sizeof(key), "volume%d.dist_count", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->dist_leaf_count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.stripe_count", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->stripe_count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.replica_count", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->replica_count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.disperse_count", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->disperse_count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.redundancy_count", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->redundancy_count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.arbiter_count", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->arbiter_count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.transport", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->transport_type);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "volume%d.thin_arbiter_count", count);
+ ret = dict_set_int32n(volumes, key, keylen, volinfo->thin_arbiter_count);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
+ goto out;
+ }
volume_id_str = gf_strdup(uuid_utoa(volinfo->volume_id));
- if (!volume_id_str)
+ if (!volume_id_str) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.volume_id", count);
ret = dict_set_dynstrn(volumes, key, keylen, volume_id_str);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.rebalance", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->rebal.defrag_cmd);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.snap_count", count);
ret = dict_set_int32n(volumes, key, keylen, volinfo->snap_count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
-
-#ifdef HAVE_BD_XLATOR
- if (volinfo->caps) {
- caps = 0;
- keylen = snprintf(key, sizeof(key), "volume%d.xlator0", count);
- buf = GF_MALLOC(256, gf_common_mt_char);
- if (!buf) {
- ret = ENOMEM;
- goto out;
- }
- if (volinfo->caps & CAPS_BD)
- snprintf(buf, 256, "BD");
- ret = dict_set_dynstrn(volumes, key, keylen, buf);
- if (ret) {
- GF_FREE(buf);
- goto out;
- }
-
- if (volinfo->caps & CAPS_THIN) {
- snprintf(key, sizeof(key), "volume%d.xlator0.caps%d", count,
- caps++);
- buf = GF_MALLOC(256, gf_common_mt_char);
- if (!buf) {
- ret = ENOMEM;
- goto out;
- }
- snprintf(buf, 256, "thin");
- ret = dict_set_dynstr(volumes, key, buf);
- if (ret) {
- GF_FREE(buf);
- goto out;
- }
- }
-
- if (volinfo->caps & CAPS_OFFLOAD_COPY) {
- snprintf(key, sizeof(key), "volume%d.xlator0.caps%d", count,
- caps++);
- buf = GF_MALLOC(256, gf_common_mt_char);
- if (!buf) {
- ret = ENOMEM;
- goto out;
- }
- snprintf(buf, 256, "offload_copy");
- ret = dict_set_dynstr(volumes, key, buf);
- if (ret) {
- GF_FREE(buf);
- goto out;
- }
- }
-
- if (volinfo->caps & CAPS_OFFLOAD_SNAPSHOT) {
- snprintf(key, sizeof(key), "volume%d.xlator0.caps%d", count,
- caps++);
- buf = GF_MALLOC(256, gf_common_mt_char);
- if (!buf) {
- ret = ENOMEM;
- goto out;
- }
- snprintf(buf, 256, "offload_snapshot");
- ret = dict_set_dynstr(volumes, key, buf);
- if (ret) {
- GF_FREE(buf);
- goto out;
- }
- }
-
- if (volinfo->caps & CAPS_OFFLOAD_ZERO) {
- snprintf(key, sizeof(key), "volume%d.xlator0.caps%d", count,
- caps++);
- buf = GF_MALLOC(256, gf_common_mt_char);
- if (!buf) {
- ret = ENOMEM;
- goto out;
- }
- snprintf(buf, 256, "offload_zerofill");
- ret = dict_set_dynstr(volumes, key, buf);
- if (ret) {
- GF_FREE(buf);
- goto out;
- }
- }
}
-#endif
cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
{
@@ -658,42 +524,67 @@ glusterd_add_volume_detail_to_dict(glusterd_volinfo_t *volinfo, dict_t *volumes,
len = snprintf(brick, sizeof(brick), "%s:%s", brickinfo->hostname,
brickinfo->path);
if ((len < 0) || (len >= sizeof(brick))) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
ret = -1;
goto out;
}
buf = gf_strdup(brick);
keylen = snprintf(key, sizeof(key), "volume%d.brick%d", count, i);
ret = dict_set_dynstrn(volumes, key, keylen, buf);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "volume%d.brick%d.uuid", count, i);
snprintf(brick_uuid, sizeof(brick_uuid), "%s",
uuid_utoa(brickinfo->uuid));
buf = gf_strdup(brick_uuid);
- if (!buf)
+ if (!buf) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ "brick_uuid=%s", brick_uuid, NULL);
goto out;
+ }
ret = dict_set_dynstrn(volumes, key, keylen, buf);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
-
-#ifdef HAVE_BD_XLATOR
- if (volinfo->caps & CAPS_BD) {
- snprintf(key, sizeof(key), "volume%d.vg%d", count, i);
- snprintf(brick, sizeof(brick), "%s", brickinfo->vg);
- buf = gf_strdup(brick);
- ret = dict_set_dynstr(volumes, key, buf);
- if (ret)
- goto out;
}
-#endif
+
i++;
}
+ if (volinfo->thin_arbiter_count == 1) {
+ ta_brickinfo = list_first_entry(&volinfo->ta_bricks,
+ glusterd_brickinfo_t, brick_list);
+ len = snprintf(ta_brick, sizeof(ta_brick), "%s:%s",
+ ta_brickinfo->hostname, ta_brickinfo->path);
+ if ((len < 0) || (len >= sizeof(ta_brick))) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
+ ret = -1;
+ goto out;
+ }
+ buf = gf_strdup(ta_brick);
+ keylen = snprintf(key, sizeof(key), "volume%d.thin_arbiter_brick",
+ count);
+ ret = dict_set_dynstrn(volumes, key, keylen, buf);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
+ goto out;
+ }
+ }
+
ret = glusterd_add_arbiter_info_to_bricks(volinfo, volumes, count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_ARBITER_BRICK_SET_INFO_FAIL, NULL);
goto out;
+ }
dict = volinfo->dict;
if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
ret = 0;
goto out;
}
@@ -904,9 +795,9 @@ __glusterd_handle_cluster_lock(rpcsvc_request_t *req)
gf_msg_debug(this->name, 0, "Received LOCK from uuid: %s",
uuid_utoa(lock_req.uuid));
- rcu_read_lock();
+ RCU_READ_LOCK;
ret = (glusterd_peerinfo_find_by_uuid(lock_req.uuid) == NULL);
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
if (ret) {
gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PEER_NOT_FOUND,
"%s doesn't "
@@ -929,6 +820,7 @@ __glusterd_handle_cluster_lock(rpcsvc_request_t *req)
op_ctx = dict_new();
if (!op_ctx) {
+ ret = -1;
gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_DICT_CREATE_FAIL,
"Unable to set new dict");
goto out;
@@ -955,6 +847,9 @@ out:
glusterd_friend_sm();
glusterd_op_sm();
+ if (ret)
+ GF_FREE(ctx);
+
return ret;
}
@@ -985,11 +880,14 @@ glusterd_req_ctx_create(rpcsvc_request_t *rpc_req, int op, uuid_t uuid,
gf_msg_debug(this->name, 0, "Received op from uuid %s", str);
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
req_ctx = GF_CALLOC(1, sizeof(*req_ctx), mem_type);
if (!req_ctx) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
goto out;
}
@@ -997,8 +895,8 @@ glusterd_req_ctx_create(rpcsvc_request_t *rpc_req, int op, uuid_t uuid,
req_ctx->op = op;
ret = dict_unserialize(buf_val, buf_len, &dict);
if (ret) {
- gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
- "failed to unserialize the dictionary");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ NULL);
goto out;
}
@@ -1063,9 +961,9 @@ __glusterd_handle_stage_op(rpcsvc_request_t *req)
ret = dict_get_bin(req_ctx->dict, "transaction_id", (void **)&txn_id);
gf_msg_debug(this->name, 0, "transaction ID = %s", uuid_utoa(*txn_id));
- rcu_read_lock();
+ RCU_READ_LOCK;
ret = (glusterd_peerinfo_find_by_uuid(op_req.uuid) == NULL);
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
if (ret) {
gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PEER_NOT_FOUND,
"%s doesn't "
@@ -1077,7 +975,11 @@ __glusterd_handle_stage_op(rpcsvc_request_t *req)
/* In cases where there is no volname, the receivers won't have a
* transaction opinfo created, as for those operations, the locking
- * phase where the transaction opinfos are created, won't be called. */
+ * phase where the transaction opinfos are created, won't be called.
+ * skip_locking will be true for all such transaction and we clear
+ * the txn_opinfo after the staging phase, except for geo-replication
+ * operations where we need to access txn_opinfo in the later phases also.
+ */
ret = glusterd_get_txn_opinfo(txn_id, &txn_op_info);
if (ret) {
gf_msg_debug(this->name, 0, "No transaction's opinfo set");
@@ -1086,7 +988,8 @@ __glusterd_handle_stage_op(rpcsvc_request_t *req)
glusterd_txn_opinfo_init(&txn_op_info, &state, &op_req.op,
req_ctx->dict, req);
- txn_op_info.skip_locking = _gf_true;
+ if (req_ctx->op != GD_OP_GSYNC_SET)
+ txn_op_info.skip_locking = _gf_true;
ret = glusterd_set_txn_opinfo(txn_id, &txn_op_info);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL,
@@ -1144,9 +1047,9 @@ __glusterd_handle_commit_op(rpcsvc_request_t *req)
goto out;
}
- rcu_read_lock();
+ RCU_READ_LOCK;
ret = (glusterd_peerinfo_find_by_uuid(op_req.uuid) == NULL);
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
if (ret) {
gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PEER_NOT_FOUND,
"%s doesn't "
@@ -1267,12 +1170,12 @@ __glusterd_handle_cli_probe(rpcsvc_request_t *req)
goto out;
}
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find_by_hostname(hostname);
ret = (peerinfo && gd_peer_has_address(peerinfo, hostname));
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
if (ret) {
gf_msg_debug("glusterd", 0,
@@ -1336,6 +1239,7 @@ __glusterd_handle_cli_deprobe(rpcsvc_request_t *req)
glusterd_volinfo_t *tmp = NULL;
glusterd_snap_t *snapinfo = NULL;
glusterd_snap_t *tmpsnap = NULL;
+ gf_boolean_t need_free = _gf_false;
this = THIS;
GF_ASSERT(this);
@@ -1356,6 +1260,13 @@ __glusterd_handle_cli_deprobe(rpcsvc_request_t *req)
if (cli_req.dict.dict_len) {
dict = dict_new();
+ if (dict) {
+ need_free = _gf_true;
+ } else {
+ ret = -1;
+ goto out;
+ }
+
ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
&dict);
if (ret < 0) {
@@ -1451,12 +1362,17 @@ __glusterd_handle_cli_deprobe(rpcsvc_request_t *req)
&op_errno);
}
+ need_free = _gf_false;
+
out:
free(cli_req.dict.dict_val);
if (ret) {
ret = glusterd_xfer_cli_deprobe_resp(req, ret, op_errno, NULL, hostname,
dict);
+ if (need_free) {
+ dict_unref(dict);
+ }
}
glusterd_friend_sm();
@@ -1554,7 +1470,7 @@ __glusterd_handle_cli_get_volume(rpcsvc_request_t *req)
goto out;
}
- gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_GET_VOL_REQ_RCVD,
+ gf_msg(this->name, GF_LOG_DEBUG, 0, GD_MSG_GET_VOL_REQ_RCVD,
"Received get vol req");
if (cli_req.dict.dict_len) {
@@ -1756,6 +1672,8 @@ __glusterd_handle_cli_uuid_get(rpcsvc_request_t *req)
if (cli_req.dict.dict_len) {
dict = dict_new();
if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
ret = -1;
goto out;
}
@@ -1778,6 +1696,7 @@ __glusterd_handle_cli_uuid_get(rpcsvc_request_t *req)
rsp_dict = dict_new();
if (!rsp_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
ret = -1;
goto out;
}
@@ -1794,9 +1713,8 @@ __glusterd_handle_cli_uuid_get(rpcsvc_request_t *req)
ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val,
&rsp.dict.dict_len);
if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
- "Failed to serialize "
- "dictionary.");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
}
ret = 0;
@@ -1815,6 +1733,10 @@ out:
glusterd_to_cli(req, &rsp, NULL, 0, NULL, (xdrproc_t)xdr_gf_cli_rsp, dict);
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+ GF_FREE(rsp.dict.dict_val);
+
return 0;
}
int
@@ -1845,8 +1767,10 @@ __glusterd_handle_cli_list_volume(rpcsvc_request_t *req)
GF_ASSERT(priv);
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
cds_list_for_each_entry(volinfo, &priv->volumes, vol_list)
{
@@ -1858,8 +1782,11 @@ __glusterd_handle_cli_list_volume(rpcsvc_request_t *req)
}
ret = dict_set_int32n(dict, "count", SLEN("count"), count);
- if (ret)
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=count", NULL);
goto out;
+ }
ret = dict_allocate_and_serialize(dict, &rsp.dict.dict_val,
&rsp.dict.dict_len);
@@ -1881,6 +1808,8 @@ out:
if (dict)
dict_unref(dict);
+ GF_FREE(rsp.dict.dict_val);
+
glusterd_friend_sm();
glusterd_op_sm();
@@ -1904,6 +1833,85 @@ glusterd_op_begin(rpcsvc_request_t *req, glusterd_op_t op, void *ctx,
return ret;
}
+int
+__glusterd_handle_ganesha_cmd(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{
+ 0,
+ }};
+ dict_t *dict = NULL;
+ glusterd_op_t cli_op = GD_OP_GANESHA;
+ char *op_errstr = NULL;
+ char err_str[2048] = {
+ 0,
+ };
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(req);
+
+ ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ snprintf(err_str, sizeof(err_str),
+ "Failed to decode "
+ "request received from cli");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, "%s",
+ err_str);
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new();
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf(err_str, sizeof(err_str),
+ "Unable to decode "
+ "the command");
+ goto out;
+ } else {
+ dict->extra_stdfree = cli_req.dict.dict_val;
+ }
+ }
+
+ gf_msg_trace(this->name, 0, "Received global option request");
+
+ ret = glusterd_op_begin_synctask(req, GD_OP_GANESHA, dict);
+out:
+ if (ret) {
+ if (err_str[0] == '\0')
+ snprintf(err_str, sizeof(err_str), "Operation failed");
+ ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, err_str);
+ }
+ if (op_errstr)
+ GF_FREE(op_errstr);
+ if (dict)
+ dict_unref(dict);
+
+ return ret;
+}
+
+int
+glusterd_handle_ganesha_cmd(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, __glusterd_handle_ganesha_cmd);
+}
+
static int
__glusterd_handle_reset_volume(rpcsvc_request_t *req)
{
@@ -2230,9 +2238,8 @@ glusterd_fsm_log_send_resp(rpcsvc_request_t *req, int op_ret, char *op_errstr,
ret = dict_allocate_and_serialize(dict, &rsp.fsm_log.fsm_log_val,
&rsp.fsm_log.fsm_log_len);
if (ret < 0) {
- gf_msg("glusterd", GF_LOG_ERROR, 0,
- GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
- "failed to get serialized length of dict");
+ gf_smsg("glusterd", GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
return ret;
}
}
@@ -2278,6 +2285,7 @@ __glusterd_handle_fsm_log(rpcsvc_request_t *req)
dict = dict_new();
if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
ret = -1;
goto out;
}
@@ -2286,17 +2294,17 @@ __glusterd_handle_fsm_log(rpcsvc_request_t *req)
conf = this->private;
ret = glusterd_sm_tr_log_add_to_dict(dict, &conf->op_sm_log);
} else {
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find_by_hostname(cli_req.name);
if (!peerinfo) {
+ RCU_READ_UNLOCK;
ret = -1;
snprintf(msg, sizeof(msg), "%s is not a peer", cli_req.name);
} else {
ret = glusterd_sm_tr_log_add_to_dict(dict, &peerinfo->sm_log);
+ RCU_READ_UNLOCK;
}
-
- rcu_read_unlock();
}
out:
@@ -2440,9 +2448,9 @@ __glusterd_handle_cluster_unlock(rpcsvc_request_t *req)
gf_msg_debug(this->name, 0, "Received UNLOCK from uuid: %s",
uuid_utoa(unlock_req.uuid));
- rcu_read_lock();
+ RCU_READ_LOCK;
ret = (glusterd_peerinfo_find_by_uuid(unlock_req.uuid) == NULL);
- rcu_read_unlock();
+ RCU_READ_LOCK;
if (ret) {
gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PEER_NOT_FOUND,
"%s doesn't "
@@ -2504,8 +2512,8 @@ glusterd_op_stage_send_resp(rpcsvc_request_t *req, int32_t op, int32_t status,
ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val,
&rsp.dict.dict_len);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
- "failed to get serialized length of dict");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
return ret;
}
@@ -2544,9 +2552,8 @@ glusterd_op_commit_send_resp(rpcsvc_request_t *req, int32_t op, int32_t status,
ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val,
&rsp.dict.dict_len);
if (ret < 0) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- GD_MSG_DICT_SERL_LENGTH_GET_FAIL,
- "failed to get serialized length of dict");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
goto out;
}
}
@@ -2721,7 +2728,7 @@ __glusterd_handle_friend_update(rpcsvc_request_t *req)
{0},
};
dict_t *dict = NULL;
- char key[100] = {
+ char key[32] = {
0,
};
int keylen;
@@ -2753,11 +2760,11 @@ __glusterd_handle_friend_update(rpcsvc_request_t *req)
}
ret = 0;
- rcu_read_lock();
+ RCU_READ_LOCK;
if (glusterd_peerinfo_find(friend_req.uuid, NULL) == NULL) {
ret = -1;
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
if (ret) {
gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_REQ_FROM_UNKNOWN_PEER,
"Received friend update request "
@@ -2787,12 +2794,18 @@ __glusterd_handle_friend_update(rpcsvc_request_t *req)
}
ret = dict_get_int32n(dict, "count", SLEN("count"), &count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=count", NULL);
goto out;
+ }
ret = dict_get_int32n(dict, "op", SLEN("op"), &op);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=op", NULL);
goto out;
+ }
if (GD_FRIEND_UPDATE_DEL == op) {
(void)glusterd_handle_friend_update_delete(dict);
@@ -2816,7 +2829,7 @@ __glusterd_handle_friend_update(rpcsvc_request_t *req)
snprintf(key, sizeof(key), "friend%d", i);
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(uuid, NULL);
if (peerinfo == NULL) {
/* Create a new peer and add it to the list as there is
@@ -2861,7 +2874,7 @@ __glusterd_handle_friend_update(rpcsvc_request_t *req)
}
}
unlock:
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
if (ret)
break;
@@ -2964,7 +2977,7 @@ __glusterd_handle_probe_query(rpcsvc_request_t *req)
goto out;
}
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(probe_req.uuid, remote_hostname);
if ((peerinfo == NULL) && (!cds_list_empty(&conf->peers))) {
rsp.op_ret = -1;
@@ -2984,7 +2997,7 @@ __glusterd_handle_probe_query(rpcsvc_request_t *req)
rsp.op_errno = GF_PROBE_ADD_FAILED;
}
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
respond:
gf_uuid_copy(rsp.uuid, MY_UUID);
@@ -3031,10 +3044,13 @@ __glusterd_handle_cli_profile_volume(rpcsvc_request_t *req)
0,
};
xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
GF_ASSERT(req);
this = THIS;
GF_ASSERT(this);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
if (ret < 0) {
@@ -3048,8 +3064,11 @@ __glusterd_handle_cli_profile_volume(rpcsvc_request_t *req)
if (cli_req.dict.dict_len > 0) {
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
goto out;
+ }
dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, &dict);
}
@@ -3075,12 +3094,21 @@ __glusterd_handle_cli_profile_volume(rpcsvc_request_t *req)
goto out;
}
- ret = glusterd_op_begin(req, cli_op, dict, err_str, sizeof(err_str));
+ if (conf->op_version < GD_OP_VERSION_6_0) {
+ gf_msg_debug(this->name, 0,
+ "The cluster is operating at "
+ "version less than %d. Falling back "
+ "to op-sm framework.",
+ GD_OP_VERSION_6_0);
+ ret = glusterd_op_begin(req, cli_op, dict, err_str, sizeof(err_str));
+ glusterd_friend_sm();
+ glusterd_op_sm();
+ } else {
+ ret = glusterd_mgmt_v3_initiate_all_phases_with_brickop_phase(
+ req, cli_op, dict);
+ }
out:
- glusterd_friend_sm();
- glusterd_op_sm();
-
free(cli_req.dict.dict_val);
if (ret) {
@@ -3267,6 +3295,7 @@ __glusterd_handle_umount(rpcsvc_request_t *req)
/* check if it is allowed to umount path */
path = gf_strdup(umnt_req.path);
if (!path) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, NULL);
rsp.op_errno = ENOMEM;
goto out;
}
@@ -3334,25 +3363,26 @@ glusterd_friend_remove(uuid_t uuid, char *hostname)
int ret = -1;
glusterd_peerinfo_t *peerinfo = NULL;
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(uuid, hostname);
if (peerinfo == NULL) {
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
goto out;
}
ret = glusterd_friend_remove_cleanup_vols(peerinfo->uuid);
+ RCU_READ_UNLOCK;
if (ret)
gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_VOL_CLEANUP_FAIL,
"Volumes cleanup failed");
- rcu_read_unlock();
/* Giving up the critical section here as glusterd_peerinfo_cleanup must
* be called from outside a critical section
*/
ret = glusterd_peerinfo_cleanup(peerinfo);
out:
gf_msg_debug(THIS->name, 0, "returning %d", ret);
+ /* coverity[LOCK] */
return ret;
}
@@ -3369,6 +3399,7 @@ glusterd_rpc_create(struct rpc_clnt **rpc, dict_t *options,
GF_ASSERT(this);
GF_ASSERT(options);
+ GF_VALIDATE_OR_GOTO(this->name, rpc, out);
if (force && rpc && *rpc) {
(void)rpc_clnt_unref(*rpc);
@@ -3381,7 +3412,6 @@ glusterd_rpc_create(struct rpc_clnt **rpc, dict_t *options,
goto out;
ret = rpc_clnt_register_notify(new_rpc, notify_fn, notify_data);
- *rpc = new_rpc;
if (ret)
goto out;
ret = rpc_clnt_start(new_rpc);
@@ -3390,6 +3420,8 @@ out:
if (new_rpc) {
(void)rpc_clnt_unref(new_rpc);
}
+ } else {
+ *rpc = new_rpc;
}
gf_msg_debug(this->name, 0, "returning %d", ret);
@@ -3397,11 +3429,10 @@ out:
}
int
-glusterd_transport_inet_options_build(dict_t **options, const char *hostname,
- int port)
+glusterd_transport_inet_options_build(dict_t *dict, const char *hostname,
+ int port, char *af)
{
xlator_t *this = NULL;
- dict_t *dict = NULL;
int32_t interval = -1;
int32_t time = -1;
int32_t timeout = -1;
@@ -3409,14 +3440,14 @@ glusterd_transport_inet_options_build(dict_t **options, const char *hostname,
this = THIS;
GF_ASSERT(this);
- GF_ASSERT(options);
+ GF_ASSERT(dict);
GF_ASSERT(hostname);
if (!port)
port = GLUSTERD_DEFAULT_PORT;
/* Build default transport options */
- ret = rpc_transport_inet_options_build(&dict, hostname, port);
+ ret = rpc_transport_inet_options_build(dict, hostname, port, af);
if (ret)
goto out;
@@ -3456,7 +3487,6 @@ glusterd_transport_inet_options_build(dict_t **options, const char *hostname,
if ((interval > 0) || (time > 0))
ret = rpc_transport_keepalive_options_set(dict, interval, time,
timeout);
- *options = dict;
out:
gf_msg_debug("glusterd", 0, "Returning %d", ret);
return ret;
@@ -3470,10 +3500,19 @@ glusterd_friend_rpc_create(xlator_t *this, glusterd_peerinfo_t *peerinfo,
int ret = -1;
glusterd_peerctx_t *peerctx = NULL;
data_t *data = NULL;
+ char *af = NULL;
peerctx = GF_CALLOC(1, sizeof(*peerctx), gf_gld_mt_peerctx_t);
- if (!peerctx)
+ if (!peerctx) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
goto out;
+ }
+
+ options = dict_new();
+ if (!options) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ goto out;
+ }
if (args)
peerctx->args = *args;
@@ -3485,8 +3524,12 @@ glusterd_friend_rpc_create(xlator_t *this, glusterd_peerinfo_t *peerinfo,
uniquely identify a
peerinfo */
- ret = glusterd_transport_inet_options_build(&options, peerinfo->hostname,
- peerinfo->port);
+ ret = dict_get_str(this->options, "transport.address-family", &af);
+ if (ret)
+ gf_log(this->name, GF_LOG_TRACE,
+ "option transport.address-family is not set in xlator options");
+ ret = glusterd_transport_inet_options_build(options, peerinfo->hostname,
+ peerinfo->port, af);
if (ret)
goto out;
@@ -3495,6 +3538,7 @@ glusterd_friend_rpc_create(xlator_t *this, glusterd_peerinfo_t *peerinfo,
* create our RPC endpoint with the same address that the peer would
* use to reach us.
*/
+
if (this->options) {
data = dict_getn(this->options, "transport.socket.bind-address",
SLEN("transport.socket.bind-address"));
@@ -3536,6 +3580,9 @@ glusterd_friend_rpc_create(xlator_t *this, glusterd_peerinfo_t *peerinfo,
peerctx = NULL;
ret = 0;
out:
+ if (options)
+ dict_unref(options);
+
GF_FREE(peerctx);
return ret;
}
@@ -3559,6 +3606,7 @@ glusterd_friend_add(const char *hoststr, int port,
*friend = glusterd_peerinfo_new(state, uuid, hoststr, port);
if (*friend == NULL) {
ret = -1;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_ADD_FAIL, NULL);
goto out;
}
@@ -3657,7 +3705,7 @@ glusterd_probe_begin(rpcsvc_request_t *req, const char *hoststr, int port,
GF_ASSERT(hoststr);
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(NULL, hoststr);
if (peerinfo == NULL) {
@@ -3702,7 +3750,7 @@ glusterd_probe_begin(rpcsvc_request_t *req, const char *hoststr, int port,
}
out:
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
gf_msg_debug("glusterd", 0, "returning %d", ret);
return ret;
}
@@ -3719,7 +3767,7 @@ glusterd_deprobe_begin(rpcsvc_request_t *req, const char *hoststr, int port,
GF_ASSERT(hoststr);
GF_ASSERT(req);
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find(uuid, hoststr);
if (peerinfo == NULL) {
@@ -3780,7 +3828,7 @@ glusterd_deprobe_begin(rpcsvc_request_t *req, const char *hoststr, int port,
peerinfo->detaching = _gf_true;
out:
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
return ret;
}
@@ -4027,8 +4075,11 @@ set_deprobe_error_str(int op_ret, int op_errno, char *op_errstr, char *errstr,
case GF_DEPROBE_BRICK_EXIST:
snprintf(errstr, len,
- "Brick(s) with the peer "
- "%s exist in cluster",
+ "Peer %s hosts one or more bricks. If the peer is in "
+ "not recoverable state then use either replace-brick "
+ "or remove-brick command with force to remove all "
+ "bricks from the peer and attempt the peer detach "
+ "again.",
hostname);
break;
@@ -4133,19 +4184,21 @@ glusterd_list_friends(rpcsvc_request_t *req, dict_t *dict, int32_t flags)
};
int keylen;
- priv = THIS->private;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
GF_ASSERT(priv);
friends = dict_new();
if (!friends) {
- gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
- "Out of Memory");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
}
/* Reset ret to 0, needed to prevent failure in case no peers exist */
ret = 0;
- rcu_read_lock();
+ RCU_READ_LOCK;
if (!cds_list_empty(&priv->peers)) {
cds_list_for_each_entry_rcu(entry, &priv->peers, uuid_list)
{
@@ -4156,7 +4209,7 @@ glusterd_list_friends(rpcsvc_request_t *req, dict_t *dict, int32_t flags)
}
}
unlock:
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
if (ret)
goto out;
@@ -4165,24 +4218,36 @@ unlock:
keylen = snprintf(key, sizeof(key), "friend%d.uuid", count);
uuid_utoa_r(MY_UUID, my_uuid_str);
ret = dict_set_strn(friends, key, keylen, my_uuid_str);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "friend%d.hostname", count);
ret = dict_set_nstrn(friends, key, keylen, "localhost",
SLEN("localhost"));
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
keylen = snprintf(key, sizeof(key), "friend%d.connected", count);
ret = dict_set_int32n(friends, key, keylen, 1);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
goto out;
+ }
}
ret = dict_set_int32n(friends, "count", SLEN("count"), count);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=count", NULL);
goto out;
+ }
ret = dict_allocate_and_serialize(friends, &rsp.friends.friends_val,
&rsp.friends.friends_len);
@@ -4354,8 +4419,11 @@ __glusterd_handle_status_volume(rpcsvc_request_t *req)
if (cli_req.dict.dict_len > 0) {
dict = dict_new();
- if (!dict)
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
goto out;
+ }
ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
&dict);
if (ret < 0) {
@@ -4429,17 +4497,6 @@ __glusterd_handle_status_volume(rpcsvc_request_t *req)
goto out;
}
- if ((cmd & GF_CLI_STATUS_TIERD) &&
- (conf->op_version < GD_OP_VERSION_3_10_0)) {
- snprintf(err_str, sizeof(err_str),
- "The cluster is operating "
- "at a lesser version than %d. Getting the status of "
- "tierd is not allowed in this state",
- GD_OP_VERSION_3_6_0);
- ret = -1;
- goto out;
- }
-
if ((cmd & GF_CLI_STATUS_SCRUB) &&
(conf->op_version < GD_OP_VERSION_3_7_0)) {
snprintf(err_str, sizeof(err_str),
@@ -4634,6 +4691,7 @@ __glusterd_handle_barrier(rpcsvc_request_t *req)
dict = dict_new();
if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
ret = -1;
goto out;
}
@@ -4986,6 +5044,7 @@ out:
&rsp.dict.dict_len);
glusterd_submit_reply(req, &rsp, NULL, 0, NULL, (xdrproc_t)xdr_gf_cli_rsp);
+ GF_FREE(rsp.dict.dict_val);
GF_FREE(key_fixed);
return ret;
}
@@ -5167,12 +5226,17 @@ glusterd_print_gsync_status_by_vol(FILE *fp, glusterd_volinfo_t *volinfo)
0,
};
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
+
GF_VALIDATE_OR_GOTO(THIS->name, volinfo, out);
GF_VALIDATE_OR_GOTO(THIS->name, fp, out);
gsync_rsp_dict = dict_new();
- if (!gsync_rsp_dict)
+ if (!gsync_rsp_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
goto out;
+ }
ret = gethostname(my_hostname, sizeof(my_hostname));
if (ret) {
@@ -5199,7 +5263,7 @@ glusterd_print_snapinfo_by_vol(FILE *fp, glusterd_volinfo_t *volinfo,
glusterd_volinfo_t *tmp_vol = NULL;
glusterd_snap_t *snapinfo = NULL;
int snapcount = 0;
- char timestr[64] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
char snap_status_str[STATUS_STRLEN] = {
@@ -5312,19 +5376,30 @@ glusterd_print_client_details(FILE *fp, dict_t *dict,
brick_req->op = GLUSTERD_BRICK_STATUS;
brick_req->name = "";
+ brick_req->dict.dict_val = NULL;
+ brick_req->dict.dict_len = 0;
ret = dict_set_strn(dict, "brick-name", SLEN("brick-name"),
brickinfo->path);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=brick-name", NULL);
goto out;
+ }
ret = dict_set_int32n(dict, "cmd", SLEN("cmd"), GF_CLI_STATUS_CLIENTS);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=cmd", NULL);
goto out;
+ }
ret = dict_set_strn(dict, "volname", SLEN("volname"), volinfo->volname);
- if (ret)
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=volname", NULL);
goto out;
+ }
ret = dict_allocate_and_serialize(dict, &brick_req->input.input_val,
&brick_req->input.input_len);
@@ -5455,14 +5530,11 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict)
uint32_t get_state_cmd = 0;
uint64_t memtotal = 0;
uint64_t memfree = 0;
- int start_index = 0;
char id_str[64] = {
0,
};
char *vol_type_str = NULL;
- char *hot_tier_type_str = NULL;
- char *cold_tier_type_str = NULL;
char transport_type_str[STATUS_STRLEN] = {
0,
@@ -5476,7 +5548,9 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict)
char vol_status_str[STATUS_STRLEN] = {
0,
};
-
+ char brick_status_str[STATUS_STRLEN] = {
+ 0,
+ };
this = THIS;
GF_VALIDATE_OR_GOTO(THIS->name, this, out);
@@ -5519,7 +5593,7 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict)
ret = dict_get_strn(dict, "filename", SLEN("filename"), &tmp_str);
if (ret) {
- now = time(NULL);
+ now = gf_time();
strftime(timestamp, sizeof(timestamp), "%Y%m%d_%H%M%S",
localtime(&now));
gf_asprintf(&filename, "%s_%s", "glusterd_state", timestamp);
@@ -5530,10 +5604,9 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict)
gf_asprintf(&filename, "%s", tmp_str);
}
- if (odir[odirlen - 1] != '/')
- strcat(odir, "/");
+ ret = gf_asprintf(&ofilepath, "%s%s%s", odir,
+ ((odir[odirlen - 1] != '/') ? "/" : ""), filename);
- ret = gf_asprintf(&ofilepath, "%s%s", odir, filename);
if (ret < 0) {
GF_FREE(odir);
GF_FREE(filename);
@@ -5585,6 +5658,8 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict)
"fetch the value of all volume options "
"for volume %s",
volinfo->volname);
+ if (vol_all_opts)
+ dict_unref(vol_all_opts);
continue;
}
@@ -5609,8 +5684,8 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict)
if (priv->opts)
dict_foreach(priv->opts, glusterd_print_global_options, fp);
- rcu_read_lock();
fprintf(fp, "\n[Peers]\n");
+ RCU_READ_LOCK;
cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list)
{
@@ -5639,7 +5714,7 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict)
count_bkp = 0;
fprintf(fp, "\n");
}
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
count = 0;
fprintf(fp, "\n[Volumes]\n");
@@ -5708,26 +5783,11 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict)
brickinfo->hostname);
/* Determine which one is the arbiter brick */
if (volinfo->arbiter_count == 1) {
- if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
- if (volinfo->tier_info.cold_replica_count != 1) {
- start_index = volinfo->tier_info.hot_brick_count + 1;
- if (count >= start_index &&
- ((count - start_index + 1) %
- volinfo->tier_info.cold_replica_count ==
- 0)) {
- fprintf(fp,
- "Volume%d.Brick%d."
- "is_arbiter: 1\n",
- count_bkp, count);
- }
- }
- } else {
- if (count % volinfo->replica_count == 0) {
- fprintf(fp,
- "Volume%d.Brick%d."
- "is_arbiter: 1\n",
- count_bkp, count);
- }
+ if (count % volinfo->replica_count == 0) {
+ fprintf(fp,
+ "Volume%d.Brick%d."
+ "is_arbiter: 1\n",
+ count_bkp, count);
}
}
/* Add following information only for bricks
@@ -5740,27 +5800,21 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict)
brickinfo->rdma_port);
fprintf(fp, "Volume%d.Brick%d.port_registered: %d\n", count_bkp,
count, brickinfo->port_registered);
+ glusterd_brick_get_status_str(brickinfo, brick_status_str);
fprintf(fp, "Volume%d.Brick%d.status: %s\n", count_bkp, count,
- brickinfo->status ? "Started" : "Stopped");
-
- /*FIXME: This is a hacky way of figuring out whether a
- * brick belongs to the hot or cold tier */
- if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
- fprintf(fp, "Volume%d.Brick%d.tier: %s\n", count_bkp, count,
- count <= volinfo->tier_info.hot_brick_count ? "Hot"
- : "Cold");
- }
+ brick_status_str);
ret = sys_statvfs(brickinfo->path, &brickstat);
if (ret) {
gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
"statfs error: %s ", strerror(errno));
- goto out;
+ memfree = 0;
+ memtotal = 0;
+ } else {
+ memfree = brickstat.f_bfree * brickstat.f_bsize;
+ memtotal = brickstat.f_blocks * brickstat.f_bsize;
}
- memfree = brickstat.f_bfree * brickstat.f_bsize;
- memtotal = brickstat.f_blocks * brickstat.f_bsize;
-
fprintf(fp, "Volume%d.Brick%d.spacefree: %" PRIu64 "Bytes\n",
count_bkp, count, memfree);
fprintf(fp, "Volume%d.Brick%d.spacetotal: %" PRIu64 "Bytes\n",
@@ -5826,50 +5880,10 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict)
GF_FREE(rebal_data);
- if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
- ret = glusterd_volume_get_hot_tier_type_str(volinfo,
- &hot_tier_type_str);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STATE_STR_GET_FAILED,
- "Failed to get hot tier type for "
- "volume: %s",
- volinfo->volname);
- goto out;
- }
-
- ret = glusterd_volume_get_cold_tier_type_str(volinfo,
- &cold_tier_type_str);
- if (ret) {
- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STATE_STR_GET_FAILED,
- "Failed to get cold tier type for "
- "volume: %s",
- volinfo->volname);
- goto out;
- }
-
- fprintf(fp, "Volume%d.tier_info.cold_tier_type: %s\n", count,
- cold_tier_type_str);
- fprintf(fp, "Volume%d.tier_info.cold_brick_count: %d\n", count,
- volinfo->tier_info.cold_brick_count);
- fprintf(fp, "Volume%d.tier_info.cold_replica_count: %d\n", count,
- volinfo->tier_info.cold_replica_count);
- fprintf(fp, "Volume%d.tier_info.cold_disperse_count: %d\n", count,
- volinfo->tier_info.cold_disperse_count);
- fprintf(fp, "Volume%d.tier_info.cold_dist_leaf_count: %d\n", count,
- volinfo->tier_info.cold_dist_leaf_count);
- fprintf(fp, "Volume%d.tier_info.cold_redundancy_count: %d\n", count,
- volinfo->tier_info.cold_redundancy_count);
- fprintf(fp, "Volume%d.tier_info.hot_tier_type: %s\n", count,
- hot_tier_type_str);
- fprintf(fp, "Volume%d.tier_info.hot_brick_count: %d\n", count,
- volinfo->tier_info.hot_brick_count);
- fprintf(fp, "Volume%d.tier_info.hot_replica_count: %d\n", count,
- volinfo->tier_info.hot_replica_count);
- fprintf(fp, "Volume%d.tier_info.promoted: %d\n", count,
- volinfo->tier_info.promoted);
- fprintf(fp, "Volume%d.tier_info.demoted: %d\n", count,
- volinfo->tier_info.demoted);
- }
+ fprintf(fp, "Volume%d.shd_svc.online_status: %s\n", count,
+ volinfo->shd.svc.online ? "Online" : "Offline");
+ fprintf(fp, "Volume%d.shd_svc.inited: %s\n", count,
+ volinfo->shd.svc.inited ? "True" : "False");
if (volinfo->rep_brick.src_brick && volinfo->rep_brick.dst_brick) {
fprintf(fp, "Volume%d.replace_brick.src: %s:%s\n", count,
@@ -5894,19 +5908,13 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict)
count = 0;
fprintf(fp, "\n[Services]\n");
-
- if (priv->shd_svc.inited) {
- fprintf(fp, "svc%d.name: %s\n", ++count, priv->shd_svc.name);
- fprintf(fp, "svc%d.online_status: %s\n\n", count,
- priv->shd_svc.online ? "Online" : "Offline");
- }
-
+#ifdef BUILD_GNFS
if (priv->nfs_svc.inited) {
fprintf(fp, "svc%d.name: %s\n", ++count, priv->nfs_svc.name);
fprintf(fp, "svc%d.online_status: %s\n\n", count,
priv->nfs_svc.online ? "Online" : "Offline");
}
-
+#endif
if (priv->bitd_svc.inited) {
fprintf(fp, "svc%d.name: %s\n", ++count, priv->bitd_svc.name);
fprintf(fp, "svc%d.online_status: %s\n\n", count,
@@ -5942,6 +5950,7 @@ out:
ret = dict_allocate_and_serialize(dict, &rsp.dict.dict_val,
&rsp.dict.dict_len);
glusterd_to_cli(req, &rsp, NULL, 0, NULL, (xdrproc_t)xdr_gf_cli_rsp, dict);
+ GF_FREE(rsp.dict.dict_val);
return ret;
}
@@ -6028,14 +6037,27 @@ get_brickinfo_from_brickid(char *brickid, glusterd_brickinfo_t **brickinfo)
uuid_t volid = {0};
int ret = -1;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
+
brickid_dup = gf_strdup(brickid);
- if (!brickid_dup)
+ if (!brickid_dup) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ "brick_id=%s", brickid, NULL);
goto out;
+ }
volid_str = brickid_dup;
brick = strchr(brickid_dup, ':');
- if (!volid_str || !brick)
+ if (!volid_str) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL);
goto out;
+ }
+
+ if (!brick) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL);
+ goto out;
+ }
*brick = '\0';
brick++;
@@ -6253,7 +6275,7 @@ glusterd_friend_remove_notify(glusterd_peerctx_t *peerctx, int32_t op_errno)
GF_ASSERT(peerctx);
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find_by_generation(peerctx->peerinfo_gen);
if (!peerinfo) {
gf_msg_debug(THIS->name, 0,
@@ -6293,7 +6315,7 @@ glusterd_friend_remove_notify(glusterd_peerctx_t *peerctx, int32_t op_errno)
}
out:
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
return ret;
}
@@ -6340,7 +6362,7 @@ __glusterd_peer_rpc_notify(struct rpc_clnt *rpc, void *mydata,
event, peerctx->peername);
return 0;
}
- rcu_read_lock();
+ RCU_READ_LOCK;
peerinfo = glusterd_peerinfo_find_by_generation(peerctx->peerinfo_gen);
if (!peerinfo) {
@@ -6453,7 +6475,7 @@ __glusterd_peer_rpc_notify(struct rpc_clnt *rpc, void *mydata,
}
out:
- rcu_read_unlock();
+ RCU_READ_UNLOCK;
glusterd_friend_sm();
glusterd_op_sm();
@@ -6476,20 +6498,26 @@ glusterd_null(rpcsvc_request_t *req)
return 0;
}
-rpcsvc_actor_t gd_svc_mgmt_actors[GLUSTERD_MGMT_MAXVALUE] = {
- [GLUSTERD_MGMT_NULL] = {"NULL", GLUSTERD_MGMT_NULL, glusterd_null, NULL, 0,
- DRC_NA},
- [GLUSTERD_MGMT_CLUSTER_LOCK] = {"CLUSTER_LOCK", GLUSTERD_MGMT_CLUSTER_LOCK,
- glusterd_handle_cluster_lock, NULL, 0,
- DRC_NA},
+static rpcsvc_actor_t gd_svc_mgmt_actors[GLUSTERD_MGMT_MAXVALUE] = {
+ [GLUSTERD_MGMT_NULL] = {"NULL", glusterd_null, NULL, GLUSTERD_MGMT_NULL,
+ DRC_NA, 0},
+ [GLUSTERD_MGMT_CLUSTER_LOCK] = {"CLUSTER_LOCK",
+ glusterd_handle_cluster_lock, NULL,
+ GLUSTERD_MGMT_CLUSTER_LOCK, DRC_NA, 0},
[GLUSTERD_MGMT_CLUSTER_UNLOCK] = {"CLUSTER_UNLOCK",
- GLUSTERD_MGMT_CLUSTER_UNLOCK,
- glusterd_handle_cluster_unlock, NULL, 0,
- DRC_NA},
- [GLUSTERD_MGMT_STAGE_OP] = {"STAGE_OP", GLUSTERD_MGMT_STAGE_OP,
- glusterd_handle_stage_op, NULL, 0, DRC_NA},
- [GLUSTERD_MGMT_COMMIT_OP] = {"COMMIT_OP", GLUSTERD_MGMT_COMMIT_OP,
- glusterd_handle_commit_op, NULL, 0, DRC_NA},
+ glusterd_handle_cluster_unlock, NULL,
+ GLUSTERD_MGMT_CLUSTER_UNLOCK, DRC_NA, 0},
+ [GLUSTERD_MGMT_STAGE_OP] = {"STAGE_OP", glusterd_handle_stage_op, NULL,
+ GLUSTERD_MGMT_STAGE_OP, DRC_NA, 0},
+ [GLUSTERD_MGMT_COMMIT_OP] =
+ {
+ "COMMIT_OP",
+ glusterd_handle_commit_op,
+ NULL,
+ GLUSTERD_MGMT_COMMIT_OP,
+ DRC_NA,
+ 0,
+ },
};
struct rpcsvc_program gd_svc_mgmt_prog = {
@@ -6501,19 +6529,18 @@ struct rpcsvc_program gd_svc_mgmt_prog = {
.synctask = _gf_true,
};
-rpcsvc_actor_t gd_svc_peer_actors[GLUSTERD_FRIEND_MAXVALUE] = {
- [GLUSTERD_FRIEND_NULL] = {"NULL", GLUSTERD_MGMT_NULL, glusterd_null, NULL,
- 0, DRC_NA},
- [GLUSTERD_PROBE_QUERY] = {"PROBE_QUERY", GLUSTERD_PROBE_QUERY,
- glusterd_handle_probe_query, NULL, 0, DRC_NA},
- [GLUSTERD_FRIEND_ADD] = {"FRIEND_ADD", GLUSTERD_FRIEND_ADD,
- glusterd_handle_incoming_friend_req, NULL, 0,
- DRC_NA},
- [GLUSTERD_FRIEND_REMOVE] = {"FRIEND_REMOVE", GLUSTERD_FRIEND_REMOVE,
- glusterd_handle_incoming_unfriend_req, NULL, 0,
- DRC_NA},
- [GLUSTERD_FRIEND_UPDATE] = {"FRIEND_UPDATE", GLUSTERD_FRIEND_UPDATE,
- glusterd_handle_friend_update, NULL, 0, DRC_NA},
+static rpcsvc_actor_t gd_svc_peer_actors[GLUSTERD_FRIEND_MAXVALUE] = {
+ [GLUSTERD_FRIEND_NULL] = {"NULL", glusterd_null, NULL, GLUSTERD_MGMT_NULL,
+ DRC_NA, 0},
+ [GLUSTERD_PROBE_QUERY] = {"PROBE_QUERY", glusterd_handle_probe_query, NULL,
+ GLUSTERD_PROBE_QUERY, DRC_NA, 0},
+ [GLUSTERD_FRIEND_ADD] = {"FRIEND_ADD", glusterd_handle_incoming_friend_req,
+ NULL, GLUSTERD_FRIEND_ADD, DRC_NA, 0},
+ [GLUSTERD_FRIEND_REMOVE] = {"FRIEND_REMOVE",
+ glusterd_handle_incoming_unfriend_req, NULL,
+ GLUSTERD_FRIEND_REMOVE, DRC_NA, 0},
+ [GLUSTERD_FRIEND_UPDATE] = {"FRIEND_UPDATE", glusterd_handle_friend_update,
+ NULL, GLUSTERD_FRIEND_UPDATE, DRC_NA, 0},
};
struct rpcsvc_program gd_svc_peer_prog = {
@@ -6525,116 +6552,109 @@ struct rpcsvc_program gd_svc_peer_prog = {
.synctask = _gf_false,
};
-rpcsvc_actor_t gd_svc_cli_actors[GLUSTER_CLI_MAXVALUE] = {
- [GLUSTER_CLI_PROBE] = {"CLI_PROBE", GLUSTER_CLI_PROBE,
- glusterd_handle_cli_probe, NULL, 0, DRC_NA},
+static rpcsvc_actor_t gd_svc_cli_actors[GLUSTER_CLI_MAXVALUE] = {
+ [GLUSTER_CLI_PROBE] = {"CLI_PROBE", glusterd_handle_cli_probe, NULL,
+ GLUSTER_CLI_PROBE, DRC_NA, 0},
[GLUSTER_CLI_CREATE_VOLUME] = {"CLI_CREATE_VOLUME",
- GLUSTER_CLI_CREATE_VOLUME,
- glusterd_handle_create_volume, NULL, 0,
- DRC_NA},
+ glusterd_handle_create_volume, NULL,
+ GLUSTER_CLI_CREATE_VOLUME, DRC_NA, 0},
[GLUSTER_CLI_DEFRAG_VOLUME] = {"CLI_DEFRAG_VOLUME",
- GLUSTER_CLI_DEFRAG_VOLUME,
- glusterd_handle_defrag_volume, NULL, 0,
- DRC_NA},
- [GLUSTER_CLI_DEPROBE] = {"FRIEND_REMOVE", GLUSTER_CLI_DEPROBE,
- glusterd_handle_cli_deprobe, NULL, 0, DRC_NA},
- [GLUSTER_CLI_LIST_FRIENDS] = {"LIST_FRIENDS", GLUSTER_CLI_LIST_FRIENDS,
- glusterd_handle_cli_list_friends, NULL, 0,
- DRC_NA},
- [GLUSTER_CLI_UUID_RESET] = {"UUID_RESET", GLUSTER_CLI_UUID_RESET,
- glusterd_handle_cli_uuid_reset, NULL, 0,
- DRC_NA},
- [GLUSTER_CLI_UUID_GET] = {"UUID_GET", GLUSTER_CLI_UUID_GET,
- glusterd_handle_cli_uuid_get, NULL, 0, DRC_NA},
- [GLUSTER_CLI_START_VOLUME] = {"START_VOLUME", GLUSTER_CLI_START_VOLUME,
- glusterd_handle_cli_start_volume, NULL, 0,
- DRC_NA},
- [GLUSTER_CLI_STOP_VOLUME] = {"STOP_VOLUME", GLUSTER_CLI_STOP_VOLUME,
- glusterd_handle_cli_stop_volume, NULL, 0,
- DRC_NA},
- [GLUSTER_CLI_DELETE_VOLUME] = {"DELETE_VOLUME", GLUSTER_CLI_DELETE_VOLUME,
- glusterd_handle_cli_delete_volume, NULL, 0,
- DRC_NA},
- [GLUSTER_CLI_GET_VOLUME] = {"GET_VOLUME", GLUSTER_CLI_GET_VOLUME,
- glusterd_handle_cli_get_volume, NULL, 0,
- DRC_NA},
- [GLUSTER_CLI_ADD_BRICK] = {"ADD_BRICK", GLUSTER_CLI_ADD_BRICK,
- glusterd_handle_add_brick, NULL, 0, DRC_NA},
- [GLUSTER_CLI_ATTACH_TIER] = {"ATTACH_TIER", GLUSTER_CLI_ATTACH_TIER,
- glusterd_handle_attach_tier, NULL, 0, DRC_NA},
- [GLUSTER_CLI_REPLACE_BRICK] = {"REPLACE_BRICK", GLUSTER_CLI_REPLACE_BRICK,
- glusterd_handle_replace_brick, NULL, 0,
- DRC_NA},
- [GLUSTER_CLI_REMOVE_BRICK] = {"REMOVE_BRICK", GLUSTER_CLI_REMOVE_BRICK,
- glusterd_handle_remove_brick, NULL, 0,
- DRC_NA},
- [GLUSTER_CLI_LOG_ROTATE] = {"LOG FILENAME", GLUSTER_CLI_LOG_ROTATE,
- glusterd_handle_log_rotate, NULL, 0, DRC_NA},
- [GLUSTER_CLI_SET_VOLUME] = {"SET_VOLUME", GLUSTER_CLI_SET_VOLUME,
- glusterd_handle_set_volume, NULL, 0, DRC_NA},
- [GLUSTER_CLI_SYNC_VOLUME] = {"SYNC_VOLUME", GLUSTER_CLI_SYNC_VOLUME,
- glusterd_handle_sync_volume, NULL, 0, DRC_NA},
- [GLUSTER_CLI_RESET_VOLUME] = {"RESET_VOLUME", GLUSTER_CLI_RESET_VOLUME,
- glusterd_handle_reset_volume, NULL, 0,
- DRC_NA},
- [GLUSTER_CLI_FSM_LOG] = {"FSM_LOG", GLUSTER_CLI_FSM_LOG,
- glusterd_handle_fsm_log, NULL, 0, DRC_NA},
- [GLUSTER_CLI_GSYNC_SET] = {"GSYNC_SET", GLUSTER_CLI_GSYNC_SET,
- glusterd_handle_gsync_set, NULL, 0, DRC_NA},
- [GLUSTER_CLI_PROFILE_VOLUME] = {"STATS_VOLUME", GLUSTER_CLI_PROFILE_VOLUME,
- glusterd_handle_cli_profile_volume, NULL, 0,
- DRC_NA},
- [GLUSTER_CLI_QUOTA] = {"QUOTA", GLUSTER_CLI_QUOTA, glusterd_handle_quota,
- NULL, 0, DRC_NA},
- [GLUSTER_CLI_GETWD] = {"GETWD", GLUSTER_CLI_GETWD, glusterd_handle_getwd,
- NULL, 1, DRC_NA},
- [GLUSTER_CLI_STATUS_VOLUME] = {"STATUS_VOLUME", GLUSTER_CLI_STATUS_VOLUME,
- glusterd_handle_status_volume, NULL, 0,
- DRC_NA},
- [GLUSTER_CLI_MOUNT] = {"MOUNT", GLUSTER_CLI_MOUNT, glusterd_handle_mount,
- NULL, 1, DRC_NA},
- [GLUSTER_CLI_UMOUNT] = {"UMOUNT", GLUSTER_CLI_UMOUNT,
- glusterd_handle_umount, NULL, 1, DRC_NA},
- [GLUSTER_CLI_HEAL_VOLUME] = {"HEAL_VOLUME", GLUSTER_CLI_HEAL_VOLUME,
- glusterd_handle_cli_heal_volume, NULL, 0,
- DRC_NA},
+ glusterd_handle_defrag_volume, NULL,
+ GLUSTER_CLI_DEFRAG_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_DEPROBE] = {"FRIEND_REMOVE", glusterd_handle_cli_deprobe, NULL,
+ GLUSTER_CLI_DEPROBE, DRC_NA, 0},
+ [GLUSTER_CLI_LIST_FRIENDS] = {"LIST_FRIENDS",
+ glusterd_handle_cli_list_friends, NULL,
+ GLUSTER_CLI_LIST_FRIENDS, DRC_NA, 0},
+ [GLUSTER_CLI_UUID_RESET] = {"UUID_RESET", glusterd_handle_cli_uuid_reset,
+ NULL, GLUSTER_CLI_UUID_RESET, DRC_NA, 0},
+ [GLUSTER_CLI_UUID_GET] = {"UUID_GET", glusterd_handle_cli_uuid_get, NULL,
+ GLUSTER_CLI_UUID_GET, DRC_NA, 0},
+ [GLUSTER_CLI_START_VOLUME] = {"START_VOLUME",
+ glusterd_handle_cli_start_volume, NULL,
+ GLUSTER_CLI_START_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_STOP_VOLUME] = {"STOP_VOLUME", glusterd_handle_cli_stop_volume,
+ NULL, GLUSTER_CLI_STOP_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_DELETE_VOLUME] = {"DELETE_VOLUME",
+ glusterd_handle_cli_delete_volume, NULL,
+ GLUSTER_CLI_DELETE_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_GET_VOLUME] = {"GET_VOLUME", glusterd_handle_cli_get_volume,
+ NULL, GLUSTER_CLI_GET_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_ADD_BRICK] = {"ADD_BRICK", glusterd_handle_add_brick, NULL,
+ GLUSTER_CLI_ADD_BRICK, DRC_NA, 0},
+ [GLUSTER_CLI_ATTACH_TIER] = {"ATTACH_TIER", glusterd_handle_attach_tier,
+ NULL, GLUSTER_CLI_ATTACH_TIER, DRC_NA, 0},
+ [GLUSTER_CLI_REPLACE_BRICK] = {"REPLACE_BRICK",
+ glusterd_handle_replace_brick, NULL,
+ GLUSTER_CLI_REPLACE_BRICK, DRC_NA, 0},
+ [GLUSTER_CLI_REMOVE_BRICK] = {"REMOVE_BRICK", glusterd_handle_remove_brick,
+ NULL, GLUSTER_CLI_REMOVE_BRICK, DRC_NA, 0},
+ [GLUSTER_CLI_LOG_ROTATE] = {"LOG FILENAME", glusterd_handle_log_rotate,
+ NULL, GLUSTER_CLI_LOG_ROTATE, DRC_NA, 0},
+ [GLUSTER_CLI_SET_VOLUME] = {"SET_VOLUME", glusterd_handle_set_volume, NULL,
+ GLUSTER_CLI_SET_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_SYNC_VOLUME] = {"SYNC_VOLUME", glusterd_handle_sync_volume,
+ NULL, GLUSTER_CLI_SYNC_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_RESET_VOLUME] = {"RESET_VOLUME", glusterd_handle_reset_volume,
+ NULL, GLUSTER_CLI_RESET_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_FSM_LOG] = {"FSM_LOG", glusterd_handle_fsm_log, NULL,
+ GLUSTER_CLI_FSM_LOG, DRC_NA, 0},
+ [GLUSTER_CLI_GSYNC_SET] = {"GSYNC_SET", glusterd_handle_gsync_set, NULL,
+ GLUSTER_CLI_GSYNC_SET, DRC_NA, 0},
+ [GLUSTER_CLI_PROFILE_VOLUME] = {"STATS_VOLUME",
+ glusterd_handle_cli_profile_volume, NULL,
+ GLUSTER_CLI_PROFILE_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_QUOTA] = {"QUOTA", glusterd_handle_quota, NULL,
+ GLUSTER_CLI_QUOTA, DRC_NA, 0},
+ [GLUSTER_CLI_GETWD] = {"GETWD", glusterd_handle_getwd, NULL,
+ GLUSTER_CLI_GETWD, DRC_NA, 1},
+ [GLUSTER_CLI_STATUS_VOLUME] = {"STATUS_VOLUME",
+ glusterd_handle_status_volume, NULL,
+ GLUSTER_CLI_STATUS_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_MOUNT] = {"MOUNT", glusterd_handle_mount, NULL,
+ GLUSTER_CLI_MOUNT, DRC_NA, 1},
+ [GLUSTER_CLI_UMOUNT] = {"UMOUNT", glusterd_handle_umount, NULL,
+ GLUSTER_CLI_UMOUNT, DRC_NA, 1},
+ [GLUSTER_CLI_HEAL_VOLUME] = {"HEAL_VOLUME", glusterd_handle_cli_heal_volume,
+ NULL, GLUSTER_CLI_HEAL_VOLUME, DRC_NA, 0},
[GLUSTER_CLI_STATEDUMP_VOLUME] = {"STATEDUMP_VOLUME",
- GLUSTER_CLI_STATEDUMP_VOLUME,
glusterd_handle_cli_statedump_volume,
- NULL, 0, DRC_NA},
- [GLUSTER_CLI_LIST_VOLUME] = {"LIST_VOLUME", GLUSTER_CLI_LIST_VOLUME,
- glusterd_handle_cli_list_volume, NULL, 0,
- DRC_NA},
+ NULL, GLUSTER_CLI_STATEDUMP_VOLUME,
+ DRC_NA, 0},
+ [GLUSTER_CLI_LIST_VOLUME] = {"LIST_VOLUME", glusterd_handle_cli_list_volume,
+ NULL, GLUSTER_CLI_LIST_VOLUME, DRC_NA, 0},
[GLUSTER_CLI_CLRLOCKS_VOLUME] = {"CLEARLOCKS_VOLUME",
- GLUSTER_CLI_CLRLOCKS_VOLUME,
glusterd_handle_cli_clearlocks_volume,
- NULL, 0, DRC_NA},
- [GLUSTER_CLI_COPY_FILE] = {"COPY_FILE", GLUSTER_CLI_COPY_FILE,
- glusterd_handle_copy_file, NULL, 0, DRC_NA},
- [GLUSTER_CLI_SYS_EXEC] = {"SYS_EXEC", GLUSTER_CLI_SYS_EXEC,
- glusterd_handle_sys_exec, NULL, 0, DRC_NA},
- [GLUSTER_CLI_SNAP] = {"SNAP", GLUSTER_CLI_SNAP, glusterd_handle_snapshot,
- NULL, 0, DRC_NA},
- [GLUSTER_CLI_BARRIER_VOLUME] = {"BARRIER_VOLUME",
- GLUSTER_CLI_BARRIER_VOLUME,
- glusterd_handle_barrier, NULL, 0, DRC_NA},
- [GLUSTER_CLI_GET_VOL_OPT] = {"GET_VOL_OPT", GLUSTER_CLI_GET_VOL_OPT,
- glusterd_handle_get_vol_opt, NULL, 0, DRC_NA},
- [GLUSTER_CLI_BITROT] = {"BITROT", GLUSTER_CLI_BITROT,
- glusterd_handle_bitrot, NULL, 0, DRC_NA},
- [GLUSTER_CLI_GET_STATE] = {"GET_STATE", GLUSTER_CLI_GET_STATE,
- glusterd_handle_get_state, NULL, 0, DRC_NA},
- [GLUSTER_CLI_RESET_BRICK] = {"RESET_BRICK", GLUSTER_CLI_RESET_BRICK,
- glusterd_handle_reset_brick, NULL, 0, DRC_NA},
- [GLUSTER_CLI_TIER] = {"TIER", GLUSTER_CLI_TIER, glusterd_handle_tier, NULL,
- 0, DRC_NA},
+ NULL, GLUSTER_CLI_CLRLOCKS_VOLUME, DRC_NA,
+ 0},
+ [GLUSTER_CLI_COPY_FILE] = {"COPY_FILE", glusterd_handle_copy_file, NULL,
+ GLUSTER_CLI_COPY_FILE, DRC_NA, 0},
+ [GLUSTER_CLI_SYS_EXEC] = {"SYS_EXEC", glusterd_handle_sys_exec, NULL,
+ GLUSTER_CLI_SYS_EXEC, DRC_NA, 0},
+ [GLUSTER_CLI_SNAP] = {"SNAP", glusterd_handle_snapshot, NULL,
+ GLUSTER_CLI_SNAP, DRC_NA, 0},
+ [GLUSTER_CLI_BARRIER_VOLUME] = {"BARRIER_VOLUME", glusterd_handle_barrier,
+ NULL, GLUSTER_CLI_BARRIER_VOLUME, DRC_NA,
+ 0},
+ [GLUSTER_CLI_GANESHA] = {"GANESHA", glusterd_handle_ganesha_cmd, NULL,
+ GLUSTER_CLI_GANESHA, DRC_NA, 0},
+ [GLUSTER_CLI_GET_VOL_OPT] = {"GET_VOL_OPT", glusterd_handle_get_vol_opt,
+ NULL, DRC_NA, 0},
+ [GLUSTER_CLI_BITROT] = {"BITROT", glusterd_handle_bitrot, NULL,